```shell
vllm serve /home/ss/vllm-py12/qwen3-06b \
    --max-model-len 32768 \
    --trust-remote-code
```
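With the server from the command above running, the same structured-output path can also be exercised over HTTP. A minimal sketch, assuming the default port 8000, the model registered under its local path, and vLLM's `guided_json` extension field to the OpenAI chat API; the prompt and schema here are illustrative, not from the test script:

```shell
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "/home/ss/vllm-py12/qwen3-06b",
        "messages": [{"role": "user", "content": "Describe the most iconic car from the 90s as JSON."}],
        "guided_json": {
            "type": "object",
            "properties": {
                "brand": {"type": "string"},
                "model": {"type": "string"}
            },
            "required": ["brand", "model"]
        }
    }'
```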