diff --git a/20251014.md b/20251014.md index 4cd670b..81b55c6 100644 --- a/20251014.md +++ b/20251014.md @@ -61,134 +61,4 @@ vllm serve /home/ss/vllm-py12/qwen3-06b \ --max-model-len 32768 \ --trust-remote-code -``` -#### 新建了一个脚本去测试结构化输出函数的bug - -```shell -vim /home/ss/vllm-py12/vllm-crash-test.py -``` - -```python -from enum import Enum -from pydantic import BaseModel -from vllm import LLM, SamplingParams -from vllm.sampling_params import GuidedDecodingParams - -# 定义结构化输出 schema -class CarType(str, Enum): - sedan = "sedan" - suv = "SUV" - truck = "Truck" - coupe = "Coupe" - -class CarDescription(BaseModel): - brand: str - model: str - car_type: CarType - -# 获取 JSON schema -json_schema = CarDescription.model_json_schema() - -# 设置 prompt -prompt = ( - "Generate a JSON with the brand, model and car_type of " - "the most iconic car from the 90's" -) - -def format_output(title: str, output: str): - print(f"{'-' * 50}\n{title}: {output}\n{'-' * 50}") - -def main(): - # 1. 初始化本地 LLM,加载本地模型文件 - llm = LLM( - model="/home/ss/vllm-py12/qwen3-06b", # 指向你的本地模型路径 - max_model_len=1024, - enable_prefix_caching=True, - gpu_memory_utilization=0.9, - ) - - # 2. 构造一个无效的 guided_decoding:没有任何有效字段 - # 这将导致 get_structured_output_key() 中 raise ValueError - guided_decoding_invalid = GuidedDecodingParams( - json=None, - json_object=False, - regex=None, - choice=None, - grammar=None, - structural_tag=None - ) - - sampling_params = SamplingParams( - temperature=0.0, - max_tokens=512, - guided_decoding=guided_decoding_invalid # ✅ 传入但无有效字段 - ) - - # 3. 生成输出(预期会触发 ValueError) - try: - outputs = llm.generate(prompts=prompt, sampling_params=sampling_params) - for output in outputs: - generated_text = output.outputs[0].text - format_output("Output", generated_text) - except Exception as e: - print(f"Caught expected error: {e}") - -if __name__ == "__main__": - main() -``` - -#### 复现 - -```shell -python /home/ss/vllm-py12/vllm-crash-test.py -``` - -```text -/home/ss/vllm-py12/vllm-crash-test.py:50: DeprecationWarning: guided_decoding is deprecated. This will be removed in v0.12.0 or v1.0.0, which ever is soonest. Please use structured_outputs instead. - sampling_params = SamplingParams( -Adding requests: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 93.46it/s] -Processed prompts: 0%| | 0/1 [00:00