# RTX4090 Laptop Operation Log
```shell
# Switched from the Tsinghua open-source mirror (it returned HTTP 403) to the USTC mirror; the config lives here
cat /etc/apt/sources.list
# Install openssh-server; keep the default port 22 unchanged
sudo apt install openssh-server -y
sudo systemctl enable ssh
sudo systemctl start ssh
# Install the NVIDIA GPU driver and CUDA toolkit via the official repo keyring
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-8
sudo apt-get install -y cuda-drivers
nvidia-smi
# Install nvidia-cuda-toolkit so that nvcc is available
apt install nvidia-cuda-toolkit
nvcc -V
# Create a new directory for the models and other files vLLM will use
mkdir /home/ss/vllm-py12 && cd /home/ss/vllm-py12
# Create a new conda environment; every pip install below runs inside it
conda create -n vllm-py12 python=3.12 -y
conda activate vllm-py12
# Install vLLM
pip install vllm -i http://mirrors.cloud.tencent.com/pypi/simple --extra-index-url https://download.pytorch.org/whl/cu128
# Install modelscope
pip install modelscope -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
# Download the gpt-oss-20b model
modelscope download --model openai-mirror/gpt-oss-20b --local_dir /home/ss/vllm-py12/gpt-oss-20b
# Serving gpt-oss-20b failed: the mobile RTX4090 has only 16 GB of VRAM, but the model needs roughly 16-24 GB
vllm serve \
    /home/ss/vllm-py12/gpt-oss-20b \
    --port 18777 \
    --api-key token_lcfc \
    --served-model-name gpt-oss-20b \
    --gpu-memory-utilization 0.95 \
    --tool-call-parser openai \
    --enable-auto-tool-choice
# Qwen3-8B would also need 16-24 GB of VRAM, so download Qwen3-0.6B instead
modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b
# Serve Qwen3-0.6B
vllm serve /home/ss/vllm-py12/qwen3-06b \
    --host 0.0.0.0 \
    --port 8000 \
    --served-model-name Qwen3-0.6B \
    --tensor-parallel-size 1 \
    --dtype auto \
    --gpu-memory-utilization 0.9 \
    --max-model-len 32768 \
    --trust-remote-code
```
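
Once the Qwen3-0.6B server above is up on port 8000, a quick sanity check can go through the OpenAI-compatible API. A minimal sketch with the `openai` Python client; the base URL and the `"EMPTY"` placeholder key are assumptions based on the serve flags above (no `--api-key` was set for this server):

```python
from openai import OpenAI

# Point the client at the vLLM OpenAI-compatible endpoint started above.
client = OpenAI(
    base_url="http://localhost:8000/v1",  # host/port from the serve command
    api_key="EMPTY",                      # server was started without --api-key
)

resp = client.chat.completions.create(
    model="Qwen3-0.6B",  # must match --served-model-name
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    max_tokens=64,
)
print(resp.choices[0].message.content)
```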
#### Created a new script to test the structured-output bug
```shell
vim /home/ss/vllm-py12/vllm-crash-test.py
```
```python
from enum import Enum

from pydantic import BaseModel

from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams


# Define the structured-output schema
class CarType(str, Enum):
    sedan = "sedan"
    suv = "SUV"
    truck = "Truck"
    coupe = "Coupe"


class CarDescription(BaseModel):
    brand: str
    model: str
    car_type: CarType


# Get the JSON schema (computed here but deliberately not used below)
json_schema = CarDescription.model_json_schema()

# Set the prompt
prompt = (
    "Generate a JSON with the brand, model and car_type of "
    "the most iconic car from the 90's"
)


def format_output(title: str, output: str):
    print(f"{'-' * 50}\n{title}: {output}\n{'-' * 50}")


def main():
    # 1. Initialize the local LLM and load the local model files
    llm = LLM(
        model="/home/ss/vllm-py12/qwen3-06b",  # path to your local model
        max_model_len=1024,
        enable_prefix_caching=True,
        gpu_memory_utilization=0.9,
    )
    # 2. Build an invalid guided_decoding with no valid field set;
    #    this makes get_structured_output_key() raise ValueError
    guided_decoding_invalid = GuidedDecodingParams(
        json=None,
        json_object=False,
        regex=None,
        choice=None,
        grammar=None,
        structural_tag=None,
    )
    sampling_params = SamplingParams(
        temperature=0.0,
        max_tokens=512,
        guided_decoding=guided_decoding_invalid,  # passed in, but no valid field
    )
    # 3. Generate output (expected to trigger the ValueError)
    try:
        outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)
        for output in outputs:
            generated_text = output.outputs[0].text
            format_output("Output", generated_text)
    except Exception as e:
        print(f"Caught expected error: {e}")


if __name__ == "__main__":
    main()
```
#### Reproduction
```shell
python /home/ss/vllm-py12/vllm-crash-test.py
```
```text
/home/ss/vllm-py12/vllm-crash-test.py:50: DeprecationWarning: guided_decoding is deprecated. This will be removed in v0.12.0 or v1.0.0, which ever is soonest. Please use structured_outputs instead.
sampling_params = SamplingParams(
Adding requests: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 93.46it/s]
Processed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s](EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] EngineCore encountered a fatal error.
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] Traceback (most recent call last):
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 701, in run_engine_core
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] engine_core.run_busy_loop()
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 728, in run_busy_loop
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] self._process_engine_step()
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 754, in _process_engine_step
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] outputs, model_executed = self.step_fn()
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 283, in step
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] scheduler_output = self.scheduler.schedule()
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/core/sched/scheduler.py", line 359, in schedule
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] if structured_output_req and structured_output_req.grammar:
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 45, in grammar
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] completed = self._check_grammar_completion()
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 33, in _check_grammar_completion
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] self._grammar = self._grammar.result(timeout=0.0001)
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/concurrent/futures/_base.py", line 449, in result
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] return self.__get_result()
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] raise self._exception
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/concurrent/futures/thread.py", line 59, in run
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] result = self.fn(*self.args, **self.kwargs)
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/__init__.py", line 128, in _async_create_grammar
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] key = request.structured_output_request.structured_output_key # type: ignore[union-attr]
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/functools.py", line 998, in __get__
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] val = self.func(instance)
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 58, in structured_output_key
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] return get_structured_output_key(self.sampling_params)
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] File "/root/miniconda3/envs/vllm-py12/lib/python3.12/site-packages/vllm/v1/structured_output/request.py", line 86, in get_structured_output_key
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] raise ValueError("No valid structured output parameter found")
(EngineCore_DP0 pid=190093) ERROR 10-15 10:50:58 [core.py:710] ValueError: No valid structured output parameter found
Caught expected error: EngineCore encountered an issue. See stack trace (above) for the root cause.
Processed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]
(vllm-py12) root@ss-IdeaPad-PC:/home/ss/vllm-py12#
```
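
Reading the traceback bottom-up: the grammar future raises `ValueError("No valid structured output parameter found")` inside `get_structured_output_key()`, the scheduler re-raises it while checking `structured_output_req.grammar`, and the whole EngineCore dies instead of just that one request failing. For contrast, a sketch of the valid path: replace step 2 of the script above with a `GuidedDecodingParams` that actually carries the `json_schema` the script already computes (same `llm` and `prompt` as in the script; note the log's DeprecationWarning says `structured_outputs` replaces `guided_decoding` in newer vLLM versions):

```python
# Non-crashing variant of step 2 in vllm-crash-test.py: set exactly one
# valid field so get_structured_output_key() can build a grammar key.
guided_decoding_valid = GuidedDecodingParams(json=json_schema)

sampling_params = SamplingParams(
    temperature=0.0,
    max_tokens=512,
    guided_decoding=guided_decoding_valid,  # json field set -> no ValueError
)
outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)  # expected: JSON matching CarDescription
```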