From fbc162cbecd18866d4c2f117919957dae614318f Mon Sep 17 00:00:00 2001
From: liushuang
Date: Wed, 15 Oct 2025 10:09:20 +0800
Subject: [PATCH] =?UTF-8?q?RTX4090=E7=AC=94=E7=94=B5=E6=93=8D=E4=BD=9C?=
 =?UTF-8?q?=E8=AE=B0=E5=BD=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 20251014.md | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/20251014.md b/20251014.md
index b6f592f..bce3ea3 100644
--- a/20251014.md
+++ b/20251014.md
@@ -35,13 +35,28 @@ pip install modelscope -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host
 # 拉取 gpt-oss-20b 模型
 modelscope download --model openai-mirror/gpt-oss-20b --local_dir /home/ss/vllm-py12/gpt-oss-20b
 
-# 运行模型
+# 运行 gpt-oss-20b 模型失败,移动端的 RTX4090 只有 16GB 显存,至少需要 16~24GB 显存
 vllm serve \
-/home/ss/vllm-py12/gpt-oss-20b \
---port 18777 \
---api-key token_lcfc \
---served-model-name gpt-oss-20b \
---gpu-memory-utilization 0.95 \
---tool-call-parser openai \
---enable-auto-tool-choice
+  /home/ss/vllm-py12/gpt-oss-20b \
+  --port 18777 \
+  --api-key token_lcfc \
+  --served-model-name gpt-oss-20b \
+  --gpu-memory-utilization 0.95 \
+  --tool-call-parser openai \
+  --enable-auto-tool-choice
+
+# Qwen3-8b 也需要 16~24GB显存,所以下载了 Qwen3-0.6B
+modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b
+
+# 运行 Qwen3-0.6B
+vllm serve \
+  --host 0.0.0.0 \
+  --port 18778 \
+  --model /home/ss/vllm-py12/qwen3-06b \
+  --tensor-parallel-size 1 \
+  --dtype auto \
+  --gpu-memory-utilization 0.9 \
+  --max-model-len 32768 \
+  --trust-remote-code
+
 ```
\ No newline at end of file