From fbc162cbecd18866d4c2f117919957dae614318f Mon Sep 17 00:00:00 2001
From: liushuang
Date: Wed, 15 Oct 2025 10:09:20 +0800
Subject: [PATCH] =?UTF-8?q?RTX4090=E7=AC=94=E7=94=B5=E6=93=8D=E4=BD=9C?=
 =?UTF-8?q?=E8=AE=B0=E5=BD=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 20251014.md | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/20251014.md b/20251014.md
index b6f592f..bce3ea3 100644
--- a/20251014.md
+++ b/20251014.md
@@ -35,13 +35,28 @@ pip install modelscope -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host
 # 拉取 gpt-oss-20b 模型
 modelscope download --model openai-mirror/gpt-oss-20b --local_dir /home/ss/vllm-py12/gpt-oss-20b
 
-# 运行模型
+# 运行 gpt-oss-20b 模型失败,移动端的 RTX4090 只有 16GB 显存,至少需要 16~24GB 显存
 vllm serve \
-/home/ss/vllm-py12/gpt-oss-20b \
---port 18777 \
---api-key token_lcfc \
---served-model-name gpt-oss-20b \
---gpu-memory-utilization 0.95 \
---tool-call-parser openai \
---enable-auto-tool-choice
+  /home/ss/vllm-py12/gpt-oss-20b \
+  --port 18777 \
+  --api-key token_lcfc \
+  --served-model-name gpt-oss-20b \
+  --gpu-memory-utilization 0.95 \
+  --tool-call-parser openai \
+  --enable-auto-tool-choice
+
+# Qwen3-8b 也需要 16~24GB显存,所以下载了 Qwen3-0.6B
+modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b
+
+# 运行 Qwen3-0.6B
+vllm serve \
+  --host 0.0.0.0 \
+  --port 18778 \
+  --model /home/ss/vllm-py12/qwen3-06b \
+  --tensor-parallel-size 1 \
+  --dtype auto \
+  --gpu-memory-utilization 0.9 \
+  --max-model-len 32768 \
+  --trust-remote-code
+
 ```
\ No newline at end of file