RTX4090笔电操作记录

2025-10-15 10:09:20 +08:00 · 2025-10-15 10:09:20 +08:00 · fbc162cbec
commit fbc162cbec
parent 0e40424906
1 changed files with 23 additions and 8 deletions
--- a/20251014.md
+++ b/20251014.md
@@ -35,7 +35,7 @@ pip install modelscope -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host
 # 拉取 gpt-oss-20b 模型
 modelscope download --model openai-mirror/gpt-oss-20b --local_dir /home/ss/vllm-py12/gpt-oss-20b

-# 运行模型
+# 运行 gpt-oss-20b 模型失败：移动版 RTX 4090 只有 16GB 显存，而该模型至少需要 16~24GB 显存
 vllm serve \
    /home/ss/vllm-py12/gpt-oss-20b \
    --port 18777 \
@@ -44,4 +44,19 @@ vllm serve \
    --gpu-memory-utilization 0.95  \
    --tool-call-parser openai \
    --enable-auto-tool-choice
+
+# Qwen3-8B 也需要 16~24GB 显存，所以改为下载 Qwen3-0.6B
+modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b
+
+# 运行 Qwen3-0.6B
+vllm serve \
+    --host 0.0.0.0 \
+    --port 18778 \
+    --model /home/ss/vllm-py12/qwen3-06b \
+    --tensor-parallel-size 1 \
+    --dtype auto \
+    --gpu-memory-utilization 0.9 \
+    --max-model-len 32768 \
+    --trust-remote-code
+
 ```