RTX4090笔电操作记录
This commit is contained in:
parent
0e40424906
commit
fbc162cbec
17
20251014.md
17
20251014.md
@ -35,7 +35,7 @@ pip install modelscope -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host
|
||||
# 拉取 gpt-oss-20b 模型
|
||||
modelscope download --model openai-mirror/gpt-oss-20b --local_dir /home/ss/vllm-py12/gpt-oss-20b
|
||||
|
||||
# 运行模型
|
||||
# 运行 gpt-oss-20b 模型失败:移动端的 RTX4090 只有 16GB 显存,而该模型至少需要 16~24GB 显存
|
||||
vllm serve \
|
||||
/home/ss/vllm-py12/gpt-oss-20b \
|
||||
--port 18777 \
|
||||
@ -44,4 +44,19 @@ vllm serve \
|
||||
--gpu-memory-utilization 0.95 \
|
||||
--tool-call-parser openai \
|
||||
--enable-auto-tool-choice
|
||||
|
||||
# Qwen3-8B 也需要 16~24GB 显存,所以改为下载 Qwen3-0.6B
|
||||
modelscope download --model Qwen/Qwen3-0.6B --local_dir /home/ss/vllm-py12/qwen3-06b
|
||||
|
||||
# 运行 Qwen3-0.6B
|
||||
vllm serve \
|
||||
--host 0.0.0.0 \
|
||||
--port 18778 \
|
||||
--model /home/ss/vllm-py12/qwen3-06b \
|
||||
--tensor-parallel-size 1 \
|
||||
--dtype auto \
|
||||
--gpu-memory-utilization 0.9 \
|
||||
--max-model-len 32768 \
|
||||
--trust-remote-code
|
||||
|
||||
```
|
||||
Loading…
Reference in New Issue
Block a user