2025-11-07 16:36:17 +08:00
|
|
|
|
## 为什么要写这一篇
|
|
|
|
|
|
|
2025-11-07 16:37:32 +08:00
|
|
|
|
因为当时用 vLLM 部署的 gpt-oss-120b 模型总是造成 vLLM 宕机。分析宕机时的崩溃日志,原因是 vLLM 会根据模型的回复内容调用结构化输出的工具,而模型回复的内容与结构化输出函数不兼容,于是抛出 ValueError,导致 vLLM 的引擎进程退出;APIServer 与引擎进程之间有心跳机制,发现引擎宕机后也跟着自杀了。但是没有请求参数日志,不知道什么样的请求参数触发了结构化输出功能。OpenAI API 层面会校验 response_format 参数的合法性,不合法会直接拒绝。所以当务之急是先捕获请求参数,再结合 vLLM 的宕机时间,尝试找出并复现导致宕机的参数。
|
2025-11-07 16:36:17 +08:00
|
|
|
|
|
|
|
|
|
|
## 踩了很多坑
|
|
|
|
|
|
|
|
|
|
|
|
- 第一反应是看看 vLLM 有没有可以开启的打印请求参数的功能。vLLM 的版本是 v0.11.0,查看源码后发现没有。
|
|
|
|
|
|
- 第二种就是使用抓包工具去实时抓包,选择了 tshark,它是 Wireshark 的命令行版本。
|
|
|
|
|
|
- 在服务器后台运行了一晚上,天塌了!这叼东西会一直写临时文件,存储路径:/tmp/*.pcap。
|
|
|
|
|
|
- 它抓的是整张网卡的流量,而这台服务器上部署了好几个大模型和 OpenAI API 端点。
|
|
|
|
|
|
- 而且有其他同事在压测,一直在并发调用 OpenAI API,所以一晚上写了 300G+ 的临时文件,服务器直接告警了,运维挨批了。
|
|
|
|
|
|
|
|
|
|
|
|
## 终极方案
|
|
|
|
|
|
|
|
|
|
|
|
不修改 vLLM 的源码,也不用抓包工具,而是给 vLLM 部署的 gpt-oss-120b 的 OpenAI API 加一层反向代理,把请求参数写到 access_log 并做日志轮转。
|
|
|
|
|
|
|
2025-11-07 16:38:26 +08:00
|
|
|
|
## OpenResty
|
2025-11-07 16:36:17 +08:00
|
|
|
|
|
2025-11-07 16:38:26 +08:00
|
|
|
|
用这玩意儿是因为它可以在 nginx.conf 里面写 Lua 脚本,还提供了一系列的增强能力,比直接用 nginx 更省心。
|
2025-11-07 16:36:17 +08:00
|
|
|
|
|
2025-11-07 16:36:59 +08:00
|
|
|
|
### nginx.conf
|
2025-11-07 16:36:17 +08:00
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
worker_processes auto;
|
|
|
|
|
|
error_log stderr warn;
|
|
|
|
|
|
|
|
|
|
|
|
events {
|
|
|
|
|
|
worker_connections 4096;
|
|
|
|
|
|
use epoll;
|
|
|
|
|
|
multi_accept on;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
http {
|
|
|
|
|
|
lua_need_request_body on;
|
|
|
|
|
|
log_escape_non_ascii off;
|
|
|
|
|
|
|
|
|
|
|
|
# 注意:这里不再使用 $time_local,而是用自定义变量 $log_time
|
|
|
|
|
|
log_format llm_audit '[$log_time] | $request_uri | $raw_body';
|
|
|
|
|
|
|
|
|
|
|
|
upstream llm_backend {
|
|
|
|
|
|
server 127.0.0.1:8080; # 修改成你的服务地址
|
|
|
|
|
|
keepalive 32;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
server {
|
|
|
|
|
|
listen 8000;
|
|
|
|
|
|
server_name _;
|
|
|
|
|
|
|
|
|
|
|
|
client_max_body_size 100M;
|
|
|
|
|
|
client_body_buffer_size 128k;
|
|
|
|
|
|
client_body_in_single_buffer on;
|
|
|
|
|
|
|
|
|
|
|
|
location /v1/chat/completions {
|
|
|
|
|
|
set $log_time "";
|
|
|
|
|
|
set $raw_body "";
|
|
|
|
|
|
|
|
|
|
|
|
rewrite_by_lua_block {
    -- Purpose: record the request time and the raw request body in the
    -- nginx variables used by the llm_audit log format, then redirect
    -- internally to @stream when the JSON payload has stream == true,
    -- otherwise to @normal.
    local cjson = require "cjson.safe"

    -- Return the whole request body as a string.
    -- ngx.var.request_body is empty whenever the body is larger than
    -- client_body_buffer_size, because nginx spools it to a temp file.
    -- Reading through the ngx.req API covers both the in-memory and the
    -- on-disk case, so large prompts are still logged and still routed
    -- by their stream flag.
    local function read_request_body()
        -- Returns immediately if lua_need_request_body already read it.
        ngx.req.read_body()

        local data = ngx.req.get_body_data()
        if data then
            return data
        end

        local path = ngx.req.get_body_file()
        if not path then
            -- Request carried no body at all.
            return ""
        end

        -- NOTE: blocking file I/O; only reached for bodies that did not
        -- fit in the in-memory buffer.
        local fh, err = io.open(path, "rb")
        if not fh then
            ngx.log(ngx.WARN, "failed to open request body file ", path, ": ", err)
            return ""
        end
        local content = fh:read("*a")
        fh:close()
        return content or ""
    end

    -- Local time as YYYY-MM-DD HH:MM:SS, based on the nginx cached clock.
    ngx.var.log_time = os.date("%Y-%m-%d %H:%M:%S", ngx.time())

    local body = read_request_body()
    ngx.var.raw_body = body

    -- Decide between the streaming and the buffered upstream location.
    if body ~= "" then
        -- cjson.safe returns nil instead of raising on malformed JSON,
        -- so no pcall wrapper is needed.
        local payload = cjson.decode(body)
        if type(payload) == "table" and payload.stream == true then
            return ngx.exec("@stream")
        end
    end

    return ngx.exec("@normal")
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# ========== 流式响应 ==========
|
|
|
|
|
|
location @stream {
|
|
|
|
|
|
internal;
|
|
|
|
|
|
access_log /hook/request.log llm_audit;
|
|
|
|
|
|
|
|
|
|
|
|
proxy_pass http://llm_backend;
|
|
|
|
|
|
proxy_http_version 1.1;
|
|
|
|
|
|
proxy_set_header Connection "";
|
|
|
|
|
|
proxy_set_header Host $host;
|
|
|
|
|
|
proxy_set_header X-Real-IP $remote_addr;
|
|
|
|
|
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|
|
|
|
|
|
|
|
|
|
|
proxy_buffering off;
|
|
|
|
|
|
proxy_cache off;
|
|
|
|
|
|
send_timeout 600s;
|
|
|
|
|
|
proxy_connect_timeout 5s;
|
|
|
|
|
|
proxy_send_timeout 60s;
|
|
|
|
|
|
proxy_read_timeout 600s;
|
|
|
|
|
|
proxy_socket_keepalive on;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# ========== 非流式响应 ==========
|
|
|
|
|
|
location @normal {
|
|
|
|
|
|
internal;
|
|
|
|
|
|
access_log /hook/request.log llm_audit;
|
|
|
|
|
|
|
|
|
|
|
|
proxy_pass http://llm_backend;
|
|
|
|
|
|
proxy_http_version 1.1;
|
|
|
|
|
|
proxy_set_header Connection "";
|
|
|
|
|
|
proxy_set_header Host $host;
|
|
|
|
|
|
proxy_set_header X-Real-IP $remote_addr;
|
|
|
|
|
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|
|
|
|
|
|
|
|
|
|
|
proxy_buffering on;
|
|
|
|
|
|
proxy_cache off;
|
|
|
|
|
|
send_timeout 600s;
|
|
|
|
|
|
proxy_connect_timeout 5s;
|
|
|
|
|
|
proxy_send_timeout 60s;
|
|
|
|
|
|
proxy_read_timeout 600s;
|
|
|
|
|
|
proxy_socket_keepalive on;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
```
|
|
|
|
|
|
|
2025-11-07 16:36:59 +08:00
|
|
|
|
### /hook/request.log
|
2025-11-07 16:36:17 +08:00
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
/hook/request.log {
    # Rotate once a day, or earlier as soon as the file grows past 20M.
    # (A plain size directive placed after daily would override the daily
    # schedule and rotate on size only.)
    daily
    maxsize 20M
    # Keep at most 5 rotated files.
    rotate 5
    # Gzip rotated files, but leave the most recent one uncompressed.
    compress
    delaycompress
    # Do not fail when the log is missing, and skip rotation when it is empty.
    missingok
    notifempty
    # No postrotate signal is sent to nginx here, so the log is copied and
    # then truncated in place instead of being renamed.
    copytruncate
    # Run the rotation as root:root; adjust if the log file is owned by
    # a different user or group.
    su root root
}
|
|
|
|
|
|
```
|
|
|
|
|
|
|
2025-11-07 16:36:59 +08:00
|
|
|
|
### docker-compose.yml
|
2025-11-07 16:36:17 +08:00
|
|
|
|
|
|
|
|
|
|
```yml
|
|
|
|
|
|
version: "3.8"
services:
  openresty:
    image: docker.1ms.run/openresty/openresty:jammy
    container_name: openresty
    environment:
      - TZ=Asia/Shanghai
      # Proxy variables are explicitly set to empty values inside the container.
      - http_proxy=
      - https_proxy=
      - HTTP_PROXY=
      - HTTPS_PROXY=
      - no_proxy=
      - NO_PROXY=
    ports:
      - "28000:8000"
    volumes:
      # Host directory where request.log can be inspected; it must also
      # contain nginx.conf, which the command below loads from /hook.
      - ./hook:/hook
      # logrotate rule for /hook/request.log.
      # NOTE(review): this only places the rule file in the container;
      # something still has to invoke logrotate (cron or similar) —
      # confirm the image provides that.
      - ./hook/hook-nginx:/etc/logrotate.d/hook-nginx
    command: ["/usr/local/openresty/bin/openresty", "-c", "/hook/nginx.conf", "-g", "daemon off;"]
    restart: on-failure:3
|
|
|
|
|
|
```
|