參考 https://forums.developer.nvidia.com/t/building-local-hybrid-llms-on-dgx-spark-that-outperform-top-cloud-models/359569
參考 https://forums.developer.nvidia.com/t/dgx-spark-rag-on-docker/363125
$ > searxng/limiter.toml
$ vi searxng/settings.yml
# SearXNG instance settings — merged over the shipped defaults.
# Reference: https://docs.searxng.org/admin/settings/index.html
use_default_settings: true

search:
  # Enable JSON output so Open WebUI can call /search?format=json
  formats:
    - html
    - json

server:
  # For safety, generate secret_key with: openssl rand -hex 32
  secret_key: "c199725f396362fd99ad0e3239fbb5be9d01c04083cffb7e16d50301c67288ee"
  # Rate limiter disabled (limiter.toml is empty in this setup)
  limiter: false
  image_proxy: true

valkey:
  # "redis" is the Valkey container name on the webnet network
  url: redis://redis:6379/0

engines:
  # Tor/onion engines disabled — no Tor proxy in this stack
  - name: ahmia
    disabled: true
  - name: torch
    disabled: true
$ vi docker-compose-llm.yaml
# Compose stack: Caddy (TLS reverse proxy) + Valkey + SearXNG + LiteLLM + Open WebUI.
# NOTE: the top-level `version:` key is obsolete in the Compose Specification
# (docker compose v2 ignores it with a warning), so it is omitted here.

services:
  caddy:
    container_name: caddy
    image: docker.io/library/caddy:2-alpine
    networks:
      - webnet
    ports:
      - "80:80"         # HTTP port
      - "443:443"       # HTTPS port
      - "443:443/udp"   # HTTP/3 support
    restart: unless-stopped
    volumes:
      - ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy-data:/data:rw
      - caddy-config:/config:rw
    environment:
      - SEARXNG_HOSTNAME=${SEARXNG_HOSTNAME:-localhost}
      # searxng-docker convention: SEARXNG_TLS carries the ACME e-mail,
      # or "internal" for a self-signed certificate
      - SEARXNG_TLS=${LETSENCRYPT_EMAIL:-internal}

  redis:
    container_name: redis
    # Valkey (Redis-compatible) backing store for the SearXNG limiter/cache
    image: docker.io/valkey/valkey:8-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: unless-stopped
    networks:
      - webnet
    volumes:
      - valkey-data2:/data

  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    networks:
      - webnet
    ports:
      - "0.0.0.0:8888:8080"   # direct LAN access for debugging
    volumes:
      - ./searxng:/etc/searxng:rw
      - searxng-log:/var/cache/searxng:rw
    environment:
      - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/searxng/
      #- SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/

  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    container_name: litellm
    restart: unless-stopped
    ports:
      - "4000:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml:ro
    command: --config /app/config.yaml --detailed_debug --num_workers 4
    networks:
      - webnet
    environment:
      - LITELLM_MASTER_KEY=${LITELLM_KEY}
    env_file:
      - .env

  open-webui:
    image: ghcr.io/open-webui/open-webui:cuda
    container_name: open-webui
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - open-webui:/app/backend/data
      # share the host HF cache so models are not re-downloaded
      - ${HOME_CACHE:-~/.cache}/huggingface:/root/.cache/huggingface
    environment:
      - ENABLE_WEB_SEARCH=true
      - WEB_SEARCH_ENGINE=searxng
      - SEARXNG_URL=http://searxng:8080
      - OPENAI_API_BASE_URL=http://litellm:4000/v1
      - OPENAI_API_KEY=${LITELLM_KEY:-0p3n-w3bu!}
    networks:
      - webnet

networks:
  webnet:
    # must be created beforehand: docker network create webnet
    external: true

volumes:
  caddy-data:
  caddy-config:
  valkey-data2:
  searxng-log:
  open-webui:
$ vi caddy/Caddyfile
{$SEARXNG_HOSTNAME} {
	encode gzip zstd

	# Security headers applied to every response
	header {
		Strict-Transport-Security "max-age=31536000;"
		X-Content-Type-Options "nosniff"
		X-Frame-Options "SAMEORIGIN"
		Referrer-Policy "no-referrer"
	}

	# Equivalent explicit form kept for reference (handle + uri strip_prefix):
	#handle /searxng* {
	#	uri strip_prefix /searxng
	#	reverse_proxy searxng:8080
	#}
	#handle /litellm* {
	#	uri strip_prefix /litellm
	#	reverse_proxy litellm:4000
	#}

	# handle_path strips the matched prefix before proxying upstream
	handle_path /searxng* {
		reverse_proxy searxng:8080
	}
	handle_path /litellm* {
		reverse_proxy litellm:4000
	}

	# Everything else is served by Open WebUI
	handle {
		reverse_proxy open-webui:8080
	}
}
$ vi litellm_config.yaml
# LiteLLM proxy configuration: one model_list entry per vLLM backend.
# model_name  — the name shown in the Open WebUI model picker
# model       — "openai/<served-model-name>" as exposed by the vLLM server
model_list:
  - model_name: Nemotron-3-Nano-30B-A3B
    litellm_params:
      model: openai/Nemotron-3-Nano-30B-A3B
      api_base: http://vllm-Nemotron-3-Nano-30B-A3B:8000/v1   # vLLM API address
      health_check_url: http://vllm-Nemotron-3-Nano-30B-A3B:8000/health
      api_key: "not-needed"   # vLLM needs no key, but litellm requires the field
      rpm: 10                 # requests-per-minute limit (optional)

  - model_name: Qwen3.5-35B-A3B
    litellm_params:
      model: openai/Qwen3.5-35B-A3B
      api_base: http://vllm-Qwen3.5-35B-A3B:8000/v1           # vLLM API address
      health_check_url: http://vllm-Qwen3.5-35B-A3B:8000/health
      api_key: "not-needed"   # vLLM needs no key, but litellm requires the field
      rpm: 10                 # requests-per-minute limit (optional)

  - model_name: GLM-4.7-Flash
    litellm_params:
      model: openai/GLM-4.7-Flash
      api_base: http://vllm-GLM-4.7-Flash:8000/v1             # vLLM API address
      health_check_url: http://vllm-GLM-4.7-Flash:8000/health
      api_key: "not-needed"   # vLLM needs no key, but litellm requires the field
      rpm: 10                 # requests-per-minute limit (optional)

  # External backend on the LAN — no health_check_url configured for it
  - model_name: Qwen3.5-9B
    litellm_params:
      model: openai/Qwen3.5-9B
      api_base: http://192.168.0.107:8080/v1                  # vLLM API address
      api_key: "not-needed"   # vLLM needs no key, but litellm requires the field
      rpm: 10                 # requests-per-minute limit (optional)

litellm_settings:
  drop_params: true    # silently drop request params the backend does not support
  set_verbose: false   # set to true for detailed debug logging

#general_settings:
#  #master_key: ${LITELLM_KEY}   # matches OPENAI_API_KEY in .env (0p3n-w3bu!)
#  master_key: asdfasdf23
$ vi .env
# Environment for docker-compose-llm.yaml (loaded via env_file / interpolation).
# NOTE(review): the curl tests later in this post use www.fwwrcom.com.tw —
# confirm which hostname is correct; Caddy only serves SEARXNG_HOSTNAME.
SEARXNG_HOSTNAME=www.fwwrcom.tw
# ACME e-mail, passed to Caddy as SEARXNG_TLS for Let's Encrypt
LETSENCRYPT_EMAIL=ewr@gmail.com
# Host-side cache dir mounted into open-webui for the HuggingFace cache
HOME_CACHE=/home/spark/.cache
# LiteLLM master key; also used as OPENAI_API_KEY by open-webui
LITELLM_KEY=asdfasdf23
$ vi sta_llm.sh
#!/bin/bash
# Start the local LLM stack defined in docker-compose-llm.yaml.
# Exits non-zero when .env is missing or when docker compose fails,
# so callers (and CI) can detect a failed start.

COMPOSE_FILE="docker-compose-llm.yaml"

# 1. Refuse to start without .env — compose variable interpolation needs it
if [ ! -f .env ]; then
    echo "❌ 錯誤: 找不到 .env 檔案,請先建立它。"
    exit 1
fi

# 2. Bring the stack up detached; branch directly on the command's status
echo "🚀 正在啟動 LLM 服務..."
if docker compose -f "$COMPOSE_FILE" up -d; then
    echo "✅ 服務已成功在背景執行!"
    echo "使用 'docker compose -f $COMPOSE_FILE logs -f' 查看全部日誌。"
    echo "使用 'docker compose -f $COMPOSE_FILE logs -f litellm' 查看 litellm 日誌。"
else
    # Original script printed the error but still exited 0 — propagate failure
    echo "❌ 啟動失敗,請檢查配置。"
    exit 1
fi
$ docker compose -f docker-compose-llm.yaml up -d
$ docker compose -f docker-compose-llm.yaml up -d --force-recreate
# 測試 searxng 網路
$ docker exec -it caddy ping searxng
# 瀏覽器開啟 http://localhost:8888
$ curl "http://localhost:8888/search?q=test&format=json"
$ curl "http://192.168.0.108:8888/search?q=test&format=json"
$ curl "https://www.fwwrcom.com.tw/searxng/search?q=test&format=json"
$ curl "http://192.168.0.108:8888/"
$ curl "https://www.fwwrcom.com.tw/searxng/"
$ curl http://192.168.0.108:4000/v1/models \
-H "Authorization: Bearer asdfasdf23"
$ curl -s http://192.168.0.108:4000/v1/chat/completions \
-H "Authorization: Bearer asdfasdf23" \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen3.5-35B-A3B",
"messages": [{"role": "user", "content": "請你自我介紹"}],
"max_tokens": 64
}'
$ curl -i https://www.fwwrcom.com.tw/litellm/v1/chat/completions \
-H "Authorization: Bearer asdfasdf23" \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen3.5-35B-A3B",
"messages": [{"role": "user", "content": "請你自我介紹"}],
"max_tokens": 64
}'
沒有留言:
張貼留言