網頁

2026年3月19日 星期四

Ollama 之 Open-webui + Searxng + Redis + Caddy

參考 https://forums.developer.nvidia.com/t/playbook-1-open-webui-searxng-private-web-search-on-dgx-spark/359578
參考 https://forums.developer.nvidia.com/t/building-local-hybrid-llms-on-dgx-spark-that-outperform-top-cloud-models/359569
參考 https://forums.developer.nvidia.com/t/dgx-spark-rag-on-docker/363125

$ > searxng/limiter.toml
$ vi searxng/settings.yml
# SearXNG settings — merged on top of the upstream defaults.
use_default_settings: true

search:
  # Allow JSON responses in addition to HTML so API clients
  # (e.g. Open WebUI's web search) can call /search?...&format=json.
  formats:
    - html
    - json

server:
  # For safety, generate secret_key with: openssl rand -hex 32
  # NOTE(review): this key is published in the page — regenerate it before
  # deploying; anyone with this value can forge SearXNG session data.
  secret_key: "c199725f396362fd99ad0e3239fbb5be9d01c04083cffb7e16d50301c67288ee"
  limiter: false  # rate limiter disabled (limiter.toml was emptied earlier)
  image_proxy: true

valkey:
  # Targets the compose service named "redis" (which runs the Valkey image).
  url: redis://redis:6379/0

engines:
  # Disable onion/Tor search engines.
  - name: ahmia
    disabled: true
  - name: torch
    disabled: true

$ vi docker-compose-llm.yaml
# docker-compose-llm.yaml — Caddy reverse proxy + SearXNG + Valkey + LiteLLM
# + Open WebUI, all joined to the pre-existing external network "webnet".
# NOTE: the top-level `version:` key is obsolete in the Compose specification
# (Compose v2 ignores it and prints a warning), so it is omitted.
services:
  caddy:
    container_name: caddy
    image: docker.io/library/caddy:2-alpine
    networks:
      - webnet
    ports:
      - "80:80"       # HTTP
      - "443:443"     # HTTPS
      - "443:443/udp" # HTTP/3 (QUIC)
    restart: unless-stopped
    volumes:
      - ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy-data:/data:rw
      - caddy-config:/config:rw
    environment:
      - SEARXNG_HOSTNAME=${SEARXNG_HOSTNAME:-localhost}
      # NOTE(review): SEARXNG_TLS is not referenced by the Caddyfile shown on
      # this page — presumably a leftover from the upstream searxng-docker
      # compose file; confirm whether it can be dropped.
      - SEARXNG_TLS=${LETSENCRYPT_EMAIL:-internal}

  redis:
    container_name: redis
    image: docker.io/valkey/valkey:8-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: unless-stopped
    networks:
      - webnet
    volumes:
      - valkey-data2:/data

  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    networks:
      - webnet
    ports:
      # Binds on all interfaces for direct testing; anyone on the LAN can
      # reach SearXNG on 8888 in addition to the Caddy-proxied path.
      - "0.0.0.0:8888:8080"
    volumes:
      - ./searxng:/etc/searxng:rw
      - searxng-log:/var/cache/searxng:rw
    environment:
      # Public base URL must match the Caddy route (/searxng/ sub-path).
      - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/searxng/
      #- SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/

  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    container_name: litellm
    restart: unless-stopped
    ports:
      - "4000:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml:ro
    command: --config /app/config.yaml --detailed_debug --num_workers 4
    networks:
      - webnet
    environment:
      - LITELLM_MASTER_KEY=${LITELLM_KEY}
    env_file:
      - .env

  open-webui:
    image: ghcr.io/open-webui/open-webui:cuda
    container_name: open-webui
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - open-webui:/app/backend/data
      # NOTE(review): the `~` in the default value relies on Compose's host-path
      # tilde expansion — confirm it resolves on your engine, or always set
      # HOME_CACHE explicitly in .env (as this setup does).
      - ${HOME_CACHE:-~/.cache}/huggingface:/root/.cache/huggingface
    environment:
      - ENABLE_WEB_SEARCH=true
      - WEB_SEARCH_ENGINE=searxng
      # Internal service URL — Open WebUI talks to SearXNG over webnet.
      - SEARXNG_URL=http://searxng:8080
      - OPENAI_API_BASE_URL=http://litellm:4000/v1
      - OPENAI_API_KEY=${LITELLM_KEY:-0p3n-w3bu!}
    networks:
      - webnet

networks:
  webnet:
    external: true

volumes:
  caddy-data:
  caddy-config:
  valkey-data2:
  searxng-log:
  open-webui:

$ vi caddy/Caddyfile
# Single site block for the hostname taken from the SEARXNG_HOSTNAME
# environment variable (Caddy {$VAR} placeholder, set in docker-compose).
{$SEARXNG_HOSTNAME} {
    encode gzip zstd
    # Standard security headers applied to every response.
    header {
        Strict-Transport-Security "max-age=31536000;"
        X-Content-Type-Options "nosniff"
        X-Frame-Options "SAMEORIGIN"
        Referrer-Policy "no-referrer"
    }
    # Earlier variant kept for reference: handle + uri strip_prefix is
    # equivalent to the handle_path directives used below.
    #handle /searxng* {
    #    uri strip_prefix /searxng
    #    reverse_proxy searxng:8080
    #}
    #handle /litellm* {
    #    uri strip_prefix /litellm
    #    reverse_proxy litellm:4000
    #}
    # handle_path strips the matched prefix before proxying, so the backends
    # see root-relative paths (SearXNG's sub-path is restored for links via
    # SEARXNG_BASE_URL=https://<host>/searxng/ in docker-compose).
    handle_path /searxng* {
        reverse_proxy searxng:8080
    }
    handle_path /litellm* {
        reverse_proxy litellm:4000
    }
    # Everything else goes to the Open WebUI frontend.
    handle {
        reverse_proxy open-webui:8080
    }
}

$ vi litellm_config.yaml
# LiteLLM proxy configuration: each entry maps a display name (shown in the
# Open WebUI model picker) to an OpenAI-compatible vLLM backend.
model_list:
  - model_name: Nemotron-3-Nano-30B-A3B  # name shown in the Open WebUI model menu
    litellm_params:
      model: openai/Nemotron-3-Nano-30B-A3B  # full model path/name loaded by vLLM
      api_base: http://vllm-Nemotron-3-Nano-30B-A3B:8000/v1  # vLLM API address
      health_check_url: http://vllm-Nemotron-3-Nano-30B-A3B:8000/health
      api_key: "not-needed"  # vLLM needs no key by default, but LiteLLM requires one
      rpm: 10  # requests-per-minute limit (optional)
  - model_name: Qwen3.5-35B-A3B  # name shown in the Open WebUI model menu
    litellm_params:
      model: openai/Qwen3.5-35B-A3B  # full model path/name loaded by vLLM
      api_base: http://vllm-Qwen3.5-35B-A3B:8000/v1  # vLLM API address
      health_check_url: http://vllm-Qwen3.5-35B-A3B:8000/health
      api_key: "not-needed"  # vLLM needs no key by default, but LiteLLM requires one
      rpm: 10  # requests-per-minute limit (optional)
  - model_name: GLM-4.7-Flash  # name shown in the Open WebUI model menu
    litellm_params:
      model: openai/GLM-4.7-Flash  # full model path/name loaded by vLLM
      api_base: http://vllm-GLM-4.7-Flash:8000/v1  # vLLM API address
      health_check_url: http://vllm-GLM-4.7-Flash:8000/health
      api_key: "not-needed"  # vLLM needs no key by default, but LiteLLM requires one
      rpm: 10  # requests-per-minute limit (optional)
  - model_name: Qwen3.5-9B  # name shown in the Open WebUI model menu
    litellm_params:
      model: openai/Qwen3.5-9B  # full model path/name loaded by vLLM
      api_base: http://192.168.0.107:8080/v1  # backend on a separate host (no health_check_url)
      api_key: "not-needed"  # backend needs no key by default, but LiteLLM requires one
      rpm: 10  # requests-per-minute limit (optional)

litellm_settings:
  # Canonical lowercase booleans (yamllint `truthy`); parse identically to True/False.
  drop_params: true   # silently drop request params the backend does not support
  set_verbose: false  # set to true for detailed debug logging

#general_settings:
#  #master_key: ${LITELLM_KEY} # matches OPENAI_API_KEY in .env (0p3n-w3bu!)
#  master_key: asdfasdf23

$ vi .env
# Environment consumed by docker-compose-llm.yaml (Caddy, SearXNG, LiteLLM).
# NOTE(review): the curl examples later on this page use www.fwwrcom.com.tw,
# which does not match this hostname (www.fwwrcom.tw) — confirm which domain
# is correct; Caddy will only answer for the name configured here.
SEARXNG_HOSTNAME=www.fwwrcom.tw
# Presumably used for ACME/Let's Encrypt registration via SEARXNG_TLS — TODO
# confirm it is actually read by the Caddyfile in use.
LETSENCRYPT_EMAIL=ewr@gmail.com
HOME_CACHE=/home/spark/.cache
# LiteLLM master API key — do not publish real secrets in a public page.
LITELLM_KEY=asdfasdf23

$ vi sta_llm.sh
#!/bin/bash
# Start the LLM stack (Caddy / SearXNG / Valkey / LiteLLM / Open WebUI)
# defined in docker-compose-llm.yaml, detached.
#
# Exit status: 0 on successful start, 1 if .env is missing or compose fails.
# (The original version always exited 0, even when `docker compose up` failed.)

# 1. Require .env — without it, variable substitution in the compose file
#    silently falls back to defaults (or empty values).
if [ ! -f .env ]; then
    echo "❌ 錯誤: 找不到 .env 檔案,請先建立它。"
    exit 1
fi

# 2. Launch the stack and branch directly on the command's exit status
#    (avoids the separate `$?` check, which is fragile if a line is
#    inserted between the command and the test).
echo "🚀 正在啟動 LLM 服務..."
if docker compose -f docker-compose-llm.yaml up -d; then
    echo "✅ 服務已成功在背景執行!"
    echo "使用 'docker compose -f docker-compose-llm.yaml logs -f' 查看全部日誌。"
    echo "使用 'docker compose -f docker-compose-llm.yaml logs -f litellm' 查看 litellm 日誌。"
else
    echo "❌ 啟動失敗,請檢查配置。"
    exit 1  # propagate the failure to callers
fi


$ docker compose -f docker-compose-llm.yaml up -d
$ docker compose -f docker-compose-llm.yaml up -d --force-recreate
# 測試 searxng 網路
$ docker exec -it caddy ping searxng
# 瀏覽器開啟 http://localhost:8888
$ curl "http://localhost:8888/search?q=test&format=json"
$ curl "http://192.168.0.108:8888/search?q=test&format=json"
$ curl "https://www.fwwrcom.com.tw/searxng/search?q=test&format=json"

$ curl "http://192.168.0.108:8888/"
$ curl "https://www.fwwrcom.com.tw/searxng/"

$ curl http://192.168.0.108:4000/v1/models \
  -H "Authorization: Bearer asdfasdf23"
$ curl -s http://192.168.0.108:4000/v1/chat/completions \
  -H "Authorization: Bearer asdfasdf23" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen3.5-35B-A3B",
    "messages": [{"role": "user", "content": "請你自我介紹"}],
    "max_tokens": 64
  }'

$ curl -i https://www.fwwrcom.com.tw/litellm/v1/chat/completions \
  -H "Authorization: Bearer asdfasdf23" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen3.5-35B-A3B",
    "messages": [{"role": "user", "content": "請你自我介紹"}],
    "max_tokens": 64
  }'


沒有留言:

張貼留言