網頁

2026年3月19日 星期四

DGX Spark 之 ConnectX-7

原先在機器上都可發現 ConnectX-7 的蹤跡
但在一次系統更新後,發現 ConnectX-7 消失了

經查詢發現是經由 /etc/nvidia/cx7-hotplug-enabled 檔案,控制是否啟動 ConnectX-7
spark@gx10-spark:~$ ls -al /etc/nvidia/cx7-hotplug-enabled 
-rw-r--r-- 1 root root 278 Mar 12 10:06 /etc/nvidia/cx7-hotplug-enabled
spark@gx10-spark:~$ cat /etc/nvidia/cx7-hotplug-enabled 
# CX7 Hotplug Configuration
# This file controls CX7 hotplug functionality on DGX Spark systems.
# Presence of this file: enables hotplug
# Absence of this file: disables hotplug (safe default)
# To disable hotplug, remove this file or uninstall dgx-spark-mlnx-hotplug package.
spark@gx10-spark:~$ 


spark@gx10-spark:~$ ifconfig
br-060c917749b1: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        inet 172.22.0.1  netmask 255.255.0.0  broadcast 172.22.255.255
        ether 52:84:c6:04:50:2e  txqueuelen 0  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

br-69dc297fd5bc: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        inet 172.21.0.1  netmask 255.255.0.0  broadcast 172.21.255.255
        ether ae:8d:e1:e3:d8:a4  txqueuelen 0  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

docker0: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        inet 172.17.0.1  netmask 255.255.0.0  broadcast 172.17.255.255
        ether fa:8e:d1:c2:3a:4c  txqueuelen 0  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 31 overruns 0  carrier 0  collisions 0

enP2p1s0f0np0: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        ether 30:c5:99:40:83:24  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

enP2p1s0f1np1: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        ether 30:c5:99:40:83:25  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

enP7s7: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.0.108  netmask 255.255.255.0  broadcast 192.168.0.255
        inet6 fe80::2b9:90bd:9766:aed  prefixlen 64  scopeid 0x20<link>
        ether 30:c5:99:40:83:1f  txqueuelen 1000  (Ethernet)
        RX packets 31075  bytes 1955490 (1.9 MB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 44295  bytes 34158888 (34.1 MB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
        device interrupt 81  

enp1s0f0np0: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        ether 30:c5:99:40:83:20  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

enp1s0f1np1: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        ether 30:c5:99:40:83:21  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 46099  bytes 33381287 (33.3 MB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 46099  bytes 33381287 (33.3 MB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

wlP9s9: flags=4099<UP,BROADCAST,MULTICAST>  mtu 1500
        ether 50:bb:b5:a4:24:84  txqueuelen 1000  (Ethernet)
        RX packets 0  bytes 0 (0.0 B)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 0  bytes 0 (0.0 B)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

$ sudo lshw -class network -short
H/W path Device Class Description
========================================================
/0/100/0 enp1s0f0np0 network MT2910 Family [ConnectX-7]
/0/100/0.1 enp1s0f1np1 network MT2910 Family [ConnectX-7]
/0/101/0 enP2p1s0f0np0 network MT2910 Family [ConnectX-7]
/0/101/0.1 enP2p1s0f1np1 network MT2910 Family [ConnectX-7]
/0/103/0      enP7s7          network        Realtek Semiconductor Co., Ltd.
/0/104/0      wlP9s9          network        MEDIATEK Corp.

spark@gx10-spark:~$ sudo fwupdmgr get-devices
ASUSTeK COMPUTER INC. GX10
├─ESL01TBTLCZ-27J2-TYN:
│     Device ID:          7de5ffdca08fa52d95fd4bb42aa5d07a4b35d2dd
│     Summary:            NVM Express solid state drive
│     Current version:    ERFM12.0
│     Vendor:             Phison Electronics Corporation (NVME:0x1987)
│     Serial Number:      511250702501001126
│     GUIDs:              3d29962d-a81b-5b11-bd43-aec65c7e9e60 ← NVME\VEN_1987&DEV_5027
│                         18fbf8a9-d429-57e9-b174-ea8afd7e6877 ← NVME\VEN_1987&DEV_5027&SUBSYS_19875027
│                         ed9808fe-4f78-5d97-ab19-c0e627af31bf ← ESL01TBTLCZ-27J2-TYN
│     Device Flags:       • Internal device
│                         • Updatable
│                         • System requires external power source
│                         • Needs shutdown after installation
│                         • Device is usable for the duration of the update
│   
├─Embedded Controller:
│     Device ID:          798397787f4afcf1e2bb8575cb19630f12180584
│     Summary:            UEFI System Resource Table device (Updated via capsule-on-disk)
│     Current version:    0x02000004
│     Minimum Version:    0x01000000
│     Vendor:             Asus (DMI:American Megatrends International, LLC.)
│     Update State:       Success
│     GUID:               c3cccaf0-9a3e-4ee2-992e-9f0cf9b55fa0
│     Device Flags:       • Internal device
│                         • Updatable
│                         • System requires external power source
│                         • Supported on remote server
│                         • Needs a reboot after installation
│                         • Device is usable for the duration of the update
│                         • Signed Payload
│     Device Requests:    • Message
│   
├─MT2910 Family [ConnectX-7]:
│     Device ID:          ce4c74a5188d5b9cdb1e72ed32dad2d313c1c999
│     Current version:    01
│     Vendor:             Mellanox Technologies (PCI:0x15B3, PCI:0x10DE)
│     GUIDs:              12029307-5bb1-5200-99a5-536f1be9d081 ← PCI\VEN_15B3&DEV_1021
│                         b5e95689-ad65-5e57-8778-897f04396256 ← PCI\VEN_15B3&DEV_1021&SUBSYS_15B321EC
│                         cfc0de0b-adb3-5060-ba22-e4010a78368f ← PCI\VEN_10DE&DEV_22CE
│                         59007998-a3d7-54a3-b30e-eb3b77e2f351 ← PCI\VEN_10DE&DEV_22CE&SUBSYS_15B321EC
│     Device Flags:       • Internal device
│                         • Cryptographic hash verification is available
│   
├─MT2910 Family [ConnectX-7]:
│     Device ID:          7d29f2075dcafb4488b40c73f199cf46bb76bddb
│     Current version:    01
│     Vendor:             Mellanox Technologies (PCI:0x15B3, PCI:0x10DE)
│     GUIDs:              12029307-5bb1-5200-99a5-536f1be9d081 ← PCI\VEN_15B3&DEV_1021
│                         b5e95689-ad65-5e57-8778-897f04396256 ← PCI\VEN_15B3&DEV_1021&SUBSYS_15B321EC
│                         cfc0de0b-adb3-5060-ba22-e4010a78368f ← PCI\VEN_10DE&DEV_22CE
│                         59007998-a3d7-54a3-b30e-eb3b77e2f351 ← PCI\VEN_10DE&DEV_22CE&SUBSYS_15B321EC
│     Device Flags:       • Internal device
│                         • Cryptographic hash verification is available
│   
├─MT2910 Family [ConnectX-7]:
│     Device ID:          024ec185fcba9289f4336862423686455165d68a
│     Current version:    01
│     Vendor:             Mellanox Technologies (PCI:0x15B3, PCI:0x10DE)
│     GUIDs:              12029307-5bb1-5200-99a5-536f1be9d081 ← PCI\VEN_15B3&DEV_1021
│                         b5e95689-ad65-5e57-8778-897f04396256 ← PCI\VEN_15B3&DEV_1021&SUBSYS_15B321EC
│                         cfc0de0b-adb3-5060-ba22-e4010a78368f ← PCI\VEN_10DE&DEV_22CE
│                         59007998-a3d7-54a3-b30e-eb3b77e2f351 ← PCI\VEN_10DE&DEV_22CE&SUBSYS_15B321EC
│     Device Flags:       • Internal device
│                         • Cryptographic hash verification is available
│   
├─MT2910 Family [ConnectX-7]:
│     Device ID:          fd0f3bbe941288a4198e7476ae94fd87b6e58b15
│     Current version:    01
│     Vendor:             Mellanox Technologies (PCI:0x15B3, PCI:0x10DE)
│     GUIDs:              12029307-5bb1-5200-99a5-536f1be9d081 ← PCI\VEN_15B3&DEV_1021
│                         b5e95689-ad65-5e57-8778-897f04396256 ← PCI\VEN_15B3&DEV_1021&SUBSYS_15B321EC
│                         cfc0de0b-adb3-5060-ba22-e4010a78368f ← PCI\VEN_10DE&DEV_22CE
│                         59007998-a3d7-54a3-b30e-eb3b77e2f351 ← PCI\VEN_10DE&DEV_22CE&SUBSYS_15B321EC
│     Device Flags:       • Internal device
│                         • Cryptographic hash verification is available
│   
├─SV300S37A48:
│     Device ID:          df2bc95dfb8bd5d535f85cdf9ad662d25bc8bda6
│     Summary:            SCSI device
│     Current version:    8a
│     Vendor:             KINGSTON (SCSI:KINGSTON)
│     Serial Number:      50026b726204f2e9
│     GUIDs:              074d3e05-f8d3-5fbe-8b98-b37df122f06c ← SCSI\VEN_KINGSTON&DEV_SV300S37A48
│                         167b2441-5d5d-538c-bc01-b49059831d58 ← SCSI\VEN_KINGSTON&DEV_SV300S37A48&REV_8a
│     Device Flags:       • Internal device
│   
├─UEFI Device Firmware:
│     Device ID:          1df564c6ffffdc355893f9d0ec29813e0a1141b5
│     Summary:            UEFI System Resource Table device (Updated via capsule-on-disk)
│     Current version:    0x03000005
│     Minimum Version:    0x02000000
│     Vendor:             Asus (DMI:American Megatrends International, LLC.)
│     Update State:       Success
│     GUID:               f1392323-3920-4598-a932-ef06360cf403
│     Device Flags:       • Internal device
│                         • Updatable
│                         • System requires external power source
│                         • Supported on remote server
│                         • Needs a reboot after installation
│                         • Device is usable for the duration of the update
│                         • Signed Payload
│     Device Requests:    • Message
│   
├─UEFI Device Firmware:
│     Device ID:          7fd0410f4194ca73faaa60d3392bcd91ef4ee070
│     Summary:            UEFI System Resource Table device (Updated via capsule-on-disk)
│     Current version:    0x00000507
│     Minimum Version:    0x00000309
│     Vendor:             Asus (DMI:American Megatrends International, LLC.)
│     Update State:       Success
│     GUID:               fe75bb1c-5ccc-4936-b603-cc7cf945dc30
│     Device Flags:       • Internal device
│                         • Updatable
│                         • System requires external power source
│                         • Supported on remote server
│                         • Needs a reboot after installation
│                         • Device is usable for the duration of the update
│                         • Signed Payload
│     Device Requests:    • Message
│   
├─UEFI dbx:
│     Device ID:          362301da643102b9f38477387e2193e57abaa590
│     Summary:            UEFI revocation database
│     Current version:    20230501
│     Minimum Version:    20230501
│     Vendor:             UEFI:Microsoft
│     Install Duration:   1 second
│     GUIDs:              026c46fa-db36-5397-883d-047809df980a ← UEFI\CRT_103560ADA2E78C48DDA52A2D71A00FC1D30F469E1E20332FDA01CDE9B796B049&ARCH_AA64
│                         67d35028-ca5b-5834-834a-f97380381082 ← UEFI\CRT_A1117F516A32CEFCBA3F2D1ACE10A87972FD6BBE8FE0D0B996E09E65D802A503&ARCH_AA64
│                         10ec82f4-ff64-5362-9e5d-688febf5dbb0 ← UEFI\CRT_3CD3F0309EDAE228767A976DD40D9F4AFFC4FBD5218F2E8CC3C9DD97E8AC6F9D&ARCH_AA64
│     Device Flags:       • Internal device
│                         • Updatable
│                         • Needs a reboot after installation
│                         • Device is usable for the duration of the update
│                         • Only version upgrades are allowed
│                         • Signed Payload
│   
└─Unifying Receiver:
      Device ID:          ecbb086d2f75882bb4cd0f6bbd0df5ffba00cd39
      Summary:            Miniaturised USB wireless receiver
      Current version:    RQR12.10_B0032
      Bootloader Version: BOT01.02_B0014
      Vendor:             Logitech, Inc. (HIDRAW:0x046D, USB:0x046D)
      Install Duration:   30 seconds
      GUIDs:              9d131a0c-a606-580f-8eda-80587250b8d6
                          279ed287-3607-549e-bacc-f873bb9838c4 ← HIDRAW\VEN_046D&DEV_C52B
      Device Flags:       • Updatable
                          • Supported on remote server
                          • Unsigned Payload



Orin 上之空間不足

# 找尋占空間之檔案
$ sudo du -sh /* 2>/dev/null | sort -hr | head -n 10
$ sudo du -sh /home/* 2>/dev/null | sort -hr | head -n 10

# 清空 Docker 的 cache
$ docker system df
$ docker builder prune

# Docker 資料搬家
$ sudo systemctl stop docker
$ sudo systemctl stop docker.socket
$ sudo rsync -aqxP /var/lib/docker/ /mnt/Data/docker-data
$ sudo vi /etc/docker/daemon.json 
{
    "runtimes": {
        "nvidia": {
            "args": [],
            "path": "nvidia-container-runtime"
        }
    },
    "data-root": "/mnt/Data/docker-data",
    "iptables": false
}
$ sudo systemctl start docker
$ sudo systemctl start docker.socket

Orin 上安裝 Ollama 相關套件

參考 https://yingrenn.blogspot.com/2026/03/ollama-open-webui-searxng-redis-caddy.html

$ >searxng/limiter.toml
$ vi searxng/settings.yml
# SearXNG settings: start from the upstream defaults and override only the
# keys listed below.
use_default_settings: true

search:
  # Output formats the /search endpoint may return; "json" is needed for the
  # `format=json` API queries.
  formats:
    - html
    - json

server:
  # For safety, generate secret_key with: openssl rand -hex 32
  secret_key: "c199725f396362fd99ad0e3239fbb5be9d01c04083cffb7e16d50301c67288ee"
  image_proxy: true
  real_ip: true # must be true when running behind Nginx/Caddy
  # Disable the built-in limiter; limiter.toml is kept as an empty file.
  # NOTE: this key must appear only once - duplicate mapping keys are invalid
  # YAML and are rejected by strict loaders.
  limiter: false

# Core setting for dealing with the bot-detection error
bot_detection:
  ip_limit:
    filter_link_local: true

valkey:
  url: redis://redis:6379/0

engines:
  - name: ahmia
    disabled: true
  - name: torch
    disabled: true

$ vi Caddyfile
{$SEARXNG_HOSTNAME} {
    encode gzip zstd
    header {
        Strict-Transport-Security "max-age=31536000;"
        X-Content-Type-Options "nosniff"
        X-Frame-Options "SAMEORIGIN"
        Referrer-Policy "no-referrer"
    }
    #handle /searxng* {
    #    uri strip_prefix /searxng
    #    reverse_proxy searxng:8080
    #}
    #handle /litellm* {
    #    uri strip_prefix /litellm
    #    reverse_proxy litellm:4000
    #}
    handle_path /searxng* {
        reverse_proxy searxng:8080
    }
    handle_path /litellm* {
        reverse_proxy litellm:4000
    }
    handle {
        reverse_proxy open-webui:8080
    }
}

$ vi docker-compose-llm.yaml
version: '3.8'
services:
  caddy:
    container_name: caddy
    image: docker.io/library/caddy:2-alpine
    networks:
      - webnet
    ports:
      - "80:80"   # HTTP 埠號
      - "443:443" # HTTPS 埠號
      - "443:443/udp" # HTTP/3 支援
    restart: unless-stopped
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy-data:/data:rw
      - caddy-config:/config:rw
    environment:
      - SEARXNG_HOSTNAME=${SEARXNG_HOSTNAME:-localhost}
      - SEARXNG_TLS=${LETSENCRYPT_EMAIL:-internal}

  redis:
    container_name: redis
    image: docker.io/valkey/valkey:8-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: unless-stopped
    networks:
      - webnet
    volumes:
      - valkey-data2:/data

  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    networks:
      - webnet
    ports:
      - "0.0.0.0:8888:8080"
    volumes:
      - ./searxng:/etc/searxng:rw
      - searxng-log:/var/cache/searxng:rw
    environment:
      - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/searxng/
      #- SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/

  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    container_name: litellm
    restart: unless-stopped
    ports:
      - "4000:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml:ro
    command: --config /app/config.yaml --detailed_debug --num_workers 4
    networks:
      - webnet
    environment:
      - LITELLM_MASTER_KEY=${LITELLM_KEY}
    env_file:
      - .env

  open-webui:
    image: ghcr.io/open-webui/open-webui:cuda
    container_name: open-webui
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - open-webui:/app/backend/data
      - ${HOME_CACHE:-~/.cache}/huggingface:/root/.cache/huggingface
    environment:
      - ENABLE_WEB_SEARCH=true
      - WEB_SEARCH_ENGINE=searxng
      - SEARXNG_URL=http://searxng:8080
      - OPENAI_API_BASE_URL=http://litellm:4000/v1
      - OPENAI_API_KEY=${LITELLM_KEY:-0p3n-w3bu!}
    networks:
      - webnet
      
networks:
  webnet:
    external: true

volumes:
  caddy-data:
  caddy-config:
  valkey-data2:
  searxng-log:
  open-webui:

$ vi .env
SEARXNG_HOSTNAME=www.abcd.com.tw
LETSENCRYPT_EMAIL=abcd@gmail.com
HOME_CACHE=/home/mic-733ao/.cache
LITELLM_KEY=abcd


$ vi sta_llm.sh
#!/bin/bash
#
# Start the LLM service stack described by docker-compose-llm.yaml.
#
# Usage:  ./sta_llm.sh   (run from the directory that holds .env and
#                         docker-compose-llm.yaml; both are looked up
#                         relative to the current directory)
# Exit:   0 when `docker compose ... up -d` succeeds,
#         1 when .env is missing or the compose command fails.

readonly COMPOSE_FILE="docker-compose-llm.yaml"

# 1. Make sure .env exists before trying to start anything.
if [[ ! -f .env ]]; then
    echo "❌ 錯誤: 找不到 .env 檔案,請先建立它。" >&2
    exit 1
fi

# 2. Run Docker Compose in detached mode. The command is tested directly
#    (instead of inspecting $? afterwards) and a failure is propagated as a
#    non-zero exit status so callers can detect it.
echo "🚀 正在啟動 LLM 服務..."
if ! docker compose -f "${COMPOSE_FILE}" up -d; then
    echo "❌ 啟動失敗,請檢查配置。" >&2
    exit 1
fi

# 3. Report success and show how to follow the logs.
echo "✅ 服務已成功在背景執行!"
echo "使用 'docker compose -f ${COMPOSE_FILE} logs -f' 查看全部日誌。"
echo "使用 'docker compose -f ${COMPOSE_FILE} logs -f litellm' 查看 litellm 日誌。"

$ docker pull docker.io/library/caddy:2-alpine
$ docker pull docker.io/valkey/valkey:8-alpine
$ docker pull docker.io/searxng/searxng:latest
$ docker pull ghcr.io/berriai/litellm:main-latest
$ docker pull ghcr.io/open-webui/open-webui:cuda

$ docker network create webnet
$ docker compose -f docker-compose-llm.yaml up -d
$ docker compose -f docker-compose-llm.yaml up -d --force-recreate

# 瀏覽器開啟 open-webui
# 右上圖像/Admin Panel/Settings/Web Search
Web Search: ON
Web Search Engine: searxng
Searxng Query URL: http://searxng:8080
Searxng search language: all

# 測試 searxng 網路
$ docker exec -it caddy ping searxng
# 瀏覽器開啟 http://localhost:8888
$ curl "http://localhost:8888/search?q=test&format=json"
$ curl "http://192.168.0.107:8888/search?q=test&format=json"
$ curl "https://www.abcd.com.tw/searxng/search?q=test&format=json"

$ curl -s http://localhost:8000/v1/chat/completions \
  -H "Authorization: Bearer abcd" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen3.5-9B",
    "messages": [{"role": "user", "content": "請你自我介紹"}],
    "max_tokens": 64
  }'
$ curl http://192.168.0.107:4000/v1/models \
  -H "Authorization: Bearer abcd"
$ curl -s http://192.168.0.107:4000/v1/chat/completions \
  -H "Authorization: Bearer abcd" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen3.5-9B",
    "messages": [{"role": "user", "content": "請你自我介紹"}],
    "max_tokens": 64
  }'








Ollama 之 Open-webui + Searxng + Redis + Caddy

參考 https://forums.developer.nvidia.com/t/playbook-1-open-webui-searxng-private-web-search-on-dgx-spark/359578
參考 https://forums.developer.nvidia.com/t/building-local-hybrid-llms-on-dgx-spark-that-outperform-top-cloud-models/359569
參考 https://forums.developer.nvidia.com/t/dgx-spark-rag-on-docker/363125

$ > searxng/limiter.toml
$ vi searxng/settings.yml
use_default_settings: true

search:
  formats:
    - html
    - json

server:
  # 安全起見,secret_key 可用 openssl rand -hex 32 產生
  secret_key: "c199725f396362fd99ad0e3239fbb5be9d01c04083cffb7e16d50301c67288ee"
  limiter: false
  image_proxy: true

valkey:
  url: redis://redis:6379/0

engines:
  - name: ahmia
    disabled: true
  - name: torch
    disabled: true

$ vi docker-compose-llm.yaml
version: '3.8'
services:
  caddy:
    container_name: caddy
    image: docker.io/library/caddy:2-alpine
    networks:
      - webnet
    ports:
      - "80:80"   # HTTP 埠號
      - "443:443" # HTTPS 埠號
      - "443:443/udp" # HTTP/3 支援
    restart: unless-stopped
    volumes:
      - ./caddy/Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy-data:/data:rw
      - caddy-config:/config:rw
    environment:
      - SEARXNG_HOSTNAME=${SEARXNG_HOSTNAME:-localhost}
      - SEARXNG_TLS=${LETSENCRYPT_EMAIL:-internal}

  redis:
    container_name: redis
    image: docker.io/valkey/valkey:8-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: unless-stopped
    networks:
      - webnet
    volumes:
      - valkey-data2:/data

  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    networks:
      - webnet
    ports:
      - "0.0.0.0:8888:8080"
    volumes:
      - ./searxng:/etc/searxng:rw
      - searxng-log:/var/cache/searxng:rw
    environment:
      - SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/searxng/
      #- SEARXNG_BASE_URL=https://${SEARXNG_HOSTNAME:-localhost}/

  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    container_name: litellm
    restart: unless-stopped
    ports:
      - "4000:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml:ro
    command: --config /app/config.yaml --detailed_debug --num_workers 4
    networks:
      - webnet
    environment:
      - LITELLM_MASTER_KEY=${LITELLM_KEY}
    env_file:
      - .env

  open-webui:
    image: ghcr.io/open-webui/open-webui:cuda
    container_name: open-webui
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - open-webui:/app/backend/data
      - ${HOME_CACHE:-~/.cache}/huggingface:/root/.cache/huggingface
    environment:
      - ENABLE_WEB_SEARCH=true
      - WEB_SEARCH_ENGINE=searxng
      - SEARXNG_URL=http://searxng:8080
      - OPENAI_API_BASE_URL=http://litellm:4000/v1
      - OPENAI_API_KEY=${LITELLM_KEY:-0p3n-w3bu!}
    networks:
      - webnet
      
networks:
  webnet:
    external: true

volumes:
  caddy-data:
  caddy-config:
  valkey-data2:
  searxng-log:
  open-webui:

$ vi caddy/Caddyfile
{$SEARXNG_HOSTNAME} {
    encode gzip zstd
    header {
        Strict-Transport-Security "max-age=31536000;"
        X-Content-Type-Options "nosniff"
        X-Frame-Options "SAMEORIGIN"
        Referrer-Policy "no-referrer"
    }
    #handle /searxng* {
    #    uri strip_prefix /searxng
    #    reverse_proxy searxng:8080
    #}
    #handle /litellm* {
    #    uri strip_prefix /litellm
    #    reverse_proxy litellm:4000
    #}
    handle_path /searxng* {
        reverse_proxy searxng:8080
    }
    handle_path /litellm* {
        reverse_proxy litellm:4000
    }
    handle {
        reverse_proxy open-webui:8080
    }
}

$ vi litellm_config.yaml
model_list:
  - model_name: Nemotron-3-Nano-30B-A3B # 這是你在 Open WebUI 選單中會看到的名稱
    litellm_params:
      model: openai/Nemotron-3-Nano-30B-A3B  # 這裡填寫你 vLLM 載入的模型完整路徑或名稱
      api_base: http://vllm-Nemotron-3-Nano-30B-A3B:8000/v1 # vLLM 的 API 地址
      health_check_url: http://vllm-Nemotron-3-Nano-30B-A3B:8000/health
      api_key: "not-needed" # vLLM 預設不需 key,但 litellm 要求必填
      rpm: 10 # 每分鐘請求限制 (選填)
  - model_name: Qwen3.5-35B-A3B # 這是你在 Open WebUI 選單中會看到的名稱
    litellm_params:
      model: openai/Qwen3.5-35B-A3B  # 這裡填寫你 vLLM 載入的模型完整路徑或名稱
      api_base: http://vllm-Qwen3.5-35B-A3B:8000/v1 # vLLM 的 API 地址
      health_check_url: http://vllm-Qwen3.5-35B-A3B:8000/health
      api_key: "not-needed" # vLLM 預設不需 key,但 litellm 要求必填
      rpm: 10 # 每分鐘請求限制 (選填)
  - model_name: GLM-4.7-Flash # 這是你在 Open WebUI 選單中會看到的名稱
    litellm_params:
      model: openai/GLM-4.7-Flash  # 這裡填寫你 vLLM 載入的模型完整路徑或名稱
      api_base: http://vllm-GLM-4.7-Flash:8000/v1 # vLLM 的 API 地址
      health_check_url: http://vllm-GLM-4.7-Flash:8000/health
      api_key: "not-needed" # vLLM 預設不需 key,但 litellm 要求必填
      rpm: 10 # 每分鐘請求限制 (選填)
  - model_name: Qwen3.5-9B # 這是你在 Open WebUI 選單中會看到的名稱
    litellm_params:
      model: openai/Qwen3.5-9B  # 這裡填寫你 vLLM 載入的模型完整路徑或名稱
      api_base: http://192.168.0.107:8080/v1 # vLLM 的 API 地址
      api_key: "not-needed" # vLLM 預設不需 key,但 litellm 要求必填
      rpm: 10 # 每分鐘請求限制 (選填)

litellm_settings:
  drop_params: True       # 如果 Open WebUI 傳送了 vLLM 不支援的參數,自動剔除避免報錯
  set_verbose: False      # 若要除錯可改為 True 查看詳細日誌

#general_settings:
#  #master_key: ${LITELLM_KEY} # 對應你 .env 裡的 LITELLM_KEY
#  master_key: asdfasdf23

$ vi .env
SEARXNG_HOSTNAME=www.fwwrcom.com.tw
LETSENCRYPT_EMAIL=ewr@gmail.com
HOME_CACHE=/home/spark/.cache
LITELLM_KEY=asdfasdf23

$ vi sta_llm.sh
#!/bin/bash
#
# Start the LLM service stack described by docker-compose-llm.yaml.
#
# Usage:  ./sta_llm.sh   (run from the directory that holds .env and
#                         docker-compose-llm.yaml; both are looked up
#                         relative to the current directory)
# Exit:   0 when `docker compose ... up -d` succeeds,
#         1 when .env is missing or the compose command fails.

readonly COMPOSE_FILE="docker-compose-llm.yaml"

# 1. Make sure .env exists before trying to start anything.
if [[ ! -f .env ]]; then
    echo "❌ 錯誤: 找不到 .env 檔案,請先建立它。" >&2
    exit 1
fi

# 2. Run Docker Compose in detached mode. The command is tested directly
#    (instead of inspecting $? afterwards) and a failure is propagated as a
#    non-zero exit status so callers can detect it.
echo "🚀 正在啟動 LLM 服務..."
if ! docker compose -f "${COMPOSE_FILE}" up -d; then
    echo "❌ 啟動失敗,請檢查配置。" >&2
    exit 1
fi

# 3. Report success and show how to follow the logs.
echo "✅ 服務已成功在背景執行!"
echo "使用 'docker compose -f ${COMPOSE_FILE} logs -f' 查看全部日誌。"
echo "使用 'docker compose -f ${COMPOSE_FILE} logs -f litellm' 查看 litellm 日誌。"


$ docker compose -f docker-compose-llm.yaml up -d
$ docker compose -f docker-compose-llm.yaml up -d --force-recreate
# 測試 searxng 網路
$ docker exec -it caddy ping searxng
# 瀏覽器開啟 http://localhost:8888
$ curl "http://localhost:8888/search?q=test&format=json"
$ curl "http://192.168.0.108:8888/search?q=test&format=json"
$ curl "https://www.fwwrcom.com.tw/searxng/search?q=test&format=json"

$ curl "http://192.168.0.108:8888/"
$ curl "https://www.fwwrcom.com.tw/searxng/"

$ curl http://192.168.0.108:4000/v1/models \
  -H "Authorization: Bearer asdfasdf23"
$ curl -s http://192.168.0.108:4000/v1/chat/completions \
  -H "Authorization: Bearer asdfasdf23" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen3.5-35B-A3B",
    "messages": [{"role": "user", "content": "請你自我介紹"}],
    "max_tokens": 64
  }'

$ curl -i https://www.fwwrcom.com.tw/litellm/v1/chat/completions \
  -H "Authorization: Bearer asdfasdf23" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen3.5-35B-A3B",
    "messages": [{"role": "user", "content": "請你自我介紹"}],
    "max_tokens": 64
  }'


2026年3月12日 星期四

DGX Spark 如何避免 OOM 當機

# 參考 https://forums.developer.nvidia.com/t/mitigating-oom-system-freezes-on-uma-based-single-board-computers/362769
# 另外可參考 DGX Spark 之溫度 https://yingrenn.blogspot.com/2026/02/dgx-spark.html

# 安裝輕量級的 Dropbear SSH
$ sudo apt update && sudo apt install dropbear
$ sudo vi /etc/default/dropbear
NO_START=0
DROPBEAR_PORT=2222

$ sudo systemctl enable dropbear
$ sudo systemctl start dropbear

# Standard connection (OpenSSH)
$ ssh spark@<your-ip>
# Emergency connection (Dropbear)
$ ssh spark@<your-ip> -p 2222

# 安裝 earlyoom
$ sudo apt update
$ sudo apt install earlyoom
$ sudo vi /etc/default/earlyoom
EARLYOOM_ARGS="-m 5 -s 10 --avoid 'pipewire|wireplumber|systemd|ssh|journald' --prefer 'vllm|python|triton'"
# This tells earlyoom to intervene when RAM is under 5% AND Swap is under 10%. 
# It will aggressively target vllm or Python scripts over other processes

$ sudo EDITOR=vi systemctl edit earlyoom
### Editing /etc/systemd/system/earlyoom.service.d/override.conf
### Anything between here and the comment below will become the contents of the drop-in file

[Service]
LimitMEMLOCK=infinity
CapabilityBoundingSet=CAP_IPC_LOCK CAP_SYS_NICE CAP_KILL
AmbientCapabilities=CAP_IPC_LOCK CAP_SYS_NICE CAP_KILL
MemoryLock=infinity
OOMScoreAdjust=-1000

### Edits below this comment will be discarded


### /usr/lib/systemd/system/earlyoom.service
# [Unit]
# Description=Early OOM Daemon
# Documentation=man:earlyoom(1) https://github.com/rfjakob/earlyoom
# [Service]
# EnvironmentFile=-/etc/default/earlyoom
# ExecStart=/usr/bin/earlyoom $EARLYOOM_ARGS
# # Run as an unprivileged user with random user id
# DynamicUser=true
# # Allow killing processes and calling mlockall()
# AmbientCapabilities=CAP_KILL CAP_IPC_LOCK
# # We don't need write access anywhere
# ProtectSystem=strict
# # We don't need /home at all, make it inaccessible
# ProtectHome=true
# # earlyoom never exits on it's own, so have systemd
# # restart it should it get killed for some reason.
# Restart=always
# # set memory limits and max tasks number
# TasksMax=10
# MemoryMax=50M
# [Install]
# WantedBy=multi-user.target

$ sudo systemctl daemon-reload
$ sudo systemctl restart earlyoom
$ sudo systemctl status earlyoom

# 查詢 log
$ journalctl -u earlyoom -f

$ cat /etc/systemd/system/earlyoom.service.d/override.conf 
[Service]
LimitMEMLOCK=infinity
CapabilityBoundingSet=CAP_IPC_LOCK CAP_SYS_NICE CAP_KILL
AmbientCapabilities=CAP_IPC_LOCK CAP_SYS_NICE CAP_KILL
MemoryLock=infinity
OOMScoreAdjust=-1000
$ ps aux |grep earlyoom
earlyoom   80791  0.0  0.0   2288  1688 ?        SLs  11:55   0:00 /usr/bin/earlyoom -m 5 -s 10 --avoid pipewire|wireplumber|systemd|ssh|journald --prefer vllm|python|triton
$ cat /proc/$(pgrep earlyoom)/oom_score_adj
-1000

2026年2月9日 星期一

DGX Spark 安裝 piper tts

參考 https://github.com/OHF-Voice/piper1-gpl/tree/main
參考 https://huggingface.co/csukuangfj/vits-piper-zh_CN-huayan-medium
參考 https://huggingface.co/csukuangfj/vits-piper-zh_CN-huayan-x_low

$ uv venv --python 3.13
$ source .venv/bin/activate
$ uv pip install piper-tts
$ uv pip install g2pw
$ uv pip install requests
$ uv pip install torch --index-url https://download.pytorch.org/whl/cu130
$ uv pip install unicode_rbnf
$ uv pip install sentence_stream
$ uv pip install fastapi
$ uv pip install uvicorn
$ uv pip install python-multipart


$ python3 -m piper.download_voices
$ python3 -m piper.download_voices zh_CN-huayan-x_low --download-dir models
$ ls models/
$ python3 -m piper.download_voices zh_CN-huayan-medium --download-dir models
$ python3 -m piper.download_voices zh_CN-chaowen-medium --download-dir models
$ python3 -m piper.download_voices zh_CN-xiao_ya-medium --download-dir models
$ python3 -m piper.download_voices en_US-lessac-medium --download-dir models

# for voice.synthesize, 用 curl 可以成功,但 open-webui 測試失敗
$ curl -X POST http://127.0.0.1:8100/v1/audio/speech \
     -H "Content-Type: application/json" \
     -d '{"input": "你好,這是一段測試語音。"}' \
     --output output.pcm
$ ffmpeg -f s16le -ar 16000 -ac 1 -i output.pcm \
       -codec:a libmp3lame -b:a 128k output.mp3

# voice.synthesize_wav, 用 curl 可以成功,並且 open-webui 測試成功
$ curl -X POST http://127.0.0.1:8100/v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{"input":"你好,這是 synthesize_wav 測試"}' \
  --output output.wav

2026年2月6日 星期五

DGX Spark 使用 Qwen3-ASR-1.7B

參考 https://huggingface.co/Qwen/Qwen3-ASR-1.7B

# 請勿將真實 token 寫入公開文章;以自己的 Hugging Face token 取代下列佔位字串
$ export HF_TOKEN=<your-hf-token>
$ hf download Qwen/Qwen3-ASR-1.7B --local-dir Qwen3-ASR-1.7B
$ hf download Qwen/Qwen3-ASR-0.6B --local-dir Qwen3-ASR-0.6B

$ uv init qwen3-asr
$ cd qwen3-asr/
$ rm .python-version
# 參考 cu130 版本資訊 https://download.pytorch.org/whl/cu130/
$ uv venv --python 3.13
$ source .venv/bin/activate
$ uv pip install -e .
$ uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130
$ uv pip install qwen-asr
$ uv pip uninstall torch torchvision torchaudio
$ uv pip install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu130
$ uv pip install torchvision==0.24.1 --index-url https://download.pytorch.org/whl/cu130
$ uv pip install torchaudio==2.9.1 --index-url https://download.pytorch.org/whl/cu130
# 別想著直接使用下列命令安裝,會導致之後的安裝 flash-attn 失敗
# uv pip install torch==2.9.1 torchvision==0.24.1 torchaudio==2.9.1 --index-url https://download.pytorch.org/whl/cu130

# 安裝 flash-attn, 使用 wheels 要求 python 3.10
# uv pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.6.4/flash_attn-2.8.3%2Bcu130torch2.9-cp310-cp310-linux_aarch64.whl
# 安裝 flash-attn 使用編譯安裝,要求使用 torch 2.9.1
$ uv pip install numpy ninja packaging setuptools wheel
$ export TORCH_CUDA_ARCH_LIST="12.1"
$ export CUDA_HOME=/usr/local/cuda-13.0
$ FLASH_ATTENTION_FORCE_BUILD=TRUE MAX_JOBS=4 uv pip install flash-attn --no-build-isolation --no-cache-dir
# 因為很耗記憶體,所以時常會出現 Out of memory, 可以在很長的 log 中找到 Killed
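# 或者使用下列兩個命令查詢確認
$ sudo dmesg -T | grep -i -E 'out of memory|killed process'
$ journalctl -k | grep -i -E 'out of memory|killed process'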

# 安裝 qwen-asr, 因為內建 vllm 會去找 CUDA 12 版本,所以失敗
# git clone https://github.com/QwenLM/Qwen3-ASR.git
# uv pip install -e ./Qwen3-ASR[vllm] --no-build-isolation -v
# 直接安裝 vllm
$ uv pip install https://github.com/vllm-project/vllm/releases/download/v0.14.0/vllm-0.14.0+cu130-cp38-abi3-manylinux_2_35_aarch64.whl

$ qwen-asr-serve /mnt/models/Qwen3-ASR-0.6B \
  --allowed-local-media-path /home/spark/DiskD/audio_llm \
  --gpu-memory-utilization 0.5 \
  --host 0.0.0.0 --port 8000
$ curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -X POST \
  -d '{
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "audio_url",
            "audio_url": {
              "url": "file:///home/spark/DiskD/audio_llm/breeze-asr/output.wav"
            }
          },
          {
            "type": "audio_url",
            "audio_url": {
              "url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR-Repo/asr_en.wav"
            }
          }
        ]
      }
    ]
  }' | jq -r '.choices[0].message.content'

$ uvicorn test_c:app --host 0.0.0.0 --port 8000
$ curl -X POST "http://localhost:8000/v1/audio/transcriptions" \
  -F "file=@/home/spark/DiskD/audio_llm/breeze-asr/output.wav" \
  -F "model_name=gpt-4o-mini-transcribe" \
  -F "language=zh" | jq
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  525k  100  2955  100  522k   2355   416k  0:00:01  0:00:01 --:--:--  418k
{
  "results": [
    {
      "language": "Chinese",
      "text": "说书相声这种东西,人靠一张嘴,通过语言的结构,把看官听众吸引到故事里面。在演出的时候,要求你身上的每个动作都必须要有含义。",
      "time_stamps": {
        "items": [
          {
            "text": "说",
            "start_time": 0.08,
            "end_time": 0.32
          },
          {
            "text": "书",
            "start_time": 0.32,
            "end_time": 0.48
          },
          {
            "text": "相",
            "start_time": 0.48,
            "end_time": 0.72
          },
          {
            "text": "声",
            "start_time": 0.72,
            "end_time": 1.04
          },
          ........
          {
            "text": "有",
            "start_time": 11.52,
            "end_time": 11.6
          },
          {
            "text": "含",
            "start_time": 11.6,
            "end_time": 11.84
          },
          {
            "text": "义",
            "start_time": 11.84,
            "end_time": 12.08
          }
        ]
      }
    }
  ]
}

$ curl -X POST "http://localhost:8000/v1/audio/transcriptions" \
  -F "file_url=https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR-Repo/asr_en.wav" \
  -F "model_name=gpt-4o-mini-transcribe" \
  -F "language=en" | jq
$ curl -X POST "http://localhost:8000/v1/audio/transcriptions" \
  -F "file=@/home/spark/DiskD/audio_llm/breeze-asr/output.wav" \
  -F "file_url=https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen3-ASR-Repo/asr_en.wav" \
  -F "model_name=gpt-4o-mini-transcribe" | jq