生活紀錄: DGX Spark 上安裝 GPT-SoVITS

# 原始的 GPT-SoVITS 在 DGX Spark 上無法使用 CUDA

# 安裝 CPU 版本，以作為參考

$ docker compose -f my_docker-compose.yaml up -d

$ docker exec -it GPT-SoVITS-CU128 bash

(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python --version

Python 3.12.12

(base) root@65f69181a6d7:/workspace/GPT-SoVITS# pip list

Package Version

------------------------ -----------

absl-py 2.4.0

accelerate 1.12.0

aiofiles 23.2.1

aiohappyeyeballs 2.6.1

aiohttp 3.13.3

aiosignal 1.4.0

aliyun-python-sdk-core 2.16.0

aliyun-python-sdk-kms 2.16.5

annotated-doc 0.0.4

annotated-types 0.7.0

antlr4-python3-runtime 4.9.3

anyio 4.12.1

archspec 0.2.5

attrs 25.4.0

audioread 3.1.0

av 16.1.0

backports.zstd 1.3.0

boltons 25.0.0

Brotli 1.2.0

budoux 0.7.0

certifi 2026.2.25

cffi 2.0.0

chardet 5.2.0

charset-normalizer 3.4.4

click 8.3.1

cn2an 0.5.23

colorlog 6.10.1

conda 26.1.0

conda-libmamba-solver 25.11.0

conda-package-handling 2.4.0

conda_package_streaming 0.12.0

contourpy 1.3.3

crcmod 1.7

cryptography 46.0.4

ctranslate2 4.7.1

cuda-bindings 12.9.4

cuda-pathfinder 1.2.2

cycler 0.12.1

decorator 5.2.1

Distance 0.1.3

distro 1.9.0

dnspython 2.8.0

editdistance 0.8.1

einops 0.8.2

einx 0.3.0

email-validator 2.3.0

fast-langdetect 1.0.0

fastapi 0.128.5

fastapi-cli 0.0.20

fastapi-cloud-cli 0.11.0

fastar 0.8.0

faster-whisper 1.2.1

fasttext-predict 0.9.2.4

ffmpeg-python 0.2.0

ffmpy 1.0.0

filelock 3.20.0

flash_attn 2.8.3

flatbuffers 25.12.19

fonttools 4.61.1

frozendict 2.4.7

frozenlist 1.8.0

fsspec 2025.12.0

funasr 1.0.27

future 1.0.0

g2p-en 2.1.0

g2pk2 0.0.3

gradio 4.44.1

gradio_client 1.3.0

grpcio 1.78.0

h11 0.16.0

h2 4.3.0

hf-xet 1.2.0

hpack 4.1.0

httpcore 1.0.9

httptools 0.7.1

httpx 0.28.1

huggingface_hub 0.36.2

hydra-core 1.3.2

hyperframe 6.1.0

idna 3.11

importlib_resources 6.5.2

inflect 7.5.0

jaconv 0.5.0

jamo 0.4.1

jieba 0.42.1

jieba_fast 0.53

Jinja2 3.1.6

jmespath 0.10.0

joblib 1.5.3

jsonpatch 1.33

jsonpointer 3.0.0

kaldiio 2.18.1

kiwisolver 1.4.9

ko-pron 1.3

lazy_loader 0.4

libmambapy 2.5.0

librosa 0.10.2

lightning-utilities 0.15.2

llvmlite 0.46.0

loguru 0.7.3

Markdown 3.10.1

markdown-it-py 4.0.0

MarkupSafe 2.1.5

matplotlib 3.10.8

mdurl 0.1.2

menuinst 2.4.2

modelscope 1.34.0

more-itertools 10.8.0

mpmath 1.3.0

msgpack 1.1.2

multidict 6.7.1

networkx 3.6.1

ninja 1.13.0

nltk 3.9.2

numba 0.63.1

numpy 1.26.4

nvidia-cublas-cu12 12.8.4.1

nvidia-cuda-cupti-cu12 12.8.90

nvidia-cuda-nvrtc-cu12 12.8.93

nvidia-cuda-runtime-cu12 12.8.90

nvidia-cudnn-cu12 9.10.2.21

nvidia-cufft-cu12 11.3.3.83

nvidia-cufile-cu12 1.13.1.3

nvidia-curand-cu12 10.3.9.90

nvidia-cusolver-cu12 11.7.3.90

nvidia-cusparse-cu12 12.5.8.93

nvidia-cusparselt-cu12 0.7.1

nvidia-nccl-cu12 2.27.5

nvidia-nvjitlink-cu12 12.8.93

nvidia-nvshmem-cu12 3.4.5

nvidia-nvtx-cu12 12.8.90

omegaconf 2.3.0

onnxruntime 1.24.1

openai-whisper 20250625

OpenCC 1.2.0

orjson 3.11.7

oss2 2.19.1

packaging 26.0

pandas 2.3.3

peft 0.17.1

pillow 10.4.0

pip 26.0.1

platformdirs 4.5.1

pluggy 1.6.0

pooch 1.9.0

proces 0.1.7

propcache 0.4.1

protobuf 6.33.5

psutil 7.2.2

pycosat 0.6.6

pycparser 2.22

pycryptodome 3.23.0

pydantic 2.10.6

pydantic_core 2.27.2

pydantic-extra-types 2.11.0

pydantic-settings 2.12.0

pydub 0.25.1

Pygments 2.19.2

pynndescent 0.6.0

pyopenjtalk 0.4.1

pyparsing 3.3.2

pypinyin 0.55.0

PySocks 1.7.1

python-dateutil 2.9.0.post0

python-dotenv 1.2.1

python-mecab-ko 1.3.7

python-mecab-ko-dic 2.1.1.post2

python-multipart 0.0.22

pytorch-lightning 2.6.1

pytorch-wpe 0.0.1

pytz 2025.2

PyYAML 6.0.3

regex 2026.1.15

requests 2.32.5

rich 14.3.2

rich-toolkit 0.18.1

rignore 0.7.6

robust-downloader 0.0.2

rotary-embedding-torch 0.8.9

ruamel.yaml 0.18.17

ruamel.yaml.clib 0.2.15

ruff 0.15.0

safetensors 0.7.0

scikit-learn 1.8.0

scipy 1.17.0

semantic-version 2.10.0

sentencepiece 0.2.1

sentry-sdk 2.52.0

setuptools 81.0.0

shellingham 1.5.4

six 1.17.0

soundfile 0.13.1

soxr 1.0.0

split-lang 2.1.1

starlette 0.52.1

sympy 1.14.0

tensorboard 2.20.0

tensorboard-data-server 0.7.2

tensorboardX 2.6.4

threadpoolctl 3.6.0

tiktoken 0.12.0

ToJyutping 3.2.0

tokenizers 0.21.4

tomlkit 0.12.0

torch 2.7.0+cpu

torch-complex 0.4.4

torchaudio 2.7.0

torchmetrics 1.5.0

tqdm 4.67.3

transformers 4.50.0

triton 3.6.0

truststore 0.10.4

typeguard 4.4.4

typer 0.21.1

typing_extensions 4.15.0

typing-inspection 0.4.2

tzdata 2025.3

umap-learn 0.5.11

urllib3 2.6.3

uvicorn 0.40.0

uvloop 0.22.1

watchfiles 1.1.1

websockets 12.0

Werkzeug 3.1.5

wheel 0.46.3

wordsegment 1.3.1

x-transformers 2.16.0

yarl 1.22.0

zstandard 0.25.0

(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA is available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}')"

PyTorch version: 2.7.0+cpu

CUDA is available: False

CUDA version: None

(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import flash_attn; print(f'Flash Attention version: {flash_attn.__version__}')"

Traceback (most recent call last):

File "<string>", line 1, in <module>

File "/root/conda/lib/python3.12/site-packages/flash_attn/__init__.py", line 3, in <module>

from flash_attn.flash_attn_interface import (

File "/root/conda/lib/python3.12/site-packages/flash_attn/flash_attn_interface.py", line 15, in <module>

import flash_attn_2_cuda as flash_attn_gpu

ImportError: libcudart.so.12: cannot open shared object file: No such file or directory

(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import librosa; print('import librosa ok')"

import librosa ok

(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import transformers; print('Transformers OK')"

Transformers OK

=============================================

# 參考 https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html

# Docker image 不要使用 nvcr.io/nvidia/pytorch，因為其中沒有 torchaudio

# 改用 nvidia/cuda:13.1.1-devel-ubuntu24.04

$ git clone https://github.com/XXXXRT666/Docker-Base.git

$ cd Docker-Base

$ vi my_Dockerfile

$ docker image rm gpt-sovits-spark:v2p-0217

$ docker builder prune

$ docker build --progress=plain \
    --no-cache \
    --build-arg CUDA_VERSION=13.1 \
    -t gpt-sovits-spark:v2p-0217 \
    -f my_Dockerfile . 2>&1 | tee ../aaa.txt

$ docker run --rm -it --gpus all gpt-sovits-spark:v2p-0217 /bin/bash

root@1f79611b4722:/workspace# python3 --version

Python 3.12.3

root@1f79611b4722:/workspace# nvidia-smi

Tue Apr 14 07:07:43 2026

+-----------------------------------------------------------------------------------------+

| NVIDIA-SMI 580.142 Driver Version: 580.142 CUDA Version: 13.1 |

+-----------------------------------------+------------------------+----------------------+

root@1158f7b98c55:/workspace# ninja --version

1.13.0.git.kitware.jobserver-pipe-1

root@1f79611b4722:/workspace# ffmpeg -version

built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)

configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/aarch64-linux-gnu --incdir=/usr/include/aarch64-linux-gnu --arch=arm64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-openal --enable-opencl --enable-opengl --disable-sndio --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-ladspa --enable-libbluray --enable-libjack --enable-libpulse --enable-librabbitmq --enable-librist --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libx264 --enable-libzmq --enable-libzvbi --enable-lv2 --enable-sdl2 --enable-libplacebo --enable-librav1e --enable-pocketsphinx --enable-librsvg --enable-libjxl --enable-shared

libavutil 58. 29.100 / 58. 29.100

libavcodec 60. 31.102 / 60. 31.102

libavformat 60. 16.100 / 60. 16.100

libavdevice 60. 3.100 / 60. 3.100

libavfilter 9. 12.100 / 9. 12.100

libswscale 7. 5.100 / 7. 5.100

libswresample 4. 12.100 / 4. 12.100

libpostproc 57. 3.100 / 57. 3.100

(.venv) root@7d3af3d35cd6:/workspace# pip list

Package Version

------------------------ ----------------

accelerate 1.13.0

audioread 3.1.0

certifi 2026.2.25

cffi 2.0.0

charset-normalizer 3.4.7

cuda-bindings 13.0.3

cuda-pathfinder 1.5.3

cuda-toolkit 13.0.3.0

decorator 5.2.1

dllist 2.0.0

einops 0.8.2

filelock 3.28.0

flash_attn 2.8.3

fsspec 2026.3.0

hf-xet 1.4.3

huggingface_hub 0.36.2

idna 3.11

Jinja2 3.1.6

joblib 1.5.3

lazy-loader 0.5

librosa 0.11.0

llvmlite 0.47.0

MarkupSafe 3.0.3

mpmath 1.3.0

msgpack 1.1.2

networkx 3.6.1

numba 0.65.0

numpy 2.4.4

nvidia-cublas 13.1.0.3

nvidia-cuda-cupti 13.0.85

nvidia-cuda-nvrtc 13.0.88

nvidia-cuda-runtime 13.0.96

nvidia-cuda-runtime-cu13 0.0.0a0

nvidia-cudnn-cu13 9.15.1.9

nvidia-cufft 12.0.0.61

nvidia-cufile 1.15.1.6

nvidia-curand 10.4.0.35

nvidia-cusolver 12.0.4.66

nvidia-cusparse 12.6.3.3

nvidia-cusparselt-cu13 0.8.0

nvidia-nccl-cu13 2.28.9

nvidia-nvjitlink 13.0.88

nvidia-nvshmem-cu13 3.4.5

nvidia-nvtx 13.0.85

packaging 26.1

pillow 12.2.0

pip 26.0.1

platformdirs 4.9.6

pooch 1.9.0

psutil 7.2.2

pycparser 3.0

PyYAML 6.0.3

regex 2026.4.4

requests 2.33.1

safetensors 0.7.0

scikit-learn 1.8.0

scipy 1.17.1

sentencepiece 0.2.1

setuptools 82.0.1

soundfile 0.13.1

soxr 1.0.0

sympy 1.14.0

tensorrt 10.14.1.48.post1

tensorrt_cu13 10.14.1.48.post1

tensorrt_cu13_bindings 10.14.1.48.post1

tensorrt_cu13_libs 10.14.1.48.post1

threadpoolctl 3.6.0

tokenizers 0.21.4

torch 2.10.0+cu130

torch_tensorrt 2.10.0+cu130

torchaudio 2.10.0+cu130

torchcodec 0.10.0+cu130

torchvision 0.25.0+cu130

tqdm 4.67.3

transformers 4.50.0

triton 3.6.0

typing_extensions 4.15.0

urllib3 2.6.3

wheel 0.46.3

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA is available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}')"

PyTorch version: 2.10.0+cu130

CUDA is available: True

CUDA version: 13.0

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torch; a=torch.randn(1, 3, 224, 224).cuda(); print('GPU ok')"

/workspace/.venv/lib/python3.12/site-packages/torch/cuda/__init__.py:435: UserWarning:

Found GPU0 NVIDIA GB10 which is of cuda capability 12.1.

Minimum and Maximum cuda capability supported by this version of PyTorch is

(8.0) - (12.0)

queued_call()

GPU ok

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torchaudio; print(f'torchaudio version: {torchaudio.__version__}')"

torchaudio version: 2.10.0+cu130

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torchcodec; print(f'torchcodec version: {torchcodec.__version__}')"

torchcodec version: 0.10.0+cu130

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torch; import torchcodec; print('modules:', dir(torchcodec))"

modules: ['AudioSamples', 'Frame', 'FrameBatch', 'Path', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_core', '_frame', '_internally_replaced_utils', 'cmake_prefix_path', 'core_library_path', 'decoders', 'encoders', 'ffmpeg_major_version', 'samplers', 'transforms', 'version']

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import flash_attn; print(f'Flash Attention version: {flash_attn.__version__}')"

Flash Attention version: 2.8.3

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torchvision; print(f'torchvision version: {torchvision.__version__}')"

torchvision version: 0.25.0+cu130

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import librosa; print('import librosa ok')"

import librosa ok

(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import transformers; print('Transformers OK')"

Transformers OK

=============================================

$ docker image rm gpt-sovits-cu128-fixed:latest

$ docker builder prune

$ docker build --progress=plain \
    --no-cache \
    -t gpt-sovits-cu128-fixed \
    -f my_Dockerfile . 2>&1 | tee ../aaa.txt

$ docker run --rm -it --gpus all gpt-sovits-cu128-fixed /bin/bash

$ ffmpeg -i /home/spark/DiskD/audio_llm/audio_openai/audio_openai/audio_files/6551d5b0-e35f-43ee-b262-79c55ad548ea.webm \
    -vn -acodec pcm_s16le -ar 44100 -ac 2 samples/hard_way.wav

$ docker compose -f my_docker-compose.yaml up -d

$ docker logs -f GPT-SoVITS-CU128

$ docker stop GPT-SoVITS-CU128

$ docker start GPT-SoVITS-CU128

$ docker restart GPT-SoVITS-CU128

$ docker logs -f GPT-SoVITS-CU128

$ curl -X POST http://localhost:9880/ \
    -H "Content-Type: application/json" \
    -d '{
      "text": "你好，這是中英文語音 Zero-shot TTS 測試",
      "text_language": "zh"
    }' \
    --output out.wav

$ curl -X POST http://localhost:9880/ \
    -H "Content-Type: application/json" \
    -d '{
      "refer_wav_path": "samples/output.wav",
      "prompt_text": "說書相生這種東西仍靠一張嘴，通過語言的結構把看官聽眾吸引到故事裡面，在演出的時候，要求你身上的每個動作都必須要有含義。",
      "prompt_language": "zh",
      "text": "你好，這是中英文語音 Zero-shot TTS 測試",
      "text_language": "zh"
    }' \
    --output out.wav

$ docker exec -it GPT-SoVITS-CU128 bash

(base)# python webui.py

生活紀錄

網頁

2026年4月17日星期五

DGX Spark 上安裝 GPT-SoVITS

沒有留言:

張貼留言

網頁

2026年4月17日 星期五

DGX Spark 上安裝 GPT-SoVITS

沒有留言:

張貼留言

2026年4月17日星期五