# 原始的 GPT-SoVITS 在 spark 上無法使用 cuda
# 先安裝 CPU 版本以作為參考(容器雖命名為 GPT-SoVITS-CU128,但其中的 torch 實際是 2.7.0+cpu,見下方 pip list)
$ docker compose -f my_docker-compose.yaml up -d
$ docker exec -it GPT-SoVITS-CU128 bash
(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python --version
Python 3.12.12
(base) root@65f69181a6d7:/workspace/GPT-SoVITS# pip list
Package Version
------------------------ -----------
absl-py 2.4.0
accelerate 1.12.0
aiofiles 23.2.1
aiohappyeyeballs 2.6.1
aiohttp 3.13.3
aiosignal 1.4.0
aliyun-python-sdk-core 2.16.0
aliyun-python-sdk-kms 2.16.5
annotated-doc 0.0.4
annotated-types 0.7.0
antlr4-python3-runtime 4.9.3
anyio 4.12.1
archspec 0.2.5
attrs 25.4.0
audioread 3.1.0
av 16.1.0
backports.zstd 1.3.0
boltons 25.0.0
Brotli 1.2.0
budoux 0.7.0
certifi 2026.2.25
cffi 2.0.0
chardet 5.2.0
charset-normalizer 3.4.4
click 8.3.1
cn2an 0.5.23
colorlog 6.10.1
conda 26.1.0
conda-libmamba-solver 25.11.0
conda-package-handling 2.4.0
conda_package_streaming 0.12.0
contourpy 1.3.3
crcmod 1.7
cryptography 46.0.4
ctranslate2 4.7.1
cuda-bindings 12.9.4
cuda-pathfinder 1.2.2
cycler 0.12.1
decorator 5.2.1
Distance 0.1.3
distro 1.9.0
dnspython 2.8.0
editdistance 0.8.1
einops 0.8.2
einx 0.3.0
email-validator 2.3.0
fast-langdetect 1.0.0
fastapi 0.128.5
fastapi-cli 0.0.20
fastapi-cloud-cli 0.11.0
fastar 0.8.0
faster-whisper 1.2.1
fasttext-predict 0.9.2.4
ffmpeg-python 0.2.0
ffmpy 1.0.0
filelock 3.20.0
flash_attn 2.8.3
flatbuffers 25.12.19
fonttools 4.61.1
frozendict 2.4.7
frozenlist 1.8.0
fsspec 2025.12.0
funasr 1.0.27
future 1.0.0
g2p-en 2.1.0
g2pk2 0.0.3
gradio 4.44.1
gradio_client 1.3.0
grpcio 1.78.0
h11 0.16.0
h2 4.3.0
hf-xet 1.2.0
hpack 4.1.0
httpcore 1.0.9
httptools 0.7.1
httpx 0.28.1
huggingface_hub 0.36.2
hydra-core 1.3.2
hyperframe 6.1.0
idna 3.11
importlib_resources 6.5.2
inflect 7.5.0
jaconv 0.5.0
jamo 0.4.1
jieba 0.42.1
jieba_fast 0.53
Jinja2 3.1.6
jmespath 0.10.0
joblib 1.5.3
jsonpatch 1.33
jsonpointer 3.0.0
kaldiio 2.18.1
kiwisolver 1.4.9
ko-pron 1.3
lazy_loader 0.4
libmambapy 2.5.0
librosa 0.10.2
lightning-utilities 0.15.2
llvmlite 0.46.0
loguru 0.7.3
Markdown 3.10.1
markdown-it-py 4.0.0
MarkupSafe 2.1.5
matplotlib 3.10.8
mdurl 0.1.2
menuinst 2.4.2
modelscope 1.34.0
more-itertools 10.8.0
mpmath 1.3.0
msgpack 1.1.2
multidict 6.7.1
networkx 3.6.1
ninja 1.13.0
nltk 3.9.2
numba 0.63.1
numpy 1.26.4
nvidia-cublas-cu12 12.8.4.1
nvidia-cuda-cupti-cu12 12.8.90
nvidia-cuda-nvrtc-cu12 12.8.93
nvidia-cuda-runtime-cu12 12.8.90
nvidia-cudnn-cu12 9.10.2.21
nvidia-cufft-cu12 11.3.3.83
nvidia-cufile-cu12 1.13.1.3
nvidia-curand-cu12 10.3.9.90
nvidia-cusolver-cu12 11.7.3.90
nvidia-cusparse-cu12 12.5.8.93
nvidia-cusparselt-cu12 0.7.1
nvidia-nccl-cu12 2.27.5
nvidia-nvjitlink-cu12 12.8.93
nvidia-nvshmem-cu12 3.4.5
nvidia-nvtx-cu12 12.8.90
omegaconf 2.3.0
onnxruntime 1.24.1
openai-whisper 20250625
OpenCC 1.2.0
orjson 3.11.7
oss2 2.19.1
packaging 26.0
pandas 2.3.3
peft 0.17.1
pillow 10.4.0
pip 26.0.1
platformdirs 4.5.1
pluggy 1.6.0
pooch 1.9.0
proces 0.1.7
propcache 0.4.1
protobuf 6.33.5
psutil 7.2.2
pycosat 0.6.6
pycparser 2.22
pycryptodome 3.23.0
pydantic 2.10.6
pydantic_core 2.27.2
pydantic-extra-types 2.11.0
pydantic-settings 2.12.0
pydub 0.25.1
Pygments 2.19.2
pynndescent 0.6.0
pyopenjtalk 0.4.1
pyparsing 3.3.2
pypinyin 0.55.0
PySocks 1.7.1
python-dateutil 2.9.0.post0
python-dotenv 1.2.1
python-mecab-ko 1.3.7
python-mecab-ko-dic 2.1.1.post2
python-multipart 0.0.22
pytorch-lightning 2.6.1
pytorch-wpe 0.0.1
pytz 2025.2
PyYAML 6.0.3
regex 2026.1.15
requests 2.32.5
rich 14.3.2
rich-toolkit 0.18.1
rignore 0.7.6
robust-downloader 0.0.2
rotary-embedding-torch 0.8.9
ruamel.yaml 0.18.17
ruamel.yaml.clib 0.2.15
ruff 0.15.0
safetensors 0.7.0
scikit-learn 1.8.0
scipy 1.17.0
semantic-version 2.10.0
sentencepiece 0.2.1
sentry-sdk 2.52.0
setuptools 81.0.0
shellingham 1.5.4
six 1.17.0
soundfile 0.13.1
soxr 1.0.0
split-lang 2.1.1
starlette 0.52.1
sympy 1.14.0
tensorboard 2.20.0
tensorboard-data-server 0.7.2
tensorboardX 2.6.4
threadpoolctl 3.6.0
tiktoken 0.12.0
ToJyutping 3.2.0
tokenizers 0.21.4
tomlkit 0.12.0
torch 2.7.0+cpu
torch-complex 0.4.4
torchaudio 2.7.0
torchmetrics 1.5.0
tqdm 4.67.3
transformers 4.50.0
triton 3.6.0
truststore 0.10.4
typeguard 4.4.4
typer 0.21.1
typing_extensions 4.15.0
typing-inspection 0.4.2
tzdata 2025.3
umap-learn 0.5.11
urllib3 2.6.3
uvicorn 0.40.0
uvloop 0.22.1
watchfiles 1.1.1
websockets 12.0
Werkzeug 3.1.5
wheel 0.46.3
wordsegment 1.3.1
x-transformers 2.16.0
yarl 1.22.0
zstandard 0.25.0
(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA is available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}')"
PyTorch version: 2.7.0+cpu
CUDA is available: False
CUDA version: None
(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import flash_attn; print(f'Flash Attention version: {flash_attn.__version__}')"
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/root/conda/lib/python3.12/site-packages/flash_attn/__init__.py", line 3, in <module>
from flash_attn.flash_attn_interface import (
File "/root/conda/lib/python3.12/site-packages/flash_attn/flash_attn_interface.py", line 15, in <module>
import flash_attn_2_cuda as flash_attn_gpu
ImportError: libcudart.so.12: cannot open shared object file: No such file or directory
(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import librosa; print('import librosa ok')"
import librosa ok
(base) root@65f69181a6d7:/workspace/GPT-SoVITS# python3 -c "import transformers; print('Transformers OK')"
Transformers OK
=============================================
# 參考 https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html
# docker image 不要使用 nvcr.io/nvidia/pytorch, 因為沒有 torchaudio
# 使用 nvidia/cuda:13.1.1-devel-ubuntu24.04
$ git clone https://github.com/XXXXRT666/Docker-Base.git
$ cd Docker-Base
$ vi my_Dockerfile
$ docker image rm gpt-sovits-spark:v2p-0217
$ docker builder prune
$ docker build --progress=plain \
--no-cache \
--build-arg CUDA_VERSION=13.1 \
-t gpt-sovits-spark:v2p-0217 \
-f my_Dockerfile . 2>&1 | tee ../aaa.txt
$ docker run --rm -it --gpus all gpt-sovits-spark:v2p-0217 /bin/bash
root@1f79611b4722:/workspace# python3 --version
Python 3.12.3
root@1f79611b4722:/workspace# nvidia-smi
Tue Apr 14 07:07:43 2026
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.142 Driver Version: 580.142 CUDA Version: 13.1 |
+-----------------------------------------+------------------------+----------------------+
root@1158f7b98c55:/workspace# ninja --version
1.13.0.git.kitware.jobserver-pipe-1
root@1f79611b4722:/workspace# ffmpeg -version
ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/aarch64-linux-gnu --incdir=/usr/include/aarch64-linux-gnu --arch=arm64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-openal --enable-opencl --enable-opengl --disable-sndio --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-ladspa --enable-libbluray --enable-libjack --enable-libpulse --enable-librabbitmq --enable-librist --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libx264 --enable-libzmq --enable-libzvbi --enable-lv2 --enable-sdl2 --enable-libplacebo --enable-librav1e --enable-pocketsphinx --enable-librsvg --enable-libjxl --enable-shared
libavutil 58. 29.100 / 58. 29.100
libavcodec 60. 31.102 / 60. 31.102
libavformat 60. 16.100 / 60. 16.100
libavdevice 60. 3.100 / 60. 3.100
libavfilter 9. 12.100 / 9. 12.100
libswscale 7. 5.100 / 7. 5.100
libswresample 4. 12.100 / 4. 12.100
libpostproc 57. 3.100 / 57. 3.100
(.venv) root@7d3af3d35cd6:/workspace# pip list
Package Version
------------------------ ----------------
accelerate 1.13.0
audioread 3.1.0
certifi 2026.2.25
cffi 2.0.0
charset-normalizer 3.4.7
cuda-bindings 13.0.3
cuda-pathfinder 1.5.3
cuda-toolkit 13.0.3.0
decorator 5.2.1
dllist 2.0.0
einops 0.8.2
filelock 3.28.0
flash_attn 2.8.3
fsspec 2026.3.0
hf-xet 1.4.3
huggingface_hub 0.36.2
idna 3.11
Jinja2 3.1.6
joblib 1.5.3
lazy-loader 0.5
librosa 0.11.0
llvmlite 0.47.0
MarkupSafe 3.0.3
mpmath 1.3.0
msgpack 1.1.2
networkx 3.6.1
numba 0.65.0
numpy 2.4.4
nvidia-cublas 13.1.0.3
nvidia-cuda-cupti 13.0.85
nvidia-cuda-nvrtc 13.0.88
nvidia-cuda-runtime 13.0.96
nvidia-cuda-runtime-cu13 0.0.0a0
nvidia-cudnn-cu13 9.15.1.9
nvidia-cufft 12.0.0.61
nvidia-cufile 1.15.1.6
nvidia-curand 10.4.0.35
nvidia-cusolver 12.0.4.66
nvidia-cusparse 12.6.3.3
nvidia-cusparselt-cu13 0.8.0
nvidia-nccl-cu13 2.28.9
nvidia-nvjitlink 13.0.88
nvidia-nvshmem-cu13 3.4.5
nvidia-nvtx 13.0.85
packaging 26.1
pillow 12.2.0
pip 26.0.1
platformdirs 4.9.6
pooch 1.9.0
psutil 7.2.2
pycparser 3.0
PyYAML 6.0.3
regex 2026.4.4
requests 2.33.1
safetensors 0.7.0
scikit-learn 1.8.0
scipy 1.17.1
sentencepiece 0.2.1
setuptools 82.0.1
soundfile 0.13.1
soxr 1.0.0
sympy 1.14.0
tensorrt 10.14.1.48.post1
tensorrt_cu13 10.14.1.48.post1
tensorrt_cu13_bindings 10.14.1.48.post1
tensorrt_cu13_libs 10.14.1.48.post1
threadpoolctl 3.6.0
tokenizers 0.21.4
torch 2.10.0+cu130
torch_tensorrt 2.10.0+cu130
torchaudio 2.10.0+cu130
torchcodec 0.10.0+cu130
torchvision 0.25.0+cu130
tqdm 4.67.3
transformers 4.50.0
triton 3.6.0
typing_extensions 4.15.0
urllib3 2.6.3
wheel 0.46.3
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA is available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}')"
PyTorch version: 2.10.0+cu130
CUDA is available: True
CUDA version: 13.0
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torch; a=torch.randn(1, 3, 224, 224).cuda(); print('GPU ok')"
/workspace/.venv/lib/python3.12/site-packages/torch/cuda/__init__.py:435: UserWarning:
Found GPU0 NVIDIA GB10 which is of cuda capability 12.1.
Minimum and Maximum cuda capability supported by this version of PyTorch is
(8.0) - (12.0)
queued_call()
GPU ok
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torchaudio; print(f'torchaudio version: {torchaudio.__version__}')"
torchaudio version: 2.10.0+cu130
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torchcodec; print(f'torchcodec version: {torchcodec.__version__}')"
torchcodec version: 0.10.0+cu130
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torch; import torchcodec; print('modules:', dir(torchcodec))"
modules: ['AudioSamples', 'Frame', 'FrameBatch', 'Path', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_core', '_frame', '_internally_replaced_utils', 'cmake_prefix_path', 'core_library_path', 'decoders', 'encoders', 'ffmpeg_major_version', 'samplers', 'transforms', 'version']
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import flash_attn; print(f'Flash Attention version: {flash_attn.__version__}')"
Flash Attention version: 2.8.3
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import torchvision; print(f'torchvision version: {torchvision.__version__}')"
torchvision version: 0.25.0+cu130
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import librosa; print('import librosa ok')"
import librosa ok
(.venv) root@7d3af3d35cd6:/workspace# python3 -c "import transformers; print('Transformers OK')"
Transformers OK
=============================================
$ docker image rm gpt-sovits-cu128-fixed:latest
$ docker builder prune
$ docker build --progress=plain \
--no-cache \
-t gpt-sovits-cu128-fixed \
-f my_Dockerfile . 2>&1 | tee ../aaa.txt
$ docker run --rm -it --gpus all gpt-sovits-cu128-fixed /bin/bash
$ ffmpeg -i /home/spark/DiskD/audio_llm/audio_openai/audio_openai/audio_files/6551d5b0-e35f-43ee-b262-79c55ad548ea.webm \
-vn -acodec pcm_s16le -ar 44100 -ac 2 samples/hard_way.wav
$ docker compose -f my_docker-compose.yaml up -d
$ docker logs -f GPT-SoVITS-CU128
$ docker stop GPT-SoVITS-CU128
$ docker start GPT-SoVITS-CU128
$ docker restart GPT-SoVITS-CU128
$ docker logs -f GPT-SoVITS-CU128
$ curl -X POST http://localhost:9880/ \
-H "Content-Type: application/json" \
-d '{
"text": "你好,這是中英文語音 Zero-shot TTS 測試",
"text_language": "zh"
}' \
--output out.wav
$ curl -X POST http://localhost:9880/ \
-H "Content-Type: application/json" \
-d '{
"refer_wav_path": "samples/output.wav",
"prompt_text": "說書相生這種東西仍靠一張嘴,通過語言的結構把看官聽眾吸引到故事裡面,在演出的時候,要求你身上的每個動作都必須要有含義。",
"prompt_language": "zh",
"text": "你好,這是中英文語音 Zero-shot TTS 測試",
"text_language": "zh"
}' \
--output out.wav
$ docker exec -it GPT-SoVITS-CU128 bash
(base)# python webui.py
沒有留言:
張貼留言