網頁

2026年4月27日 星期一

DGX Spark 安裝 faster-qwen3-tts

# Fetch the faster-qwen3-tts sources and work from inside the checkout.
$ git clone https://github.com/andimarafioti/faster-qwen3-tts.git
$ cd faster-qwen3-tts/
# Create a virtual environment with uv and activate it (.venv in the checkout).
$ uv venv
$ source .venv/bin/activate
# NOTE(review): no command shown here installs torch or the project into the
# new venv before the checks below — presumably an install step was run at
# this point; confirm.
# Sanity check: import torch, assert that CUDA is available, and print the
# PyTorch version, CUDA version and the name of GPU 0. If the python command
# exits non-zero, the braces group prints a warning plus a suggested command
# for installing a CUDA-enabled PyTorch wheel.
$ python -c "import torch; assert torch.cuda.is_available(), 'CUDA not available'; print(f'PyTorch {torch.__version__}, CUDA {torch.version.cuda}, GPU: {torch.cuda.get_device_name(0)}')" || {
    echo ""
    echo "WARNING: CUDA not available. You may need to install a CUDA-enabled PyTorch wheel."
    echo "  uv pip install torch --index-url https://download.pytorch.org/whl/cu128 --python .venv/bin/python"
    echo "  Note: RTX 50xx / Blackwell GPUs need CUDA 12.8 wheels (PyTorch 2.7+)."
}
# Install flash-attn without build isolation, with MAX_JOBS=6 set in the
# environment of the install (presumably to cap parallel compile jobs —
# confirm against the flash-attn install notes).
$ MAX_JOBS=6 uv pip install flash-attn --no-build-isolation
# Confirm that flash_attn imports and report its version.
$ python3 -c "import flash_attn; print(f'Flash Attention version: {flash_attn.__version__}')"

# Install the sounddevice Python package (presumably used by the CLI for
# audio playback — confirm).
$ uv pip install sounddevice
# Run the `clone` subcommand against the local Base checkpoint, passing a
# reference recording (--ref-audio) together with its transcript (--ref-text),
# and write the result to out.wav.
# NOTE(review): --language is English while --text is mostly Chinese —
# confirm this is the intended setting for mixed-language input.
$ faster-qwen3-tts clone \
  --model /mnt/480SSD/models/Qwen3-TTS-12Hz-1.7B-Base \
  --text "你好,這是中英文語音 Zero-shot TTS 測試" \
  --language English \
  --ref-audio ref_audio.wav \
  --ref-text "I'm confused why some people have super short timelines, yet at the same time are bullish on scaling up reinforcement learning atop LLMs. If we're actually close to a human-like learner, then this whole approach of training on verifiable outcomes is doomed." \
  --output out.wav
# Run the `custom` subcommand with --list-speakers against the CustomVoice
# checkpoint. --text and --output carry placeholder values here (presumably
# because the CLI requires them even when only listing — confirm).
$ faster-qwen3-tts custom --model /mnt/480SSD/models/Qwen3-TTS-12Hz-1.7B-CustomVoice --list-speakers --text aaa --output out.wav
# Run the `custom` subcommand with the CustomVoice checkpoint and the speaker
# named "aiden", writing the result to out.wav.
$ faster-qwen3-tts custom \
  --model /mnt/480SSD/models/Qwen3-TTS-12Hz-1.7B-CustomVoice \
  --speaker aiden \
  --text "你好,這是中英文語音 Zero-shot TTS 測試" \
  --language English \
  --output out.wav
# Install the checkout in editable mode together with its "demo" extra.
$ uv pip install -e ".[demo]"
# Install nano_parakeet (presumably a dependency of the demo server — confirm).
$ uv pip install nano_parakeet
# Start the demo server script with the local Base checkpoint.
$ python demo/server.py --model /mnt/480SSD/models/Qwen3-TTS-12Hz-1.7B-Base
# Once the server is running, open http://localhost:7860 in a browser.
# Install the faster-qwen3-tts package by name with its "demo" extra.
# NOTE(review): this is a by-name install rather than an editable install of
# the checkout; if an editable install is already present in the venv this may
# replace it — confirm the step is needed.
$ uv pip install "faster-qwen3-tts[demo]"
# Convert a recorded .webm file to WAV for use as reference audio:
# -vn drops any video stream, pcm_s16le selects 16-bit PCM audio,
# -ar 44100 sets the sample rate to 44.1 kHz and -ac 2 outputs two channels.
$ ffmpeg -i /home/spark/DiskD/audio_llm/audio_openai/audio_openai/audio_files/6551d5b0-e35f-43ee-b262-79c55ad548ea.webm \
  -vn -acodec pcm_s16le -ar 44100 -ac 2 examples/hard_way.wav
# Start the example OpenAI-style server script on port 8102 with the Base
# checkpoint, passing the converted recording and its transcript as the
# reference audio/text.
$ python examples/openai_server.py \
    --model /mnt/480SSD/models/Qwen3-TTS-12Hz-1.7B-Base \
    --ref-audio examples/hard_way.wav \
    --ref-text "行路難,行路難,多歧路,今安在?長風破浪會有時,直掛雲帆濟滄海。" \
    --language English --port 8102
# Send a JSON request to the server's /v1/audio/speech endpoint asking for
# WAV output, and save the response body to speech.wav.
$ curl http://localhost:8102/v1/audio/speech \
    -H "Content-Type: application/json" \
    -d '{"model": "tts-1", "input": "你好,這是中英文語音 Zero-shot TTS 測試", "voice": "alloy", "response_format": "wav"}' \
    --output speech.wav

沒有留言:

張貼留言