Fix CUDA OOM on 8GB GPUs by keeping only one of Whisper and ChatTTS loaded at a time.

Release GPU memory before TTS/ASR switches, lower TTS token limits, and set PYTORCH_CUDA_ALLOC_CONF in PM2.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 17:03:37 +08:00
parent 82f99c0b89
commit 0cce6cda7c
7 changed files with 169 additions and 40 deletions
+5 -2
View File
@@ -136,8 +136,11 @@ TTS_TOP_P = 0.7
TTS_TOP_K = 20
TTS_SPEED_PROMPT = "[speed_5]"
# 单段 TTS 最大字数(超长稿按句切分后逐段合成再拼接)
TTS_MAX_CHARS_PER_CHUNK = _env_int("TTS_MAX_CHARS_PER_CHUNK", 280)
# 单段 TTS 最大字数(超长稿按句切分后逐段合成再拼接;8GB 显存建议 ≤200)
TTS_MAX_CHARS_PER_CHUNK = _env_int("TTS_MAX_CHARS_PER_CHUNK", 200)
# ChatTTS 单段最大生成 token(越小越省显存,长句会自动切多段)
TTS_MAX_NEW_TOKEN = _env_int("TTS_MAX_NEW_TOKEN", 1024)
# ---------------------------------------------------------------------------
# 上传临时文件目录