Fix CUDA OOM on 8GB GPUs by unloading Whisper before loading ChatTTS, and vice versa.
Release GPU memory before switching between TTS and ASR, lower the TTS token limits, and set PYTORCH_CUDA_ALLOC_CONF in PM2.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -136,8 +136,11 @@ TTS_TOP_P = 0.7
|
||||
TTS_TOP_K = 20
|
||||
TTS_SPEED_PROMPT = "[speed_5]"
|
||||
|
||||
# 单段 TTS 最大字数(超长稿按句切分后逐段合成再拼接)
|
||||
TTS_MAX_CHARS_PER_CHUNK = _env_int("TTS_MAX_CHARS_PER_CHUNK", 280)
|
||||
# 单段 TTS 最大字数(超长稿按句切分后逐段合成再拼接;8GB 显存建议 ≤200)
|
||||
TTS_MAX_CHARS_PER_CHUNK = _env_int("TTS_MAX_CHARS_PER_CHUNK", 200)
|
||||
|
||||
# ChatTTS 单段最大生成 token(越小越省显存,长句会自动切多段)
|
||||
TTS_MAX_NEW_TOKEN = _env_int("TTS_MAX_NEW_TOKEN", 1024)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 上传临时文件目录
|
||||
|
||||
Reference in New Issue
Block a user