Fix CUDA OOM on 8GB GPUs by unloading Whisper and ChatTTS in turn, so only one model occupies GPU memory at a time.

Release GPU memory before TTS/ASR switches, lower TTS token limits, and set PYTORCH_CUDA_ALLOC_CONF in PM2.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 17:03:37 +08:00
parent 82f99c0b89
commit 0cce6cda7c
7 changed files with 169 additions and 40 deletions
+19
View File
@@ -156,6 +156,14 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
if not audio_path:
return False, "未提供音频文件路径。"
# 识别前释放 ChatTTS,避免与 Whisper 同占 8GB 显存
try:
from tts_service import reset_chattts_instance
reset_chattts_instance()
except Exception:
logger.debug("释放 ChatTTS 显存时跳过", exc_info=True)
model, init_error = get_whisper_model()
if model is None:
return False, init_error or "Whisper 模型不可用。"
@@ -199,6 +207,17 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
def reset_whisper_model() -> None:
    """Unload the cached Whisper model and try to reclaim GPU memory.

    Clears the module-level ``_model`` and ``_model_error`` caches, then calls
    ``gpu_utils.release_cuda_cache()``. Safe to call when no model is loaded.

    NOTE(review): ``release_cuda_cache`` is defined outside this view; it
    presumably empties the CUDA allocator cache -- confirm in ``gpu_utils``.
    """
    global _model, _model_error
    # Rebind instead of ``del``: deleting a global unbinds the module name, so
    # any concurrent reader of ``_model`` would hit NameError until it is
    # reassigned. Rebinding to None drops this module's reference to the model
    # just the same, without that window and without a swallow-all ``except``.
    _model = None
    _model_error = None

    # Imported at call time, as the original did, so the module-level import
    # behaviour of this file is unchanged.
    from gpu_utils import release_cuda_cache

    release_cuda_cache()
    logger.info("Whisper 模型已卸载,显存已尝试回收。")