Fix CUDA OOM on 8GB GPUs by unloading Whisper before ChatTTS runs, and vice versa.

Release GPU memory before switching between TTS and ASR, lower the TTS token limits, and set PYTORCH_CUDA_ALLOC_CONF in PM2.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 17:03:37 +08:00
parent 82f99c0b89
commit 0cce6cda7c
7 changed files with 169 additions and 40 deletions
@@ -156,6 +156,14 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
    if not audio_path:
        return False, "未提供音频文件路径。"

+    # 识别前释放 ChatTTS，避免与 Whisper 同占 8GB 显存
+    try:
+        from tts_service import reset_chattts_instance
+
+        reset_chattts_instance()
+    except Exception:
+        logger.debug("释放 ChatTTS 显存时跳过", exc_info=True)
+
    model, init_error = get_whisper_model()
    if model is None:
        return False, init_error or "Whisper 模型不可用。"
@@ -199,6 +207,17 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:


 def reset_whisper_model() -> None:
+    """Unload the Whisper model and try to reclaim GPU memory.
+
+    Clears the cached model and the cached initialization error, then calls
+    ``gpu_utils.release_cuda_cache()`` (presumably frees cached CUDA memory;
+    confirm against that helper).
+    """
    global _model, _model_error
+    # Rebinding to None is what drops this module's reference to the model.
+    # A preceding ``del _model`` would add nothing and would leave the global
+    # unbound for a moment, so a concurrent reader could hit NameError.
    _model = None
    _model_error = None
+
+    # Function-scope import: gpu_utils is only loaded when an unload is
+    # actually requested.
+    from gpu_utils import release_cuda_cache
+
+    release_cuda_cache()
+    logger.info("Whisper 模型已卸载，显存已尝试回收。")