Fix CUDA OOM on an 8GB GPU by keeping Whisper and ChatTTS mutually exclusive in GPU memory.

Release GPU memory before each TTS/ASR switch, lower the TTS token limits, and set PYTORCH_CUDA_ALLOC_CONF in the PM2 config.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 17:03:37 +08:00
parent 82f99c0b89
commit 0cce6cda7c
7 changed files with 169 additions and 40 deletions
@@ -0,0 +1,35 @@
+"""GPU 显存回收工具（3060 Ti 8GB：Whisper 与 ChatTTS 不宜同时驻留）。"""
+
+from __future__ import annotations
+
+import gc
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def release_cuda_cache() -> None:
+    """Run a garbage-collection pass, then empty PyTorch's CUDA cache.
+
+    After ``gc.collect()``, torch is imported lazily. If CUDA is available,
+    ``torch.cuda.empty_cache()`` is called, followed by
+    ``torch.cuda.ipc_collect()`` when that function exists. An
+    ``ImportError`` raised anywhere in that sequence is ignored, so only
+    the GC pass happens when torch is not installed.
+    """
+    gc.collect()
+    try:
+        import torch
+
+        cuda = torch.cuda
+        if not cuda.is_available():
+            return
+        cuda.empty_cache()
+        # Guarded lookup: only call ipc_collect when this torch build has it.
+        if hasattr(cuda, "ipc_collect"):
+            cuda.ipc_collect()
+    except ImportError:
+        # Best effort: without torch there is no CUDA cache to release.
+        return
+
+
+def cuda_memory_summary() -> str:
+    """Return a one-line GPU memory usage summary (intended for debugging).
+
+    Returns:
+        A string giving used and total memory in units of 1024**3 bytes,
+        computed from ``torch.cuda.mem_get_info()``; a fixed message when
+        CUDA is unavailable; or a failure message embedding the exception
+        text when anything in the query raises ``Exception`` (including a
+        failed torch import).
+    """
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            free_bytes, total_bytes = torch.cuda.mem_get_info()
+            used_gb = (total_bytes - free_bytes) / 1024**3
+            total_gb = total_bytes / 1024**3
+            return f"GPU 显存: 已用 {used_gb:.2f}GB / {total_gb:.2f}GB"
+        return "CUDA 不可用"
+    except Exception as err:
+        # This debug helper reports the failure as text rather than raising.
+        return f"显存查询失败: {err}"