Fix CUDA OOM on 8GB GPUs by keeping Whisper and ChatTTS mutually exclusive (each is unloaded before the other loads).

Release GPU memory before TTS/ASR switches, lower TTS token limits, and set PYTORCH_CUDA_ALLOC_CONF in PM2.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 17:03:37 +08:00
parent 82f99c0b89
commit 0cce6cda7c
7 changed files with 169 additions and 40 deletions
+35
View File
@@ -0,0 +1,35 @@
"""GPU memory reclamation helpers (3060 Ti, 8 GB: Whisper and ChatTTS should not stay resident at the same time)."""
from __future__ import annotations
import gc
import logging
logger = logging.getLogger(__name__)
def release_cuda_cache() -> None:
    """Run a garbage-collection pass, then empty PyTorch's CUDA cache.

    After the GC pass nothing further happens when PyTorch cannot be
    imported or when CUDA is reported as unavailable.
    """
    gc.collect()
    try:
        import torch

        cuda = torch.cuda
        if not cuda.is_available():
            return
        cuda.empty_cache()
        # Guarded by hasattr, so torch builds without ipc_collect skip this step.
        if hasattr(cuda, "ipc_collect"):
            cuda.ipc_collect()
    except ImportError:
        # Best-effort helper: a missing PyTorch is not an error here.
        return
def cuda_memory_summary() -> str:
    """Return a short, human-readable GPU memory usage line (for debugging).

    Never raises: any failure, including PyTorch being unimportable, is
    reported inside the returned string instead.
    """
    try:
        import torch

        if torch.cuda.is_available():
            free_bytes, total_bytes = torch.cuda.mem_get_info()
            used_gb = (total_bytes - free_bytes) / 1024**3
            total_gb = total_bytes / 1024**3
            return f"GPU 显存: 已用 {used_gb:.2f}GB / {total_gb:.2f}GB"
        return "CUDA 不可用"
    except Exception as err:
        return f"显存查询失败: {err}"