Fix CUDA OOM by mutually unloading Whisper and ChatTTS on 8GB GPU.
Release GPU memory before switching between TTS and ASR, lower the TTS token limits, and set PYTORCH_CUDA_ALLOC_CONF in PM2.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -156,6 +156,14 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
|
||||
if not audio_path:
|
||||
return False, "未提供音频文件路径。"
|
||||
|
||||
# 识别前释放 ChatTTS,避免与 Whisper 同占 8GB 显存
|
||||
try:
|
||||
from tts_service import reset_chattts_instance
|
||||
|
||||
reset_chattts_instance()
|
||||
except Exception:
|
||||
logger.debug("释放 ChatTTS 显存时跳过", exc_info=True)
|
||||
|
||||
model, init_error = get_whisper_model()
|
||||
if model is None:
|
||||
return False, init_error or "Whisper 模型不可用。"
|
||||
@@ -199,6 +207,17 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
|
||||
|
||||
|
||||
def reset_whisper_model() -> None:
    """Unload the cached Whisper model and try to reclaim GPU memory.

    Clears the module-level ``_model`` and ``_model_error`` globals, then
    calls ``gpu_utils.release_cuda_cache()`` and logs the outcome. Clearing
    the globals is harmless when no model is currently loaded.
    """
    global _model, _model_error

    # Rebind instead of `del`: deleting a global removes the name from the
    # module namespace until it is reassigned, so any reader in between would
    # hit NameError. Assigning None drops this module's reference just the same.
    _model = None
    _model_error = None

    # Imported at call time, as in the original, rather than at module top.
    from gpu_utils import release_cuda_cache

    release_cuda_cache()
    logger.info("Whisper 模型已卸载,显存已尝试回收。")
|
||||
|
||||
Reference in New Issue
Block a user