Files
Trading_Studio/whisper_service.py
T

156 lines
4.4 KiB
Python

"""
Faster-Whisper CUDA 语音识别服务
封装本地 GPU 加速的音频转写逻辑,适配 RTX 3060 Ti 8GB 显存。
"""
from __future__ import annotations
import logging
import traceback
from typing import Optional, Tuple
from config import (
WHISPER_COMPUTE_TYPE,
WHISPER_DEVICE,
WHISPER_LANGUAGE,
WHISPER_MODEL_SIZE,
)
logger = logging.getLogger(__name__)
# Lazily loaded, module-wide model instance, so that Gradio does not
# re-initialize the model repeatedly and tie up GPU memory.
_model = None
# Message describing a failed load attempt; None while no failure is recorded.
_model_error: Optional[str] = None
def _is_cuda_error(exc: BaseException) -> bool:
    """Report whether an exception's message looks CUDA/GPU related.

    The decision is a case-insensitive substring search of ``str(exc)``
    against a fixed list of keywords; the exception type is not inspected.

    Args:
        exc: The exception whose message is examined.

    Returns:
        True if any keyword occurs in the lower-cased message, else False.
    """
    lowered = str(exc).lower()
    for keyword in (
        "cuda",
        "cudnn",
        "cublas",
        "gpu",
        "out of memory",
        "no kernel image",
        "device-side assert",
    ):
        if keyword in lowered:
            return True
    return False
def get_whisper_model():
    """Return the shared Faster-Whisper model, loading it on first use.

    The model is built from the ``WHISPER_MODEL_SIZE``, ``WHISPER_DEVICE``
    and ``WHISPER_COMPUTE_TYPE`` config values. Both outcomes are cached in
    module globals: a loaded model is reused on later calls, and a failed
    load is reported again without retrying for as long as ``_model_error``
    stays set.

    Returns:
        ``(model, None)`` when a model is available, otherwise
        ``(None, error_message)`` describing why loading failed.
    """
    global _model, _model_error

    # Serve from the cache first: either a ready model or a remembered failure.
    if _model is not None:
        return _model, None
    if _model_error is not None:
        return None, _model_error

    try:
        # Imported lazily so a missing package becomes an error message
        # returned to the caller instead of an import-time crash.
        from faster_whisper import WhisperModel

        logger.info(
            "正在加载 Whisper 模型: size=%s, device=%s, compute_type=%s",
            WHISPER_MODEL_SIZE,
            WHISPER_DEVICE,
            WHISPER_COMPUTE_TYPE,
        )
        _model = WhisperModel(
            WHISPER_MODEL_SIZE,
            device=WHISPER_DEVICE,
            compute_type=WHISPER_COMPUTE_TYPE,
        )
        logger.info("Whisper 模型加载成功。")
        return _model, None
    except ImportError as exc:
        install_hint = "未安装 faster-whisper,请执行: pip install faster-whisper\n"
        _model_error = install_hint + f"原始错误: {exc}"
        logger.exception("faster-whisper 导入失败")
        return None, _model_error
    except Exception as exc:
        trace = traceback.format_exc()
        if not _is_cuda_error(exc):
            _model_error = f"Whisper 模型加载失败: {exc}\n{trace}"
        else:
            # CUDA-looking failures get an extra hint about the GPU stack.
            _model_error = (
                "CUDA 初始化失败,请检查 NVIDIA 驱动、CUDA 运行时及 cuDNN 是否正确安装。\n"
                f"错误详情: {exc}\n"
                f"{trace}"
            )
        logger.exception("Whisper 模型加载异常")
        return None, _model_error
def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
    """Transcribe a local audio file to text with the shared Whisper model.

    The path is validated before the model is requested, so a missing file
    fails fast with a clear message instead of first triggering a model load
    and then surfacing as a generic transcription error. The decoding
    language is taken from ``WHISPER_LANGUAGE``; segment texts are stripped
    and concatenated without a separator.

    Args:
        audio_path: Absolute or relative path of a local audio file.

    Returns:
        ``(True, text)`` on success, otherwise ``(False, error_message)``.
        Failures while loading the model or transcribing are reported through
        the returned message.
    """
    import os  # local import: only needed for the path pre-check below

    if not audio_path:
        return False, "未提供音频文件路径。"

    # Only path-like inputs are checked; anything else is passed through to
    # the model untouched so existing callers keep working.
    if isinstance(audio_path, (str, os.PathLike)) and not os.path.isfile(audio_path):
        return False, f"音频文件不存在或不是有效文件: {audio_path}"

    model, init_error = get_whisper_model()
    if model is None:
        return False, init_error or "Whisper 模型不可用。"

    try:
        segments, info = model.transcribe(
            audio_path,
            language=WHISPER_LANGUAGE,
            beam_size=5,
            vad_filter=True,
        )
        # `segments` is consumed inside the try block so that errors raised
        # while iterating it are handled by the except clause below.
        text_parts = [segment.text.strip() for segment in segments]
        result_text = "".join(text_parts).strip()

        if not result_text:
            return False, (
                "识别结果为空,请检查音频是否有效、音量是否足够,"
                f"或尝试更换格式。检测到语言: {getattr(info, 'language', 'unknown')}"
            )

        logger.info(
            "转写完成: 语言=%s, 概率=%.2f, 字数=%d",
            getattr(info, "language", "?"),
            getattr(info, "language_probability", 0.0),
            len(result_text),
        )
        return True, result_text
    except Exception as exc:
        if _is_cuda_error(exc):
            err = (
                "CUDA 推理异常:显存可能不足或 GPU 状态异常。"
                "建议关闭其他占用显存的进程后重试。\n"
                f"错误详情: {exc}"
            )
        else:
            err = f"音频转写失败: {exc}\n{traceback.format_exc()}"
        logger.exception("transcribe_audio 失败")
        return False, err
def reset_whisper_model() -> None:
    """Clear the cached model and the cached load error.

    Intended for debugging or for letting GPU memory be reclaimed. Only this
    module's own reference is dropped; whether memory is actually freed
    depends on no other references to the model remaining.
    """
    global _model, _model_error
    _model = _model_error = None