From 0f5277c22e3dbbdf1a09f699788133b29cc8779d Mon Sep 17 00:00:00 2001 From: dekun Date: Fri, 12 Jun 2026 16:11:57 +0800 Subject: [PATCH] Add Whisper offline loading for air-gapped servers. Pre-download via HF mirror scripts so inner-network deploys avoid Hub Network is unreachable errors. Co-authored-by: Cursor --- .env.example | 2 + DEPLOY.md | 15 +- config.py | 24 +- scripts/download_all_models.sh | 15 ++ scripts/download_whisper_models.sh | 59 +++++ whisper_service.py | 359 ++++++++++++++++------------- 6 files changed, 310 insertions(+), 164 deletions(-) create mode 100644 scripts/download_all_models.sh create mode 100644 scripts/download_whisper_models.sh diff --git a/.env.example b/.env.example index 3d93425..4ad7ce7 100644 --- a/.env.example +++ b/.env.example @@ -10,4 +10,6 @@ OLLAMA_PORT=11434 # ChatTTS 模型目录(预下载脚本写入) # CHATTTS_MODEL_DIR=/opt/Trading_Studio/models/ChatTTS +# WHISPER_MODEL_DIR=/opt/Trading_Studio/models/whisper +# WHISPER_MODEL_SIZE=small # HF_ENDPOINT=https://hf-mirror.com diff --git a/DEPLOY.md b/DEPLOY.md index 658c13c..7536e72 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -361,9 +361,20 @@ cd /opt/Trading_Studio pip install -r requirements.txt ``` -### 6.1 Faster-Whisper +### 6.1 Faster-Whisper(必须预下载) -随 `requirements.txt` 安装。首次运行会自动下载 `small` 模型(约 500MB)至 HuggingFace 缓存。 +随 `requirements.txt` 安装。**内网服务器无法访问 HuggingFace 时会报 `Network is unreachable`。** + +部署后执行(推荐与 ChatTTS 一起): + +```bash +cd /opt/Trading_Studio +bash scripts/download_all_models.sh +# 或仅 Whisper: bash scripts/download_whisper_models.sh small +pm2 restart trading_studio +``` + +模型目录:`/opt/Trading_Studio/models/whisper/small/`(含 `model.bin`)。 ### 6.2 ChatTTS(必须预下载,勿依赖 GitHub) diff --git a/config.py b/config.py index c0bc924..c053539 100644 --- a/config.py +++ b/config.py @@ -76,23 +76,33 @@ SYSTEM_PROMPT = ( "去掉所有无意义的口头禅,字数不做删减。" ) +# --------------------------------------------------------------------------- +# 路径 +# --------------------------------------------------------------------------- +BASE_DIR = Path(__file__).resolve().parent +INSTALL_DIR = Path("/opt/Trading_Studio") + # --------------------------------------------------------------------------- # Faster-Whisper 配置 # --------------------------------------------------------------------------- -WHISPER_MODEL_SIZE = "small" +WHISPER_MODEL_SIZE = _env_str("WHISPER_MODEL_SIZE", "small") WHISPER_DEVICE = "cuda" WHISPER_COMPUTE_TYPE = "float16" WHISPER_LANGUAGE = "zh" +WHISPER_MODEL_DIR = Path(_env_str("WHISPER_MODEL_DIR", str(BASE_DIR / "models" / "whisper"))) + +WHISPER_HF_REPO = { + "tiny": "Systran/faster-whisper-tiny", + "base": "Systran/faster-whisper-base", + "small": "Systran/faster-whisper-small", + "medium": "Systran/faster-whisper-medium", + "large-v2": "Systran/faster-whisper-large-v2", + "large-v3": "Systran/faster-whisper-large-v3", +} # --------------------------------------------------------------------------- # ChatTTS 配置 # --------------------------------------------------------------------------- -# 标准生产安装路径(/opt,root 部署) -INSTALL_DIR = Path("/opt/Trading_Studio") - -# 项目根目录(开发/生产均自适应,以实际 app.py 所在目录为准) -BASE_DIR = Path(__file__).resolve().parent - # 固定音色 Embedding 存储路径 SPEAKER_EMB_PATH = BASE_DIR / "speaker_emb.pt" diff --git a/scripts/download_all_models.sh b/scripts/download_all_models.sh new file mode 100644 index 0000000..8b10737 --- /dev/null +++ b/scripts/download_all_models.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# 一次性下载 Whisper + ChatTTS 全部模型(内网服务器部署必跑) +set -euo pipefail +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "${ROOT}" + +echo "========== 下载 Whisper (small) ==========" +bash scripts/download_whisper_models.sh small + +echo "" +echo "========== 下载 ChatTTS ==========" +bash scripts/download_chattts_models.sh + +echo "" +echo "[OK] 全部模型下载完成,请: pm2 restart trading_studio" diff --git a/scripts/download_whisper_models.sh b/scripts/download_whisper_models.sh new file mode 100644 index 0000000..1c8fe4c --- /dev/null +++ b/scripts/download_whisper_models.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# 预下载 Faster-Whisper 模型(HF 镜像,内网服务器离线可用) +# 用法: bash scripts/download_whisper_models.sh [tiny|base|small|medium|large-v3] +set -euo pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +SIZE="${1:-small}" +VENV_PY="${ROOT}/venv/bin/python" +MODEL_DIR="${WHISPER_MODEL_DIR:-${ROOT}/models/whisper}/${SIZE}" + +export HF_ENDPOINT="${HF_ENDPOINT:-https://hf-mirror.com}" +export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-600}" +export HF_HOME="${HF_HOME:-${ROOT}/models/hf_cache}" +export MODEL_DIR +export WHISPER_SIZE="${SIZE}" + +echo "[INFO] Whisper 模型: ${SIZE}" +echo "[INFO] 保存目录: ${MODEL_DIR}" +echo "[INFO] HF 镜像: ${HF_ENDPOINT}" + +if [[ ! -x "${VENV_PY}" ]]; then + echo "[ERROR] 未找到 venv,请先 bash deploy.sh deps" + exit 1 +fi + +"${VENV_PY}" -m pip install -q huggingface_hub + +"${VENV_PY}" << 'PY' +import os +from pathlib import Path +from huggingface_hub import snapshot_download + +size = os.environ["WHISPER_SIZE"] +repos = { + "tiny": "Systran/faster-whisper-tiny", + "base": "Systran/faster-whisper-base", + "small": "Systran/faster-whisper-small", + "medium": "Systran/faster-whisper-medium", + "large-v2": "Systran/faster-whisper-large-v2", + "large-v3": "Systran/faster-whisper-large-v3", +} +repo = repos.get(size) +if not repo: + raise SystemExit(f"未知模型尺寸: {size}, 可选: {list(repos)}") + +target = Path(os.environ["MODEL_DIR"]) +target.mkdir(parents=True, exist_ok=True) + +print(f"[INFO] 正在下载 {repo} ...") +snapshot_download(repo_id=repo, local_dir=str(target), local_dir_use_symlinks=False) + +if not (target / "model.bin").is_file(): + raise SystemExit(f"[ERROR] 下载不完整,未找到 model.bin: {target}") + +print(f"[OK] Whisper 模型就绪: {target}") +PY + +echo "" +echo "[OK] 请执行: pm2 restart trading_studio" diff --git a/whisper_service.py b/whisper_service.py index 2853fc5..9c4aa7e 100644 --- a/whisper_service.py +++ b/whisper_service.py @@ -1,155 +1,204 @@ -""" -Faster-Whisper CUDA 语音识别服务 -封装本地 GPU 加速的音频转写逻辑,适配 RTX 3060 Ti 8GB 显存。 -""" - -from __future__ import annotations - -import logging -import traceback -from typing import Optional, Tuple - -from config import ( - WHISPER_COMPUTE_TYPE, - WHISPER_DEVICE, - WHISPER_LANGUAGE, - WHISPER_MODEL_SIZE, -) - -logger = logging.getLogger(__name__) - -# 全局懒加载模型实例,避免 Gradio 重复初始化占用显存 -_model = None -_model_error: Optional[str] = None - - -def _is_cuda_error(exc: BaseException) -> bool: - """判断异常是否与 CUDA/GPU 相关。""" - msg = str(exc).lower() - cuda_keywords = ( - "cuda", - "cudnn", - "cublas", - "gpu", - "out of memory", - "no kernel image", - "device-side assert", - ) - return any(k in msg for k in cuda_keywords) - - -def get_whisper_model(): - """ - 获取或初始化 Faster-Whisper 模型。 - 强制 device=cuda, compute_type=float16。 - """ - global _model, _model_error - - if _model is not None: - return _model, None - - if _model_error is not None: - return None, _model_error - - try: - from faster_whisper import WhisperModel - - logger.info( - "正在加载 Whisper 模型: size=%s, device=%s, compute_type=%s", - WHISPER_MODEL_SIZE, - WHISPER_DEVICE, - WHISPER_COMPUTE_TYPE, - ) - _model = WhisperModel( - WHISPER_MODEL_SIZE, - device=WHISPER_DEVICE, - compute_type=WHISPER_COMPUTE_TYPE, - ) - logger.info("Whisper 模型加载成功。") - return _model, None - - except ImportError as exc: - _model_error = ( - "未安装 faster-whisper,请执行: pip install faster-whisper\n" - f"原始错误: {exc}" - ) - logger.exception("faster-whisper 导入失败") - return None, _model_error - - except Exception as exc: - if _is_cuda_error(exc): - _model_error = ( - "CUDA 初始化失败,请检查 NVIDIA 驱动、CUDA 运行时及 cuDNN 是否正确安装。\n" - f"错误详情: {exc}\n" - f"{traceback.format_exc()}" - ) - else: - _model_error = f"Whisper 模型加载失败: {exc}\n{traceback.format_exc()}" - logger.exception("Whisper 模型加载异常") - return None, _model_error - - -def transcribe_audio(audio_path: str) -> Tuple[bool, str]: - """ - 将音频文件转写为中文文本。 - - Args: - audio_path: 本地音频文件绝对或相对路径 - - Returns: - (success, text_or_error_message) - """ - if not audio_path: - return False, "未提供音频文件路径。" - - model, init_error = get_whisper_model() - if model is None: - return False, init_error or "Whisper 模型不可用。" - - try: - segments, info = model.transcribe( - audio_path, - language=WHISPER_LANGUAGE, - beam_size=5, - vad_filter=True, - ) - - text_parts = [] - for segment in segments: - text_parts.append(segment.text.strip()) - - result_text = "".join(text_parts).strip() - - if not result_text: - return False, ( - "识别结果为空,请检查音频是否有效、音量是否足够," - f"或尝试更换格式。检测到语言: {getattr(info, 'language', 'unknown')}" - ) - - logger.info( - "转写完成: 语言=%s, 概率=%.2f, 字数=%d", - getattr(info, "language", "?"), - getattr(info, "language_probability", 0.0), - len(result_text), - ) - return True, result_text - - except Exception as exc: - if _is_cuda_error(exc): - err = ( - "CUDA 推理异常:显存可能不足或 GPU 状态异常。" - "建议关闭其他占用显存的进程后重试。\n" - f"错误详情: {exc}" - ) - else: - err = f"音频转写失败: {exc}\n{traceback.format_exc()}" - - logger.exception("transcribe_audio 失败") - return False, err - - -def reset_whisper_model() -> None: - """释放模型引用(用于调试或显存回收)。""" - global _model, _model_error - _model = None - _model_error = None +""" +Faster-Whisper CUDA 语音识别服务 +封装本地 GPU 加速的音频转写逻辑,适配 RTX 3060 Ti 8GB 显存。 +""" + +from __future__ import annotations + +import logging +import os +import traceback +from pathlib import Path +from typing import Optional, Tuple + +from config import ( + BASE_DIR, + HF_ENDPOINT, + HF_HOME, + HF_HUB_DOWNLOAD_TIMEOUT, + WHISPER_COMPUTE_TYPE, + WHISPER_DEVICE, + WHISPER_HF_REPO, + WHISPER_LANGUAGE, + WHISPER_MODEL_DIR, + WHISPER_MODEL_SIZE, +) + +logger = logging.getLogger(__name__) + +_model = None +_model_error: Optional[str] = None + + +def _ensure_hf_env() -> None: + os.environ.setdefault("HF_ENDPOINT", HF_ENDPOINT) + os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", str(HF_HUB_DOWNLOAD_TIMEOUT)) + os.environ.setdefault("HF_HOME", str(HF_HOME)) + WHISPER_MODEL_DIR.mkdir(parents=True, exist_ok=True) + + +def _whisper_local_path() -> Optional[Path]: + """返回已预下载的本地模型目录。""" + local = WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE + if (local / "model.bin").is_file(): + return local + return None + + +def _is_cuda_error(exc: BaseException) -> bool: + msg = str(exc).lower() + cuda_keywords = ( + "cuda", "cudnn", "cublas", "gpu", + "out of memory", "no kernel image", "device-side assert", + ) + return any(k in msg for k in cuda_keywords) + + +def _is_network_error(exc: BaseException) -> bool: + msg = str(exc).lower() + return any( + k in msg + for k in ( + "network is unreachable", + "connection error", + "connecterror", + "timed out", + "couldn't connect", + "name resolution", + "hub", + ) + ) + + +def _build_load_error(exc: BaseException) -> str: + lines = [ + "Whisper 模型加载失败。", + f"详情: {exc}", + "", + ] + if _is_network_error(exc): + lines.extend([ + "原因:服务器无法访问 HuggingFace 下载模型(内网/无外网常见)。", + "请在服务器执行(走 HF 镜像,仅需一次):", + f" cd {BASE_DIR}", + " bash scripts/download_whisper_models.sh", + " pm2 restart trading_studio", + "", + f"模型将保存到: {WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE}", + ]) + else: + lines.append(f"完整日志:\n{traceback.format_exc()}") + return "\n".join(lines) + + +def get_whisper_model(): + """获取或初始化 Faster-Whisper 模型(优先本地预下载)。""" + global _model, _model_error + + if _model is not None: + return _model, None + + if _model_error is not None: + return None, _model_error + + try: + _ensure_hf_env() + from faster_whisper import WhisperModel + + local = _whisper_local_path() + if local: + model_id = str(local) + logger.info("Whisper 从本地加载: %s", model_id) + else: + model_id = WHISPER_MODEL_SIZE + logger.warning( + "未找到本地 Whisper 模型 (%s),尝试在线下载(可能失败)…", + WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE, + ) + + logger.info( + "Whisper 加载: model=%s, device=%s, compute_type=%s", + model_id, + WHISPER_DEVICE, + WHISPER_COMPUTE_TYPE, + ) + _model = WhisperModel( + model_id, + device=WHISPER_DEVICE, + compute_type=WHISPER_COMPUTE_TYPE, + download_root=str(WHISPER_MODEL_DIR), + ) + logger.info("Whisper 模型加载成功。") + return _model, None + + except ImportError as exc: + _model_error = ( + "未安装 faster-whisper,请执行: pip install faster-whisper\n" + f"原始错误: {exc}" + ) + logger.exception("faster-whisper 导入失败") + return None, _model_error + + except Exception as exc: + if _is_cuda_error(exc): + _model_error = ( + "CUDA 初始化失败,请检查 NVIDIA 驱动、CUDA 运行时及 cuDNN。\n" + f"错误详情: {exc}" + ) + else: + _model_error = _build_load_error(exc) + logger.exception("Whisper 模型加载异常") + return None, _model_error + + +def transcribe_audio(audio_path: str) -> Tuple[bool, str]: + """将音频文件转写为中文文本。""" + if not audio_path: + return False, "未提供音频文件路径。" + + model, init_error = get_whisper_model() + if model is None: + return False, init_error or "Whisper 模型不可用。" + + try: + segments, info = model.transcribe( + audio_path, + language=WHISPER_LANGUAGE, + beam_size=5, + vad_filter=True, + ) + + text_parts = [segment.text.strip() for segment in segments] + result_text = "".join(text_parts).strip() + + if not result_text: + return False, ( + "识别结果为空,请检查音频是否有效、音量是否足够," + f"或尝试更换格式。检测到语言: {getattr(info, 'language', 'unknown')}" + ) + + logger.info( + "转写完成: 语言=%s, 概率=%.2f, 字数=%d", + getattr(info, "language", "?"), + getattr(info, "language_probability", 0.0), + len(result_text), + ) + return True, result_text + + except Exception as exc: + if _is_cuda_error(exc): + err = ( + "CUDA 推理异常:显存可能不足或 GPU 状态异常。\n" + f"错误详情: {exc}" + ) + else: + err = f"音频转写失败: {exc}\n{traceback.format_exc()}" + + logger.exception("transcribe_audio 失败") + return False, err + + +def reset_whisper_model() -> None: + global _model, _model_error + _model = None + _model_error = None