Compare commits

...

2 Commits

Author SHA1 Message Date
dekun 0f5277c22e Add Whisper offline loading for air-gapped servers.
Pre-download via HF mirror scripts so inner-network deploys avoid Hub Network is unreachable errors.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 16:11:57 +08:00
dekun 39e29fe6a9 Load mobile audio via ffmpeg to avoid librosa PySoundFile warnings.
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 16:05:55 +08:00
7 changed files with 371 additions and 172 deletions
+2
View File
@@ -10,4 +10,6 @@ OLLAMA_PORT=11434
# ChatTTS 模型目录(预下载脚本写入) # ChatTTS 模型目录(预下载脚本写入)
# CHATTTS_MODEL_DIR=/opt/Trading_Studio/models/ChatTTS # CHATTTS_MODEL_DIR=/opt/Trading_Studio/models/ChatTTS
# WHISPER_MODEL_DIR=/opt/Trading_Studio/models/whisper
# WHISPER_MODEL_SIZE=small
# HF_ENDPOINT=https://hf-mirror.com # HF_ENDPOINT=https://hf-mirror.com
+13 -2
View File
@@ -361,9 +361,20 @@ cd /opt/Trading_Studio
pip install -r requirements.txt pip install -r requirements.txt
``` ```
### 6.1 Faster-Whisper ### 6.1 Faster-Whisper(必须预下载)
`requirements.txt` 安装。首次运行会自动下载 `small` 模型(约 500MB)至 HuggingFace 缓存。 `requirements.txt` 安装。**内网服务器无法访问 HuggingFace 时会报 `Network is unreachable`。**
部署后执行(推荐与 ChatTTS 一起):
```bash
cd /opt/Trading_Studio
bash scripts/download_all_models.sh
# 或仅 Whisper: bash scripts/download_whisper_models.sh small
pm2 restart trading_studio
```
模型目录:`/opt/Trading_Studio/models/whisper/small/`(含 `model.bin`)。
### 6.2 ChatTTS(必须预下载,勿依赖 GitHub) ### 6.2 ChatTTS(必须预下载,勿依赖 GitHub)
+17 -7
View File
@@ -76,23 +76,33 @@ SYSTEM_PROMPT = (
"去掉所有无意义的口头禅,字数不做删减。" "去掉所有无意义的口头禅,字数不做删减。"
) )
# ---------------------------------------------------------------------------
# 路径
# ---------------------------------------------------------------------------
BASE_DIR = Path(__file__).resolve().parent
INSTALL_DIR = Path("/opt/Trading_Studio")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Faster-Whisper 配置 # Faster-Whisper 配置
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
WHISPER_MODEL_SIZE = "small" WHISPER_MODEL_SIZE = _env_str("WHISPER_MODEL_SIZE", "small")
WHISPER_DEVICE = "cuda" WHISPER_DEVICE = "cuda"
WHISPER_COMPUTE_TYPE = "float16" WHISPER_COMPUTE_TYPE = "float16"
WHISPER_LANGUAGE = "zh" WHISPER_LANGUAGE = "zh"
WHISPER_MODEL_DIR = Path(_env_str("WHISPER_MODEL_DIR", str(BASE_DIR / "models" / "whisper")))
WHISPER_HF_REPO = {
"tiny": "Systran/faster-whisper-tiny",
"base": "Systran/faster-whisper-base",
"small": "Systran/faster-whisper-small",
"medium": "Systran/faster-whisper-medium",
"large-v2": "Systran/faster-whisper-large-v2",
"large-v3": "Systran/faster-whisper-large-v3",
}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# ChatTTS 配置 # ChatTTS 配置
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# 标准生产安装路径(/opt,root 部署)
INSTALL_DIR = Path("/opt/Trading_Studio")
# 项目根目录(开发/生产均自适应,以实际 app.py 所在目录为准)
BASE_DIR = Path(__file__).resolve().parent
# 固定音色 Embedding 存储路径 # 固定音色 Embedding 存储路径
SPEAKER_EMB_PATH = BASE_DIR / "speaker_emb.pt" SPEAKER_EMB_PATH = BASE_DIR / "speaker_emb.pt"
+15
View File
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Download every model the app needs (Whisper + ChatTTS) in one run.
# Must be executed once when deploying to an intranet / air-gapped server.
#
# Usage: bash scripts/download_all_models.sh [whisper-size]
# The Whisper size is taken from the first argument, then from the
# WHISPER_MODEL_SIZE environment variable, and finally defaults to "small",
# so the downloaded model matches the size the application is configured for.
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${ROOT}"

SIZE="${1:-${WHISPER_MODEL_SIZE:-small}}"

echo "========== 下载 Whisper (${SIZE}) =========="
bash scripts/download_whisper_models.sh "${SIZE}"

echo ""
echo "========== 下载 ChatTTS =========="
bash scripts/download_chattts_models.sh

echo ""
echo "[OK] 全部模型下载完成,请: pm2 restart trading_studio"
+59
View File
@@ -0,0 +1,59 @@
#!/usr/bin/env bash
# Pre-download a Faster-Whisper model through a HuggingFace mirror so the app
# can load it offline on intranet servers.
#
# Usage: bash scripts/download_whisper_models.sh [tiny|base|small|medium|large-v2|large-v3]
# When no size argument is given, WHISPER_MODEL_SIZE from the environment is
# used (falling back to "small").
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
SIZE="${1:-${WHISPER_MODEL_SIZE:-small}}"
VENV_PY="${ROOT}/venv/bin/python"
# Each size gets its own sub-directory under the Whisper model root.
MODEL_DIR="${WHISPER_MODEL_DIR:-${ROOT}/models/whisper}/${SIZE}"

# Caller-provided values win; otherwise fall back to the mirror defaults.
export HF_ENDPOINT="${HF_ENDPOINT:-https://hf-mirror.com}"
export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-600}"
export HF_HOME="${HF_HOME:-${ROOT}/models/hf_cache}"
# MODEL_DIR and WHISPER_SIZE are read by the embedded Python below.
export MODEL_DIR
export WHISPER_SIZE="${SIZE}"

echo "[INFO] Whisper 模型: ${SIZE}"
echo "[INFO] 保存目录: ${MODEL_DIR}"
echo "[INFO] HF 镜像: ${HF_ENDPOINT}"

if [[ ! -x "${VENV_PY}" ]]; then
  echo "[ERROR] 未找到 venv,请先 bash deploy.sh deps"
  exit 1
fi

"${VENV_PY}" -m pip install -q huggingface_hub

# The heredoc delimiter is quoted, so the shell performs no expansion inside it;
# all inputs reach Python through the exported environment variables.
"${VENV_PY}" << 'PY'
import os
from pathlib import Path

from huggingface_hub import snapshot_download

size = os.environ["WHISPER_SIZE"]
# Supported size -> HuggingFace repository id.
repos = {
    "tiny": "Systran/faster-whisper-tiny",
    "base": "Systran/faster-whisper-base",
    "small": "Systran/faster-whisper-small",
    "medium": "Systran/faster-whisper-medium",
    "large-v2": "Systran/faster-whisper-large-v2",
    "large-v3": "Systran/faster-whisper-large-v3",
}
repo = repos.get(size)
if not repo:
    raise SystemExit(f"未知模型尺寸: {size}, 可选: {list(repos)}")

target = Path(os.environ["MODEL_DIR"])
target.mkdir(parents=True, exist_ok=True)
print(f"[INFO] 正在下载 {repo} ...")
snapshot_download(repo_id=repo, local_dir=str(target), local_dir_use_symlinks=False)

# model.bin is used as the completeness marker for the download.
if not (target / "model.bin").is_file():
    raise SystemExit(f"[ERROR] 下载不完整,未找到 model.bin: {target}")
print(f"[OK] Whisper 模型就绪: {target}")
PY

echo ""
echo "[OK] 请执行: pm2 restart trading_studio"
+61 -8
View File
@@ -10,6 +10,7 @@ import logging
import os import os
import traceback import traceback
import uuid import uuid
import warnings
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional, Tuple from typing import Any, Dict, Optional, Tuple
@@ -179,29 +180,81 @@ def get_chattts_instance():
return None, _chat_error return None, _chat_error
def _load_audio_via_ffmpeg(audio_path: str, sample_rate: int) -> np.ndarray:
    """Decode an audio file by transcoding it to WAV with ffmpeg.

    The input is converted to a mono WAV at ``sample_rate`` and then read back
    with ``soundfile``. This is intended for formats such as the webm/m4a
    recordings produced by phones.

    Args:
        audio_path: Path of the source audio file handed to ffmpeg.
        sample_rate: Target sample rate passed to ffmpeg's ``-ar`` option.

    Returns:
        The decoded samples as a ``float32`` numpy array. Multi-channel data,
        if any is returned by the reader, is averaged down to one channel.

    Raises:
        RuntimeError: ffmpeg exited with a non-zero status; the message holds
            the tail of ffmpeg's stderr.
        FileNotFoundError: the ``ffmpeg`` executable could not be started.
        subprocess.TimeoutExpired: ffmpeg ran longer than 120 seconds.
    """
    import subprocess
    import tempfile

    import soundfile as sf

    # A private temporary directory replaces the deprecated tempfile.mktemp():
    # mktemp only returns a name, so another process could create that path
    # before ffmpeg does. The directory and the WAV inside it are removed when
    # the block exits, on success and on error alike.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = str(Path(tmp_dir) / "audio.wav")
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            audio_path,
            "-ac",
            "1",
            "-ar",
            str(sample_rate),
            "-f",
            "wav",
            tmp_path,
        ]
        # errors="replace": ffmpeg echoes file metadata on stderr, which is not
        # guaranteed to be valid text in the locale encoding; a decode error
        # must not turn a successful transcode into a failure.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",
            timeout=120,
        )
        if result.returncode != 0:
            # Only the tail of stderr is kept; the actual error is at the end.
            raise RuntimeError(result.stderr[-500:] if result.stderr else "ffmpeg 转码失败")

        audio, _ = sf.read(tmp_path, dtype="float32", always_2d=False)
        # Defensive down-mix in case the reader still yields several channels.
        if isinstance(audio, np.ndarray) and audio.ndim > 1:
            audio = audio.mean(axis=1)
        return np.asarray(audio, dtype=np.float32)
def _load_audio_for_chattts(audio_path: str, sample_rate: int = TTS_SAMPLE_RATE) -> np.ndarray: def _load_audio_for_chattts(audio_path: str, sample_rate: int = TTS_SAMPLE_RATE) -> np.ndarray:
""" """
加载音频并重采样到 ChatTTS 所需采样率。 加载音频并重采样到 ChatTTS 所需采样率。
优先使用 ChatTTS 自带工具,回退到 librosa。 优先 ChatTTS 工具 → ffmpeg 转码 → librosa 兜底
""" """
errors: list[str] = []
try: try:
from ChatTTS.utils import load_audio from ChatTTS.utils import load_audio
return load_audio(audio_path, sample_rate) return load_audio(audio_path, sample_rate)
except ImportError: except Exception as exc:
pass errors.append(f"ChatTTS.utils: {exc}")
try: try:
from tools.audio import load_audio from tools.audio import load_audio
return load_audio(audio_path, sample_rate) return load_audio(audio_path, sample_rate)
except ImportError: except Exception as exc:
pass errors.append(f"tools.audio: {exc}")
import librosa try:
return _load_audio_via_ffmpeg(audio_path, sample_rate)
except Exception as exc:
errors.append(f"ffmpeg: {exc}")
audio, _ = librosa.load(audio_path, sr=sample_rate, mono=True) try:
return audio import librosa
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", message="PySoundFile failed")
audio, _ = librosa.load(audio_path, sr=sample_rate, mono=True)
return audio
except Exception as exc:
errors.append(f"librosa: {exc}")
raise RuntimeError(
"无法读取音频文件,请上传 wav/mp3/m4a 或确认已安装 ffmpeg。\n"
+ "\n".join(errors[-3:])
)
def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float: def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float:
+204 -155
View File
@@ -1,155 +1,204 @@
""" """
Faster-Whisper CUDA 语音识别服务 Faster-Whisper CUDA 语音识别服务
封装本地 GPU 加速的音频转写逻辑,适配 RTX 3060 Ti 8GB 显存。 封装本地 GPU 加速的音频转写逻辑,适配 RTX 3060 Ti 8GB 显存。
""" """
from __future__ import annotations from __future__ import annotations
import logging import logging
import traceback import os
from typing import Optional, Tuple import traceback
from pathlib import Path
from config import ( from typing import Optional, Tuple
WHISPER_COMPUTE_TYPE,
WHISPER_DEVICE, from config import (
WHISPER_LANGUAGE, BASE_DIR,
WHISPER_MODEL_SIZE, HF_ENDPOINT,
) HF_HOME,
HF_HUB_DOWNLOAD_TIMEOUT,
logger = logging.getLogger(__name__) WHISPER_COMPUTE_TYPE,
WHISPER_DEVICE,
# 全局懒加载模型实例,避免 Gradio 重复初始化占用显存 WHISPER_HF_REPO,
_model = None WHISPER_LANGUAGE,
_model_error: Optional[str] = None WHISPER_MODEL_DIR,
WHISPER_MODEL_SIZE,
)
def _is_cuda_error(exc: BaseException) -> bool:
"""判断异常是否与 CUDA/GPU 相关。""" logger = logging.getLogger(__name__)
msg = str(exc).lower()
cuda_keywords = ( _model = None
"cuda", _model_error: Optional[str] = None
"cudnn",
"cublas",
"gpu", def _ensure_hf_env() -> None:
"out of memory", os.environ.setdefault("HF_ENDPOINT", HF_ENDPOINT)
"no kernel image", os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", str(HF_HUB_DOWNLOAD_TIMEOUT))
"device-side assert", os.environ.setdefault("HF_HOME", str(HF_HOME))
) WHISPER_MODEL_DIR.mkdir(parents=True, exist_ok=True)
return any(k in msg for k in cuda_keywords)
def _whisper_local_path() -> Optional[Path]:
def get_whisper_model(): """返回已预下载的本地模型目录。"""
""" local = WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE
获取或初始化 Faster-Whisper 模型。 if (local / "model.bin").is_file():
强制 device=cuda, compute_type=float16。 return local
""" return None
global _model, _model_error
if _model is not None: def _is_cuda_error(exc: BaseException) -> bool:
return _model, None msg = str(exc).lower()
cuda_keywords = (
if _model_error is not None: "cuda", "cudnn", "cublas", "gpu",
return None, _model_error "out of memory", "no kernel image", "device-side assert",
)
try: return any(k in msg for k in cuda_keywords)
from faster_whisper import WhisperModel
logger.info( def _is_network_error(exc: BaseException) -> bool:
"正在加载 Whisper 模型: size=%s, device=%s, compute_type=%s", msg = str(exc).lower()
WHISPER_MODEL_SIZE, return any(
WHISPER_DEVICE, k in msg
WHISPER_COMPUTE_TYPE, for k in (
) "network is unreachable",
_model = WhisperModel( "connection error",
WHISPER_MODEL_SIZE, "connecterror",
device=WHISPER_DEVICE, "timed out",
compute_type=WHISPER_COMPUTE_TYPE, "couldn't connect",
) "name resolution",
logger.info("Whisper 模型加载成功。") "hub",
return _model, None )
)
except ImportError as exc:
_model_error = (
"未安装 faster-whisper,请执行: pip install faster-whisper\n" def _build_load_error(exc: BaseException) -> str:
f"原始错误: {exc}" lines = [
) "Whisper 模型加载失败。",
logger.exception("faster-whisper 导入失败") f"详情: {exc}",
return None, _model_error "",
]
except Exception as exc: if _is_network_error(exc):
if _is_cuda_error(exc): lines.extend([
_model_error = ( "原因:服务器无法访问 HuggingFace 下载模型(内网/无外网常见)。",
"CUDA 初始化失败,请检查 NVIDIA 驱动、CUDA 运行时及 cuDNN 是否正确安装。\n" "请在服务器执行(走 HF 镜像,仅需一次):",
f"错误详情: {exc}\n" f" cd {BASE_DIR}",
f"{traceback.format_exc()}" " bash scripts/download_whisper_models.sh",
) " pm2 restart trading_studio",
else: "",
_model_error = f"Whisper 模型加载失败: {exc}\n{traceback.format_exc()}" f"模型将保存到: {WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE}",
logger.exception("Whisper 模型加载异常") ])
return None, _model_error else:
lines.append(f"完整日志:\n{traceback.format_exc()}")
return "\n".join(lines)
def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
"""
将音频文件转写为中文文本。 def get_whisper_model():
"""获取或初始化 Faster-Whisper 模型(优先本地预下载)。"""
Args: global _model, _model_error
audio_path: 本地音频文件绝对或相对路径
if _model is not None:
Returns: return _model, None
(success, text_or_error_message)
""" if _model_error is not None:
if not audio_path: return None, _model_error
return False, "未提供音频文件路径。"
try:
model, init_error = get_whisper_model() _ensure_hf_env()
if model is None: from faster_whisper import WhisperModel
return False, init_error or "Whisper 模型不可用。"
local = _whisper_local_path()
try: if local:
segments, info = model.transcribe( model_id = str(local)
audio_path, logger.info("Whisper 从本地加载: %s", model_id)
language=WHISPER_LANGUAGE, else:
beam_size=5, model_id = WHISPER_MODEL_SIZE
vad_filter=True, logger.warning(
) "未找到本地 Whisper 模型 (%s),尝试在线下载(可能失败)…",
WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE,
text_parts = [] )
for segment in segments:
text_parts.append(segment.text.strip()) logger.info(
"Whisper 加载: model=%s, device=%s, compute_type=%s",
result_text = "".join(text_parts).strip() model_id,
WHISPER_DEVICE,
if not result_text: WHISPER_COMPUTE_TYPE,
return False, ( )
"识别结果为空,请检查音频是否有效、音量是否足够," _model = WhisperModel(
f"或尝试更换格式。检测到语言: {getattr(info, 'language', 'unknown')}" model_id,
) device=WHISPER_DEVICE,
compute_type=WHISPER_COMPUTE_TYPE,
logger.info( download_root=str(WHISPER_MODEL_DIR),
"转写完成: 语言=%s, 概率=%.2f, 字数=%d", )
getattr(info, "language", "?"), logger.info("Whisper 模型加载成功。")
getattr(info, "language_probability", 0.0), return _model, None
len(result_text),
) except ImportError as exc:
return True, result_text _model_error = (
"未安装 faster-whisper,请执行: pip install faster-whisper\n"
except Exception as exc: f"原始错误: {exc}"
if _is_cuda_error(exc): )
err = ( logger.exception("faster-whisper 导入失败")
"CUDA 推理异常:显存可能不足或 GPU 状态异常。" return None, _model_error
"建议关闭其他占用显存的进程后重试。\n"
f"错误详情: {exc}" except Exception as exc:
) if _is_cuda_error(exc):
else: _model_error = (
err = f"音频转写失败: {exc}\n{traceback.format_exc()}" "CUDA 初始化失败,请检查 NVIDIA 驱动、CUDA 运行时及 cuDNN。\n"
f"错误详情: {exc}"
logger.exception("transcribe_audio 失败") )
return False, err else:
_model_error = _build_load_error(exc)
logger.exception("Whisper 模型加载异常")
def reset_whisper_model() -> None: return None, _model_error
"""释放模型引用(用于调试或显存回收)。"""
global _model, _model_error
_model = None def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
_model_error = None """将音频文件转写为中文文本。"""
if not audio_path:
return False, "未提供音频文件路径。"
model, init_error = get_whisper_model()
if model is None:
return False, init_error or "Whisper 模型不可用。"
try:
segments, info = model.transcribe(
audio_path,
language=WHISPER_LANGUAGE,
beam_size=5,
vad_filter=True,
)
text_parts = [segment.text.strip() for segment in segments]
result_text = "".join(text_parts).strip()
if not result_text:
return False, (
"识别结果为空,请检查音频是否有效、音量是否足够,"
f"或尝试更换格式。检测到语言: {getattr(info, 'language', 'unknown')}"
)
logger.info(
"转写完成: 语言=%s, 概率=%.2f, 字数=%d",
getattr(info, "language", "?"),
getattr(info, "language_probability", 0.0),
len(result_text),
)
return True, result_text
except Exception as exc:
if _is_cuda_error(exc):
err = (
"CUDA 推理异常:显存可能不足或 GPU 状态异常。\n"
f"错误详情: {exc}"
)
else:
err = f"音频转写失败: {exc}\n{traceback.format_exc()}"
logger.exception("transcribe_audio 失败")
return False, err
def reset_whisper_model() -> None:
    """Clear the cached Whisper model and any cached load error.

    Both module-level globals are set back to ``None``. This only drops this
    module's reference to the model (useful for debugging or to let GPU memory
    be reclaimed); it does not explicitly free any device memory itself.
    """
    global _model, _model_error
    _model = None
    _model_error = None