Compare commits

..

2 Commits

Author SHA1 Message Date
dekun 0f5277c22e Add Whisper offline loading for air-gapped servers.
Pre-download via HF mirror scripts so inner-network deploys avoid Hub Network is unreachable errors.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 16:11:57 +08:00
dekun 39e29fe6a9 Load mobile audio via ffmpeg to avoid librosa PySoundFile warnings.
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 16:05:55 +08:00
7 changed files with 371 additions and 172 deletions
+2
View File
@@ -10,4 +10,6 @@ OLLAMA_PORT=11434
# ChatTTS 模型目录(预下载脚本写入) # ChatTTS 模型目录(预下载脚本写入)
# CHATTTS_MODEL_DIR=/opt/Trading_Studio/models/ChatTTS # CHATTTS_MODEL_DIR=/opt/Trading_Studio/models/ChatTTS
# WHISPER_MODEL_DIR=/opt/Trading_Studio/models/whisper
# WHISPER_MODEL_SIZE=small
# HF_ENDPOINT=https://hf-mirror.com # HF_ENDPOINT=https://hf-mirror.com
+13 -2
View File
@@ -361,9 +361,20 @@ cd /opt/Trading_Studio
pip install -r requirements.txt pip install -r requirements.txt
``` ```
### 6.1 Faster-Whisper ### 6.1 Faster-Whisper(必须预下载)
`requirements.txt` 安装。首次运行会自动下载 `small` 模型(约 500MB)至 HuggingFace 缓存。 `requirements.txt` 安装。**内网服务器无法访问 HuggingFace 时会报 `Network is unreachable`。**
部署后执行(推荐与 ChatTTS 一起):
```bash
cd /opt/Trading_Studio
bash scripts/download_all_models.sh
# 或仅 Whisper: bash scripts/download_whisper_models.sh small
pm2 restart trading_studio
```
模型目录:`/opt/Trading_Studio/models/whisper/small/`(含 `model.bin`)。
### 6.2 ChatTTS(必须预下载,勿依赖 GitHub) ### 6.2 ChatTTS(必须预下载,勿依赖 GitHub)
+17 -7
View File
@@ -76,23 +76,33 @@ SYSTEM_PROMPT = (
"去掉所有无意义的口头禅,字数不做删减。" "去掉所有无意义的口头禅,字数不做删减。"
) )
# ---------------------------------------------------------------------------
# 路径
# ---------------------------------------------------------------------------
BASE_DIR = Path(__file__).resolve().parent
INSTALL_DIR = Path("/opt/Trading_Studio")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Faster-Whisper 配置 # Faster-Whisper 配置
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
WHISPER_MODEL_SIZE = "small" WHISPER_MODEL_SIZE = _env_str("WHISPER_MODEL_SIZE", "small")
WHISPER_DEVICE = "cuda" WHISPER_DEVICE = "cuda"
WHISPER_COMPUTE_TYPE = "float16" WHISPER_COMPUTE_TYPE = "float16"
WHISPER_LANGUAGE = "zh" WHISPER_LANGUAGE = "zh"
WHISPER_MODEL_DIR = Path(_env_str("WHISPER_MODEL_DIR", str(BASE_DIR / "models" / "whisper")))
WHISPER_HF_REPO = {
"tiny": "Systran/faster-whisper-tiny",
"base": "Systran/faster-whisper-base",
"small": "Systran/faster-whisper-small",
"medium": "Systran/faster-whisper-medium",
"large-v2": "Systran/faster-whisper-large-v2",
"large-v3": "Systran/faster-whisper-large-v3",
}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# ChatTTS 配置 # ChatTTS 配置
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# 标准生产安装路径(/opt,root 部署)
INSTALL_DIR = Path("/opt/Trading_Studio")
# 项目根目录(开发/生产均自适应,以实际 app.py 所在目录为准)
BASE_DIR = Path(__file__).resolve().parent
# 固定音色 Embedding 存储路径 # 固定音色 Embedding 存储路径
SPEAKER_EMB_PATH = BASE_DIR / "speaker_emb.pt" SPEAKER_EMB_PATH = BASE_DIR / "speaker_emb.pt"
+15
View File
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Download every model the service needs (Whisper + ChatTTS) in one go.
# Must be run once when deploying to a server without Hub access.
#
# Usage: bash scripts/download_all_models.sh [whisper-size]
# Whisper size precedence: first argument, then $WHISPER_MODEL_SIZE, then "small".
set -euo pipefail

# Resolve the project root from this script's location so it works from any cwd.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "${ROOT}"

# Keep the downloaded size in sync with what the service is configured to load.
SIZE="${1:-${WHISPER_MODEL_SIZE:-small}}"

echo "========== 下载 Whisper (${SIZE}) =========="
bash scripts/download_whisper_models.sh "${SIZE}"

echo ""
echo "========== 下载 ChatTTS =========="
bash scripts/download_chattts_models.sh

echo ""
echo "[OK] 全部模型下载完成,请: pm2 restart trading_studio"
+59
View File
@@ -0,0 +1,59 @@
#!/usr/bin/env bash
# Pre-download a Faster-Whisper model through a HuggingFace mirror so that a
# server without Hub access can later load it from local disk.
#
# Usage: bash scripts/download_whisper_models.sh [tiny|base|small|medium|large-v2|large-v3]
# Size precedence: first argument, then $WHISPER_MODEL_SIZE, then "small".
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
SIZE="${1:-${WHISPER_MODEL_SIZE:-small}}"
VENV_PY="${ROOT}/venv/bin/python"
# Each size gets its own sub-directory under the Whisper model root.
MODEL_DIR="${WHISPER_MODEL_DIR:-${ROOT}/models/whisper}/${SIZE}"

# Caller-provided values win; otherwise fall back to the mirror defaults.
export HF_ENDPOINT="${HF_ENDPOINT:-https://hf-mirror.com}"
export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-600}"
export HF_HOME="${HF_HOME:-${ROOT}/models/hf_cache}"
# Passed to the embedded Python below through the environment.
export MODEL_DIR
export WHISPER_SIZE="${SIZE}"

echo "[INFO] Whisper 模型: ${SIZE}"
echo "[INFO] 保存目录: ${MODEL_DIR}"
echo "[INFO] HF 镜像: ${HF_ENDPOINT}"

if [[ ! -x "${VENV_PY}" ]]; then
  # Diagnostics belong on stderr so they are not mixed into captured stdout.
  echo "[ERROR] 未找到 venv,请先 bash deploy.sh deps" >&2
  exit 1
fi

"${VENV_PY}" -m pip install -q huggingface_hub

# Quoted heredoc delimiter: the shell performs no expansion inside the Python code.
"${VENV_PY}" << 'PY'
import os
from pathlib import Path

from huggingface_hub import snapshot_download

size = os.environ["WHISPER_SIZE"]
repos = {
    "tiny": "Systran/faster-whisper-tiny",
    "base": "Systran/faster-whisper-base",
    "small": "Systran/faster-whisper-small",
    "medium": "Systran/faster-whisper-medium",
    "large-v2": "Systran/faster-whisper-large-v2",
    "large-v3": "Systran/faster-whisper-large-v3",
}
repo = repos.get(size)
if not repo:
    raise SystemExit(f"未知模型尺寸: {size}, 可选: {list(repos)}")

target = Path(os.environ["MODEL_DIR"])
target.mkdir(parents=True, exist_ok=True)

print(f"[INFO] 正在下载 {repo} ...")
snapshot_download(repo_id=repo, local_dir=str(target), local_dir_use_symlinks=False)

# model.bin is the file this script treats as proof of a complete download.
if not (target / "model.bin").is_file():
    raise SystemExit(f"[ERROR] 下载不完整,未找到 model.bin: {target}")
print(f"[OK] Whisper 模型就绪: {target}")
PY

echo ""
echo "[OK] 请执行: pm2 restart trading_studio"
+58 -5
View File
@@ -10,6 +10,7 @@ import logging
import os import os
import traceback import traceback
import uuid import uuid
import warnings
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional, Tuple from typing import Any, Dict, Optional, Tuple
@@ -179,29 +180,81 @@ def get_chattts_instance():
return None, _chat_error return None, _chat_error
def _load_audio_via_ffmpeg(audio_path: str, sample_rate: int) -> np.ndarray:
    """Transcode *audio_path* to WAV with ffmpeg and return the samples.

    Used for containers such as phone-recorded webm/m4a: ffmpeg writes a
    single-channel WAV at ``sample_rate`` to a temporary file, which is then
    read back with ``soundfile``.

    Args:
        audio_path: Path of the input audio file, passed to ``ffmpeg -i``.
        sample_rate: Output sample rate, passed to ``ffmpeg -ar``.

    Returns:
        A one-dimensional ``float32`` array of samples.

    Raises:
        RuntimeError: ffmpeg exited with a non-zero status; the message carries
            the last 500 characters of its stderr when available.
        subprocess.TimeoutExpired: ffmpeg did not finish within 120 seconds.
        OSError: the ffmpeg executable could not be started.
    """
    import os
    import subprocess
    import tempfile

    import soundfile as sf

    # mkstemp creates the file atomically; the deprecated mktemp only returns a
    # name, which another process could claim before ffmpeg writes to it.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # ffmpeg reopens the path itself; only the reserved name is needed
    try:
        cmd = [
            "ffmpeg",
            "-y",  # overwrite the placeholder file created above
            "-i",
            audio_path,
            "-ac",
            "1",
            "-ar",
            str(sample_rate),
            "-f",
            "wav",
            tmp_path,
        ]
        result = subprocess.run(
            cmd,
            stdin=subprocess.DEVNULL,  # do not let ffmpeg read the service's stdin
            capture_output=True,
            text=True,
            timeout=120,
        )
        if result.returncode != 0:
            raise RuntimeError(result.stderr[-500:] if result.stderr else "ffmpeg 转码失败")

        audio, _ = sf.read(tmp_path, dtype="float32", always_2d=False)
        # Defensive downmix in case the decoded data is still multi-channel.
        if isinstance(audio, np.ndarray) and audio.ndim > 1:
            audio = audio.mean(axis=1)
        return np.asarray(audio, dtype=np.float32)
    finally:
        Path(tmp_path).unlink(missing_ok=True)
def _load_audio_for_chattts(audio_path: str, sample_rate: int = TTS_SAMPLE_RATE) -> np.ndarray:
    """Load an audio file at the sample rate ChatTTS expects.

    Loaders are tried in order: ``ChatTTS.utils.load_audio``,
    ``tools.audio.load_audio``, ffmpeg transcoding, then ``librosa``. The first
    one that succeeds provides the return value.

    Raises:
        RuntimeError: every loader failed; the message lists the last three
            recorded failure reasons.
    """

    def _via_chattts_utils():
        from ChatTTS.utils import load_audio

        return load_audio(audio_path, sample_rate)

    def _via_tools_audio():
        from tools.audio import load_audio

        return load_audio(audio_path, sample_rate)

    def _via_ffmpeg():
        return _load_audio_via_ffmpeg(audio_path, sample_rate)

    def _via_librosa():
        import librosa

        # Silence the warnings librosa emits on the way to its own fallback.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            warnings.filterwarnings("ignore", message="PySoundFile failed")
            samples, _ = librosa.load(audio_path, sr=sample_rate, mono=True)
        return samples

    attempts = (
        ("ChatTTS.utils", _via_chattts_utils),
        ("tools.audio", _via_tools_audio),
        ("ffmpeg", _via_ffmpeg),
        ("librosa", _via_librosa),
    )

    failures: list[str] = []
    for label, loader in attempts:
        try:
            return loader()
        except Exception as exc:
            # Record why this loader failed and move on to the next one.
            failures.append(f"{label}: {exc}")

    raise RuntimeError(
        "无法读取音频文件,请上传 wav/mp3/m4a 或确认已安装 ffmpeg。\n"
        + "\n".join(failures[-3:])
    )
def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float: def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float:
+85 -36
View File
@@ -6,43 +6,93 @@ Faster-Whisper CUDA 语音识别服务
from __future__ import annotations from __future__ import annotations
import logging import logging
import os
import traceback import traceback
from pathlib import Path
from typing import Optional, Tuple from typing import Optional, Tuple
from config import ( from config import (
BASE_DIR,
HF_ENDPOINT,
HF_HOME,
HF_HUB_DOWNLOAD_TIMEOUT,
WHISPER_COMPUTE_TYPE, WHISPER_COMPUTE_TYPE,
WHISPER_DEVICE, WHISPER_DEVICE,
WHISPER_HF_REPO,
WHISPER_LANGUAGE, WHISPER_LANGUAGE,
WHISPER_MODEL_DIR,
WHISPER_MODEL_SIZE, WHISPER_MODEL_SIZE,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# 全局懒加载模型实例,避免 Gradio 重复初始化占用显存
_model = None _model = None
_model_error: Optional[str] = None _model_error: Optional[str] = None
def _ensure_hf_env() -> None:
    """Fill in HuggingFace environment defaults and create the model directory.

    Variables already present in the environment are left untouched; only
    missing ones receive the values from ``config``.
    """
    defaults = {
        "HF_ENDPOINT": HF_ENDPOINT,
        "HF_HUB_DOWNLOAD_TIMEOUT": str(HF_HUB_DOWNLOAD_TIMEOUT),
        "HF_HOME": str(HF_HOME),
    }
    for name, value in defaults.items():
        os.environ.setdefault(name, value)
    WHISPER_MODEL_DIR.mkdir(parents=True, exist_ok=True)
def _whisper_local_path() -> Optional[Path]:
    """Return the pre-downloaded model directory, or ``None`` if it is absent.

    The directory counts as present only when it contains a ``model.bin`` file.
    """
    candidate = WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE
    weights = candidate / "model.bin"
    return candidate if weights.is_file() else None
def _is_cuda_error(exc: BaseException) -> bool: def _is_cuda_error(exc: BaseException) -> bool:
"""判断异常是否与 CUDA/GPU 相关。"""
msg = str(exc).lower() msg = str(exc).lower()
cuda_keywords = ( cuda_keywords = (
"cuda", "cuda", "cudnn", "cublas", "gpu",
"cudnn", "out of memory", "no kernel image", "device-side assert",
"cublas",
"gpu",
"out of memory",
"no kernel image",
"device-side assert",
) )
return any(k in msg for k in cuda_keywords) return any(k in msg for k in cuda_keywords)
def _is_network_error(exc: BaseException) -> bool:
msg = str(exc).lower()
return any(
k in msg
for k in (
"network is unreachable",
"connection error",
"connecterror",
"timed out",
"couldn't connect",
"name resolution",
"hub",
)
)
def _build_load_error(exc: BaseException) -> str:
    """Build the user-facing message for a failed Whisper model load.

    For connectivity failures the message contains the commands that
    pre-download the model; for anything else it appends the current traceback.

    Args:
        exc: The exception raised while loading the model.

    Returns:
        A multi-line message joined with newlines.
    """
    lines = [
        "Whisper 模型加载失败。",
        f"详情: {exc}",
        "",
    ]
    if _is_network_error(exc):
        lines.extend([
            "原因:服务器无法访问 HuggingFace 下载模型(内网/无外网常见)。",
            "请在服务器执行(走 HF 镜像,仅需一次):",
            f" cd {BASE_DIR}",
            # Pass the configured size explicitly: without an argument the
            # script picks its own default, which need not match the directory
            # reported on the last line of this message.
            f" bash scripts/download_whisper_models.sh {WHISPER_MODEL_SIZE}",
            " pm2 restart trading_studio",
            "",
            f"模型将保存到: {WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE}",
        ])
    else:
        # format_exc() reports the exception currently being handled, so this
        # is only informative when called from inside an except block.
        lines.append(f"完整日志:\n{traceback.format_exc()}")
    return "\n".join(lines)
def get_whisper_model(): def get_whisper_model():
""" """获取或初始化 Faster-Whisper 模型(优先本地预下载)。"""
获取或初始化 Faster-Whisper 模型。
强制 device=cuda, compute_type=float16。
"""
global _model, _model_error global _model, _model_error
if _model is not None: if _model is not None:
@@ -52,18 +102,31 @@ def get_whisper_model():
return None, _model_error return None, _model_error
try: try:
_ensure_hf_env()
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
local = _whisper_local_path()
if local:
model_id = str(local)
logger.info("Whisper 从本地加载: %s", model_id)
else:
model_id = WHISPER_MODEL_SIZE
logger.warning(
"未找到本地 Whisper 模型 (%s),尝试在线下载(可能失败)…",
WHISPER_MODEL_DIR / WHISPER_MODEL_SIZE,
)
logger.info( logger.info(
"正在加载 Whisper 模型: size=%s, device=%s, compute_type=%s", "Whisper 加载: model=%s, device=%s, compute_type=%s",
WHISPER_MODEL_SIZE, model_id,
WHISPER_DEVICE, WHISPER_DEVICE,
WHISPER_COMPUTE_TYPE, WHISPER_COMPUTE_TYPE,
) )
_model = WhisperModel( _model = WhisperModel(
WHISPER_MODEL_SIZE, model_id,
device=WHISPER_DEVICE, device=WHISPER_DEVICE,
compute_type=WHISPER_COMPUTE_TYPE, compute_type=WHISPER_COMPUTE_TYPE,
download_root=str(WHISPER_MODEL_DIR),
) )
logger.info("Whisper 模型加载成功。") logger.info("Whisper 模型加载成功。")
return _model, None return _model, None
@@ -79,26 +142,17 @@ def get_whisper_model():
except Exception as exc: except Exception as exc:
if _is_cuda_error(exc): if _is_cuda_error(exc):
_model_error = ( _model_error = (
"CUDA 初始化失败,请检查 NVIDIA 驱动、CUDA 运行时及 cuDNN 是否正确安装\n" "CUDA 初始化失败,请检查 NVIDIA 驱动、CUDA 运行时及 cuDNN。\n"
f"错误详情: {exc}\n" f"错误详情: {exc}"
f"{traceback.format_exc()}"
) )
else: else:
_model_error = f"Whisper 模型加载失败: {exc}\n{traceback.format_exc()}" _model_error = _build_load_error(exc)
logger.exception("Whisper 模型加载异常") logger.exception("Whisper 模型加载异常")
return None, _model_error return None, _model_error
def transcribe_audio(audio_path: str) -> Tuple[bool, str]: def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
""" """将音频文件转写为中文文本。"""
将音频文件转写为中文文本。
Args:
audio_path: 本地音频文件绝对或相对路径
Returns:
(success, text_or_error_message)
"""
if not audio_path: if not audio_path:
return False, "未提供音频文件路径。" return False, "未提供音频文件路径。"
@@ -114,10 +168,7 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
vad_filter=True, vad_filter=True,
) )
text_parts = [] text_parts = [segment.text.strip() for segment in segments]
for segment in segments:
text_parts.append(segment.text.strip())
result_text = "".join(text_parts).strip() result_text = "".join(text_parts).strip()
if not result_text: if not result_text:
@@ -137,8 +188,7 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
except Exception as exc: except Exception as exc:
if _is_cuda_error(exc): if _is_cuda_error(exc):
err = ( err = (
"CUDA 推理异常:显存可能不足或 GPU 状态异常。" "CUDA 推理异常:显存可能不足或 GPU 状态异常。\n"
"建议关闭其他占用显存的进程后重试。\n"
f"错误详情: {exc}" f"错误详情: {exc}"
) )
else: else:
@@ -149,7 +199,6 @@ def transcribe_audio(audio_path: str) -> Tuple[bool, str]:
def reset_whisper_model() -> None: def reset_whisper_model() -> None:
"""释放模型引用(用于调试或显存回收)。"""
global _model, _model_error global _model, _model_error
_model = None _model = None
_model_error = None _model_error = None