Load mobile audio via ffmpeg to avoid librosa PySoundFile warnings.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 16:05:55 +08:00
parent aacdffac77
commit 39e29fe6a9
+61 -8
View File
@@ -10,6 +10,7 @@ import logging
import os import os
import traceback import traceback
import uuid import uuid
import warnings
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional, Tuple from typing import Any, Dict, Optional, Tuple
@@ -179,29 +180,81 @@ def get_chattts_instance():
return None, _chat_error return None, _chat_error
def _load_audio_via_ffmpeg(audio_path: str, sample_rate: int) -> np.ndarray:
    """Transcode an audio file to mono WAV with ffmpeg and load the samples.

    Letting ffmpeg do the decoding makes formats commonly produced by phones
    (webm, m4a, ...) readable without relying on librosa's fallback decoders.

    Args:
        audio_path: Path of the input file, passed to ``ffmpeg -i``.
        sample_rate: Target sample rate in Hz, passed to ffmpeg's ``-ar``.

    Returns:
        A one-dimensional ``float32`` array holding the decoded samples.

    Raises:
        RuntimeError: ffmpeg exited with a non-zero status; the message is the
            last 500 characters of its stderr (or a generic text if empty).
        OSError: The ``ffmpeg`` executable could not be started
            (raised by ``subprocess.run``).
        subprocess.TimeoutExpired: Transcoding took longer than 120 seconds.
    """
    import subprocess
    import tempfile

    import soundfile as sf

    # A private temporary directory replaces the deprecated tempfile.mktemp():
    # mktemp only returns a *name*, so another process could create that path
    # before ffmpeg (run with -y) overwrites it. The directory and everything
    # in it are removed when the ``with`` block exits, even on error.
    with tempfile.TemporaryDirectory(prefix="ffmpeg_audio_") as tmp_dir:
        wav_path = str(Path(tmp_dir) / "converted.wav")
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            audio_path,
            "-ac",
            "1",
            "-ar",
            str(sample_rate),
            "-f",
            "wav",
            wav_path,
        ]
        result = subprocess.run(
            cmd,
            # Do not let ffmpeg read (or block on) the parent's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg may echo bytes that are not valid in the locale encoding;
            # never let a decode error hide the actual transcoding failure.
            errors="replace",
            timeout=120,
        )
        if result.returncode != 0:
            raise RuntimeError(result.stderr[-500:] if result.stderr else "ffmpeg 转码失败")
        # Read while the temporary directory still exists.
        audio, _ = sf.read(wav_path, dtype="float32", always_2d=False)

    # ffmpeg was asked for one channel; down-mix defensively if more come back.
    if isinstance(audio, np.ndarray) and audio.ndim > 1:
        audio = audio.mean(axis=1)
    return np.asarray(audio, dtype=np.float32)
def _load_audio_for_chattts(audio_path: str, sample_rate: int = TTS_SAMPLE_RATE) -> np.ndarray:
    """Load an audio file resampled to the rate ChatTTS expects.

    Loaders are tried in order and the first one that succeeds wins:

    1. ``ChatTTS.utils.load_audio``
    2. ``tools.audio.load_audio``
    3. ffmpeg transcoding via ``_load_audio_via_ffmpeg``
    4. ``librosa.load`` as the last resort

    Args:
        audio_path: Path of the audio file to read.
        sample_rate: Target sample rate in Hz (defaults to ``TTS_SAMPLE_RATE``).

    Returns:
        The audio samples as returned by the first loader that succeeds.

    Raises:
        RuntimeError: Every loader failed; the message lists each loader's
            failure reason, one per line.
    """
    errors: list[str] = []

    # Any failure (missing module or decode error) falls through to the next
    # loader; the reason is kept so the final error can explain every attempt.
    try:
        from ChatTTS.utils import load_audio

        return load_audio(audio_path, sample_rate)
    except Exception as exc:
        errors.append(f"ChatTTS.utils: {exc}")

    try:
        from tools.audio import load_audio

        return load_audio(audio_path, sample_rate)
    except Exception as exc:
        errors.append(f"tools.audio: {exc}")

    try:
        return _load_audio_via_ffmpeg(audio_path, sample_rate)
    except Exception as exc:
        errors.append(f"ffmpeg: {exc}")

    try:
        import librosa

        with warnings.catch_warnings():
            # Silence librosa's noisy fallback warnings for non-wav input.
            warnings.filterwarnings("ignore", category=FutureWarning)
            warnings.filterwarnings("ignore", message="PySoundFile failed")
            audio, _ = librosa.load(audio_path, sr=sample_rate, mono=True)
        return audio
    except Exception as exc:
        errors.append(f"librosa: {exc}")

    # Report every attempt. The previous ``errors[-3:]`` slice always dropped
    # the first entry (ChatTTS.utils), hiding the primary loader's failure.
    raise RuntimeError(
        "无法读取音频文件,请上传 wav/mp3/m4a 或确认已安装 ffmpeg。\n"
        + "\n".join(errors)
    )
def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float: def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float: