Load mobile audio via ffmpeg to avoid librosa PySoundFile warnings.
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
+61
-8
@@ -10,6 +10,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
import uuid
|
import uuid
|
||||||
|
import warnings
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Optional, Tuple
|
from typing import Any, Dict, Optional, Tuple
|
||||||
@@ -179,29 +180,81 @@ def get_chattts_instance():
|
|||||||
return None, _chat_error
|
return None, _chat_error
|
||||||
|
|
||||||
|
|
||||||
|
def _load_audio_via_ffmpeg(audio_path: str, sample_rate: int) -> np.ndarray:
    """Transcode an audio file to WAV with ffmpeg, then read it back.

    Going through ffmpeg lets formats commonly produced by phones
    (webm, m4a, ...) be loaded.

    Args:
        audio_path: Path of the input audio file handed to ``ffmpeg -i``.
        sample_rate: Target sample rate passed to ffmpeg via ``-ar``.

    Returns:
        The decoded samples as a ``float32`` array. ffmpeg is asked for a
        single channel (``-ac 1``); a multi-dimensional result is still
        averaged over axis 1 as a safeguard.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. The message is
            the last 500 characters of its stderr when available.

    Errors raised by ``subprocess.run`` itself (for example a missing
    ``ffmpeg`` executable or the 120 second timeout) are not translated and
    propagate unchanged.
    """
    import os
    import subprocess
    import tempfile

    import soundfile as sf

    # mkstemp() creates the file atomically with owner-only permissions.
    # The deprecated mktemp() only returns a name, so another process could
    # create that path before ffmpeg writes to it.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    # Only the reserved path is needed; ffmpeg reopens it itself (-y allows
    # overwriting the empty placeholder).
    os.close(fd)
    try:
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            audio_path,
            "-ac",
            "1",
            "-ar",
            str(sample_rate),
            "-f",
            "wav",
            tmp_path,
        ]
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # ffmpeg echoes file names and metadata on stderr; undecodable
            # bytes must not mask the real failure with a UnicodeDecodeError.
            errors="replace",
            timeout=120,
        )
        if result.returncode != 0:
            raise RuntimeError(result.stderr[-500:] if result.stderr else "ffmpeg 转码失败")

        audio, _ = sf.read(tmp_path, dtype="float32", always_2d=False)
        if isinstance(audio, np.ndarray) and audio.ndim > 1:
            audio = audio.mean(axis=1)
        return np.asarray(audio, dtype=np.float32)
    finally:
        # Always remove the intermediate WAV, including on failure paths.
        Path(tmp_path).unlink(missing_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def _load_audio_for_chattts(audio_path: str, sample_rate: int = TTS_SAMPLE_RATE) -> np.ndarray:
    """
    Load an audio file and resample it to the rate ChatTTS needs.

    Loaders are tried in this order and the first success is returned:
    ``ChatTTS.utils.load_audio`` -> ``tools.audio.load_audio`` ->
    ffmpeg transcoding -> ``librosa.load`` as the last resort.

    Any exception from a loader (including a failed import) is recorded and
    the next loader is tried.

    Raises:
        RuntimeError: If every loader fails. The message ends with the last
            three recorded failure reasons.
    """
    failures: list[str] = []

    # 1) Loader shipped inside the ChatTTS package.
    try:
        from ChatTTS.utils import load_audio as chattts_loader

        return chattts_loader(audio_path, sample_rate)
    except Exception as err:
        failures.append(f"ChatTTS.utils: {err}")

    # 2) Loader from the `tools.audio` module.
    try:
        from tools.audio import load_audio as tools_loader

        return tools_loader(audio_path, sample_rate)
    except Exception as err:
        failures.append(f"tools.audio: {err}")

    # 3) Transcode through ffmpeg.
    try:
        return _load_audio_via_ffmpeg(audio_path, sample_rate)
    except Exception as err:
        failures.append(f"ffmpeg: {err}")

    # 4) librosa, with its FutureWarning and "PySoundFile failed" warnings
    #    silenced for the duration of the call.
    try:
        import librosa

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            warnings.filterwarnings("ignore", message="PySoundFile failed")
            samples, _ = librosa.load(audio_path, sr=sample_rate, mono=True)
            return samples
    except Exception as err:
        failures.append(f"librosa: {err}")

    details = "\n".join(failures[-3:])
    raise RuntimeError(
        "无法读取音频文件，请上传 wav/mp3/m4a 或确认已安装 ffmpeg。\n"
        + details
    )
|
||||||
|
|
||||||
|
|
||||||
def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float:
|
def _get_audio_duration_sec(audio: np.ndarray, sample_rate: int) -> float:
|
||||||
|
|||||||
Reference in New Issue
Block a user