bdc63c04df
Keep synthesized wav files browsable with playback and download, default to preset steady male voice, show one-click pipeline as the first tab, and reduce post-synthesis UI flicker. Co-authored-by: Cursor <cursoragent@cursor.com>
153 lines
5.1 KiB
Python
153 lines
5.1 KiB
Python
"""
|
||
本地 GPU 音色库(ChatTTS,无需云端 API)
|
||
- custom:用户在「音色锁定」克隆的 speaker_emb.pt
|
||
- preset_*:ChatTTS sample_random_speaker 生成的内置说话人(scripts/generate_voice_presets.sh)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import logging
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
import torch
|
||
|
||
from config import BASE_DIR, SPEAKER_EMB_PATH
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# On-disk layout: the manifest sits in <BASE_DIR>/voices and preset files
# live in its "presets" sub-directory.
VOICES_DIR = Path(BASE_DIR).joinpath("voices")
PRESETS_DIR = VOICES_DIR.joinpath("presets")
MANIFEST_PATH = VOICES_DIR.joinpath("manifest.json")

# Voice id that maps to the user's cloned speaker file (SPEAKER_EMB_PATH).
CUSTOM_VOICE_ID = "custom"
# Preset preferred as the default selection, and the label used for it when
# no voice choices can be listed.
DEFAULT_PRESET_VOICE_ID = "preset_01"
DEFAULT_PRESET_VOICE_LABEL = "预设·沉稳男声"
|
||
|
||
# Preset metadata as written by the generation script (the .pt files
# themselves are not tracked in Git). Labels are listed in preset order;
# ids and file paths are derived from the 1-based position.
_PRESET_LABELS = (
    "预设·沉稳男声",
    "预设·青年男声",
    "预设·温柔女声",
    "预设·活泼女声",
    "预设·中性旁白",
    "预设·纪录片风",
)

DEFAULT_MANIFEST = {
    "presets": [
        {
            "id": f"preset_{index:02d}",
            "label": label,
            "file": f"presets/preset_{index:02d}.pt",
        }
        for index, label in enumerate(_PRESET_LABELS, start=1)
    ]
}
|
||
|
||
|
||
def ensure_manifest() -> None:
    """Create the voices directory tree and seed a manifest if none exists.

    ``VOICES_DIR`` and ``PRESETS_DIR`` are created (including parents) when
    missing. An existing manifest file is left untouched; otherwise
    ``DEFAULT_MANIFEST`` is written as indented UTF-8 JSON with non-ASCII
    characters kept literal.
    """
    for directory in (VOICES_DIR, PRESETS_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    if MANIFEST_PATH.is_file():
        return

    serialized = json.dumps(DEFAULT_MANIFEST, ensure_ascii=False, indent=2)
    MANIFEST_PATH.write_text(serialized, encoding="utf-8")
|
||
|
||
|
||
def _read_manifest() -> Dict[str, Any]:
    """Return the parsed voice manifest, falling back to ``DEFAULT_MANIFEST``.

    The manifest file is created first if it does not exist. Any failure to
    read or parse it is logged as a warning and the built-in default is
    returned instead, so callers always receive a dict.

    Returns:
        The manifest mapping read from ``MANIFEST_PATH``, or
        ``DEFAULT_MANIFEST`` when the file is unreadable, is not valid JSON,
        or its top-level value is not a JSON object.
    """
    ensure_manifest()
    try:
        manifest = json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
    except Exception as exc:
        logger.warning("读取 manifest 失败: %s", exc)
        return DEFAULT_MANIFEST

    # json.loads may legally return a list, string, number or None; callers
    # use ``.get`` on the result, so anything but a dict is treated as corrupt.
    if not isinstance(manifest, dict):
        logger.warning(
            "读取 manifest 失败: %s",
            f"根节点不是 JSON 对象 ({type(manifest).__name__})",
        )
        return DEFAULT_MANIFEST
    return manifest
|
||
|
||
|
||
def list_voice_choices() -> List[Tuple[str, str]]:
    """Return Gradio Dropdown options as ``(display label, voice_id)`` pairs.

    Only voices whose files exist on disk are listed: the cloned custom voice
    first (when ``SPEAKER_EMB_PATH`` exists), then manifest presets in
    manifest order. When nothing is available, a single placeholder entry
    mapped to ``CUSTOM_VOICE_ID`` is returned so the list is never empty.
    """
    options: List[Tuple[str, str]] = []

    if SPEAKER_EMB_PATH.is_file():
        options.append(("我的锁定音色(声音克隆)", CUSTOM_VOICE_ID))

    for entry in _read_manifest().get("presets", []):
        preset_id = entry.get("id", "")
        relative_file = entry.get("file", "")
        # Skip entries lacking an id or file, and presets not yet generated.
        if not (preset_id and relative_file):
            continue
        if not (VOICES_DIR / relative_file).is_file():
            continue
        # The label falls back to the preset id when the manifest omits it.
        options.append((entry.get("label", preset_id), preset_id))

    if options:
        return options

    placeholder = "(请先在「音色锁定」上传人声,或运行 generate_voice_presets.sh)"
    return [(placeholder, CUSTOM_VOICE_ID)]
|
||
|
||
|
||
def default_voice_id() -> str:
    """Pick the voice id to preselect.

    Priority: ``DEFAULT_PRESET_VOICE_ID`` when it is among the listed
    choices, then the first listed voice that is not the custom clone, then
    the first listed voice. If no choices are listed at all,
    ``DEFAULT_PRESET_VOICE_ID`` is returned.
    """
    available = list_voice_choices()
    if not available:
        return DEFAULT_PRESET_VOICE_ID

    voice_ids = [voice_id for _, voice_id in available]
    if DEFAULT_PRESET_VOICE_ID in voice_ids:
        return DEFAULT_PRESET_VOICE_ID

    # Prefer any preset over the custom clone; fall back to the first entry.
    return next(
        (voice_id for voice_id in voice_ids if voice_id != CUSTOM_VOICE_ID),
        voice_ids[0],
    )
|
||
|
||
|
||
def default_voice_label() -> str:
    """Return the display label of the voice to preselect.

    The priority mirrors ``default_voice_id`` so that the default label and
    the default id always refer to the same voice: the entry whose id is
    ``DEFAULT_PRESET_VOICE_ID``, then the first entry that is not the custom
    clone, then the first entry. ``DEFAULT_PRESET_VOICE_LABEL`` is returned
    only when no choices are listed.

    The choices are listed once; listing them touches the disk, and a second
    scan could also observe a different set of voices.
    """
    choices = list_voice_choices()
    if not choices:
        return DEFAULT_PRESET_VOICE_LABEL

    for label, voice_id in choices:
        if voice_id == DEFAULT_PRESET_VOICE_ID:
            return label

    # Same fallback order as default_voice_id: presets before the custom clone.
    for label, voice_id in choices:
        if voice_id != CUSTOM_VOICE_ID:
            return label

    return choices[0][0]
|
||
|
||
|
||
def voice_choice_labels() -> List[str]:
    """Return only the display labels of the currently listed voices."""
    labels: List[str] = []
    for choice in list_voice_choices():
        labels.append(choice[0])
    return labels
|
||
|
||
|
||
def label_to_voice_id(label: str) -> str:
    """Translate a dropdown display label back to its voice id.

    The first listed choice whose label equals ``label`` wins. When no label
    matches, the result of ``default_voice_id()`` is returned instead.
    """
    matched = next(
        (voice_id for text, voice_id in list_voice_choices() if text == label),
        None,
    )
    if matched is None:
        return default_voice_id()
    return matched
|
||
|
||
|
||
def load_voice_payload(voice_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Load the ChatTTS speaker data for ``voice_id``.

    An empty ``voice_id`` or ``CUSTOM_VOICE_ID`` selects the cloned voice at
    ``SPEAKER_EMB_PATH``; any other id is looked up among the manifest
    presets.

    Returns:
        ``(payload, None)`` on success, or ``(None, message)`` with a
        user-facing error message when the voice file is missing, the id is
        unknown, or the file cannot be loaded.
    """
    wants_custom = not voice_id or voice_id == CUSTOM_VOICE_ID
    if wants_custom:
        if SPEAKER_EMB_PATH.is_file():
            return _load_payload_file(SPEAKER_EMB_PATH)
        return None, (
            "未找到锁定音色。请在「音色锁定」上传参考人声,"
            "或选择下方「预设」音色(需先运行 scripts/generate_voice_presets.sh)。"
        )

    # First manifest entry with a matching id, if any.
    presets = _read_manifest().get("presets", [])
    entry = next((item for item in presets if item.get("id") == voice_id), None)
    if entry is None:
        return None, f"未知音色 ID: {voice_id}"

    preset_path = VOICES_DIR / entry.get("file", "")
    if preset_path.is_file():
        return _load_payload_file(preset_path)

    # Known preset whose file has not been generated yet.
    return None, (
        f"预设音色「{entry.get('label', voice_id)}」尚未生成。\n"
        "请在服务器执行: bash scripts/generate_voice_presets.sh"
    )
|
||
|
||
|
||
def _load_payload_file(path: Path) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Read a speaker file from ``path`` and normalise it to a payload dict.

    A dict is returned unchanged. A bare tensor is wrapped as
    ``{"spk_emb": tensor, "spk_smp": None, "txt_smp": ""}``. Any other
    object, or any error while loading, yields ``(None, message)``.

    Returns:
        ``(payload, None)`` on success, otherwise ``(None, error message)``.
    """
    try:
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
        # objects, so this must only be pointed at trusted, locally generated
        # files.
        loaded = torch.load(path, map_location="cpu", weights_only=False)
    except Exception as exc:
        return None, f"读取音色文件失败 ({path.name}): {exc}"

    if isinstance(loaded, dict):
        return loaded, None
    if isinstance(loaded, torch.Tensor):
        wrapped = {"spk_emb": loaded, "spk_smp": None, "txt_smp": ""}
        return wrapped, None
    return None, f"音色文件格式无效: {path.name}"
|