Add local GPU preset voices with dropdown selection.

Generate ChatTTS sample_random_speaker presets without cloud APIs; choose clone or preset in synthesize UI.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 17:28:17 +08:00
parent 8be34a2fd5
commit eb71e28427
7 changed files with 304 additions and 15 deletions
+139
View File
@@ -0,0 +1,139 @@
"""
本地 GPU 音色库(ChatTTS,无需云端 API)
- custom:用户在「音色锁定」克隆的 speaker_emb.pt
- preset_*ChatTTS sample_random_speaker 生成的内置说话人(scripts/generate_voice_presets.sh
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import torch
from config import BASE_DIR, SPEAKER_EMB_PATH
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Root directory for voice assets, located under BASE_DIR from config.
VOICES_DIR = Path(BASE_DIR) / "voices"
# Directory for preset speaker files; manifest "file" entries such as
# "presets/preset_01.pt" are resolved relative to VOICES_DIR.
PRESETS_DIR = VOICES_DIR / "presets"
# JSON manifest listing the preset voices (id, label, file).
MANIFEST_PATH = VOICES_DIR / "manifest.json"
# Voice id of the user's cloned speaker, which is stored at SPEAKER_EMB_PATH.
CUSTOM_VOICE_ID = "custom"
# Preset metadata written by the generation script (the .pt files themselves
# are not committed to Git). Presets are numbered from 1 in the order of the
# labels below; the id and the file path are both derived from that number.
_PRESET_LABELS = (
    "预设·沉稳男声",
    "预设·青年男声",
    "预设·温柔女声",
    "预设·活泼女声",
    "预设·中性旁白",
    "预设·纪录片风",
)
DEFAULT_MANIFEST = {
    "presets": [
        {
            "id": f"preset_{number:02d}",
            "label": preset_label,
            "file": f"presets/preset_{number:02d}.pt",
        }
        for number, preset_label in enumerate(_PRESET_LABELS, start=1)
    ]
}
def ensure_manifest() -> None:
    """Create the voice directories and seed the manifest when it is missing.

    Both VOICES_DIR and PRESETS_DIR are created if necessary. An existing
    manifest file is never overwritten; otherwise DEFAULT_MANIFEST is written
    as UTF-8 JSON.
    """
    for directory in (VOICES_DIR, PRESETS_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    if MANIFEST_PATH.is_file():
        return

    serialized = json.dumps(DEFAULT_MANIFEST, ensure_ascii=False, indent=2)
    MANIFEST_PATH.write_text(serialized, encoding="utf-8")
def _read_manifest() -> Dict[str, Any]:
    """Load the voice manifest and return it in a shape callers can iterate.

    The manifest file is created from DEFAULT_MANIFEST first if it does not
    exist yet.

    Returns:
        A dict whose ``"presets"`` value is always a list of dicts. A private
        copy of DEFAULT_MANIFEST is returned when the file cannot be read or
        parsed, or when its structure is invalid (top level is not an object,
        or ``"presets"`` is not a list). Preset entries that are not objects
        are dropped.
    """
    # Local import: only this function needs it, which keeps the edit
    # self-contained.
    import copy

    ensure_manifest()
    try:
        manifest = json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
    except Exception as exc:
        # Deliberate best-effort: a broken manifest must not take the UI down.
        logger.warning("读取 manifest 失败: %s", exc)
        # Hand out a copy so callers can never mutate the module-level default.
        return copy.deepcopy(DEFAULT_MANIFEST)

    # Valid JSON is not necessarily a valid manifest: callers do
    # ``manifest.get("presets", [])`` and ``preset.get(...)``, so a top-level
    # list/null, ``"presets": null`` or scalar entries would crash them.
    presets = manifest.get("presets", []) if isinstance(manifest, dict) else None
    if not isinstance(presets, list):
        logger.warning("manifest 结构无效,已回退到默认预设: %s", MANIFEST_PATH)
        return copy.deepcopy(DEFAULT_MANIFEST)

    manifest["presets"] = [entry for entry in presets if isinstance(entry, dict)]
    return manifest
def list_voice_choices() -> List[Tuple[str, str]]:
    """Build the Gradio Dropdown options as ``(display label, voice_id)`` pairs.

    Only voices whose files exist on disk are listed: the cloned custom voice
    first (when SPEAKER_EMB_PATH exists), then every manifest preset whose
    file is present under VOICES_DIR. When nothing is available, a single
    placeholder entry mapped to CUSTOM_VOICE_ID is returned, so the result is
    never empty.
    """
    options: List[Tuple[str, str]] = []

    if SPEAKER_EMB_PATH.is_file():
        options.append(("我的锁定音色(声音克隆)", CUSTOM_VOICE_ID))

    for entry in _read_manifest().get("presets", []):
        voice_id = entry.get("id", "")
        relative_file = entry.get("file", "")
        # Entries without an id or a file path can never be selected.
        if not (voice_id and relative_file):
            continue
        if (VOICES_DIR / relative_file).is_file():
            # The label falls back to the id when the manifest omits it.
            options.append((entry.get("label", voice_id), voice_id))

    if options:
        return options

    placeholder = "(请先在「音色锁定」上传人声,或运行 generate_voice_presets.sh"
    return [(placeholder, CUSTOM_VOICE_ID)]
def default_voice_id() -> str:
    """Return the voice id to preselect.

    The custom (cloned) voice wins whenever it appears among the available
    choices, and is also the answer when there are no choices at all;
    otherwise the first listed choice is used.
    """
    available = list_voice_choices()
    has_custom = any(voice_id == CUSTOM_VOICE_ID for _, voice_id in available)
    if not available or has_custom:
        return CUSTOM_VOICE_ID
    return available[0][1]
def voice_choice_labels() -> List[str]:
    """Return only the display labels of the currently available voices."""
    return [display_label for display_label, _voice_id in list_voice_choices()]
def label_to_voice_id(label: str) -> str:
    """Translate a dropdown display label back into its voice id.

    The first choice whose label equals ``label`` wins. Unknown labels resolve
    to ``default_voice_id()``.
    """
    no_match = object()  # sentinel, so any real voice id counts as a hit
    voice_id = next(
        (vid for shown, vid in list_voice_choices() if shown == label),
        no_match,
    )
    return default_voice_id() if voice_id is no_match else voice_id
def load_voice_payload(voice_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Load the ChatTTS speaker data for ``voice_id``.

    An empty id or CUSTOM_VOICE_ID selects the cloned voice stored at
    SPEAKER_EMB_PATH; any other id is looked up in the manifest presets.

    Returns:
        ``(payload, None)`` on success, or ``(None, message)`` when the voice
        is unknown, its file does not exist yet, or the file cannot be loaded.
    """
    if voice_id == CUSTOM_VOICE_ID or not voice_id:
        if SPEAKER_EMB_PATH.is_file():
            return _load_payload_file(SPEAKER_EMB_PATH)
        return None, (
            "未找到锁定音色。请在「音色锁定」上传参考人声,"
            "或选择下方「预设」音色(需先运行 scripts/generate_voice_presets.sh)。"
        )

    # Lazily pick the first manifest entry carrying the requested id.
    presets = _read_manifest().get("presets", [])
    entry = next((item for item in presets if item.get("id") == voice_id), None)
    if entry is None:
        return None, f"未知音色 ID: {voice_id}"

    preset_path = VOICES_DIR / entry.get("file", "")
    if preset_path.is_file():
        return _load_payload_file(preset_path)

    shown_name = entry.get("label", voice_id)
    return None, (
        f"预设音色「{shown_name}」尚未生成。\n"
        "请在服务器执行: bash scripts/generate_voice_presets.sh"
    )
def _load_payload_file(path: Path) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
try:
payload = torch.load(path, map_location="cpu", weights_only=False)
if isinstance(payload, torch.Tensor):
return {"spk_emb": payload, "spk_smp": None, "txt_smp": ""}, None
if isinstance(payload, dict):
return payload, None
return None, f"音色文件格式无效: {path.name}"
except Exception as exc:
return None, f"读取音色文件失败 ({path.name}): {exc}"