Add local GPU preset voices with dropdown selection.
Generate ChatTTS sample_random_speaker presets without cloud APIs; choose clone or preset in synthesize UI. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,139 @@
|
||||
"""
|
||||
Local GPU voice library (ChatTTS, no cloud API required).

- custom: the speaker_emb.pt cloned by the user under "音色锁定" (voice lock)
- preset_*: built-in speakers generated with ChatTTS sample_random_speaker (scripts/generate_voice_presets.sh)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
|
||||
from config import BASE_DIR, SPEAKER_EMB_PATH
|
||||
|
||||
logger = logging.getLogger(__name__)

# On-disk layout of the local voice library, rooted at BASE_DIR/voices.
VOICES_DIR = Path(BASE_DIR).joinpath("voices")
PRESETS_DIR = VOICES_DIR.joinpath("presets")
MANIFEST_PATH = VOICES_DIR.joinpath("manifest.json")

# Voice id that stands for the user's own cloned speaker embedding.
CUSTOM_VOICE_ID = "custom"

# Display labels of the built-in presets, in slot order (preset_01, preset_02, ...).
_PRESET_LABELS = (
    "预设·沉稳男声",
    "预设·青年男声",
    "预设·温柔女声",
    "预设·活泼女声",
    "预设·中性旁白",
    "预设·纪录片风",
)

# Preset metadata written by the generation script (the .pt files themselves
# are not committed to Git). Each "file" entry is relative to VOICES_DIR.
DEFAULT_MANIFEST = {
    "presets": [
        {
            "id": f"preset_{slot:02d}",
            "label": label,
            "file": f"presets/preset_{slot:02d}.pt",
        }
        for slot, label in enumerate(_PRESET_LABELS, start=1)
    ]
}
|
||||
|
||||
|
||||
def ensure_manifest() -> None:
    """Create the voice directories and seed the manifest file if it is missing.

    ``VOICES_DIR`` and ``PRESETS_DIR`` are created when absent. The manifest is
    written from ``DEFAULT_MANIFEST`` only when nothing exists at
    ``MANIFEST_PATH``; an existing manifest file is never overwritten.
    """
    for directory in (VOICES_DIR, PRESETS_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    if MANIFEST_PATH.is_file():
        return

    # ensure_ascii=False keeps the Chinese labels human-readable in the file.
    serialized = json.dumps(DEFAULT_MANIFEST, ensure_ascii=False, indent=2)
    MANIFEST_PATH.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def _read_manifest() -> Dict[str, Any]:
    """Return the parsed voice manifest, falling back to the built-in default.

    The manifest file is created first if it does not exist. When the file
    cannot be read, is not valid UTF-8/JSON, or does not contain a JSON object
    at the top level, a warning is logged and a *copy* of ``DEFAULT_MANIFEST``
    is returned, so callers can never mutate the module-level default.

    Returns:
        The manifest as a dict (callers look up its ``"presets"`` list).
    """
    # Local import keeps this block self-contained; only the fallback needs it.
    import copy

    ensure_manifest()
    try:
        manifest = json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # OSError: unreadable file. ValueError: covers json.JSONDecodeError
        # and UnicodeDecodeError.
        logger.warning("读取 manifest 失败: %s", exc)
    else:
        if isinstance(manifest, dict):
            return manifest
        # A list/str/null top level would crash callers on `.get(...)`.
        logger.warning(
            "读取 manifest 失败: %s",
            f"expected a JSON object at top level, got {type(manifest).__name__}",
        )
    return copy.deepcopy(DEFAULT_MANIFEST)
|
||||
|
||||
|
||||
def list_voice_choices() -> List[Tuple[str, str]]:
    """Return the Gradio Dropdown options as ``(display label, voice_id)`` pairs.

    Only voices whose file exists on disk are listed: the custom cloned voice
    (when ``SPEAKER_EMB_PATH`` is a file) followed by every manifest preset
    whose ``file`` resolves to an existing file under ``VOICES_DIR``.

    Malformed manifest entries (non-dict items, missing or non-string ``id`` /
    ``file``) are skipped instead of raising, and an empty or missing
    ``label`` falls back to the preset id so the dropdown never shows a blank
    entry.

    Returns:
        A non-empty list. When no voice is available it holds a single
        placeholder entry that explains how to create one and maps to
        ``CUSTOM_VOICE_ID``.
    """
    choices: List[Tuple[str, str]] = []

    if SPEAKER_EMB_PATH.is_file():
        choices.append(("我的锁定音色(声音克隆)", CUSTOM_VOICE_ID))

    presets = _read_manifest().get("presets")
    # A hand-edited manifest may carry `"presets": null` or a non-list value.
    for preset in presets if isinstance(presets, list) else []:
        if not isinstance(preset, dict):
            continue
        pid = preset.get("id")
        rel = preset.get("file")
        if not (isinstance(pid, str) and pid and isinstance(rel, str) and rel):
            continue
        if not (VOICES_DIR / rel).is_file():
            continue
        label = preset.get("label")
        choices.append((label if isinstance(label, str) and label else pid, pid))

    if not choices:
        choices.append(
            (
                "(请先在「音色锁定」上传人声,或运行 generate_voice_presets.sh)",
                CUSTOM_VOICE_ID,
            )
        )
    return choices
|
||||
|
||||
|
||||
def default_voice_id() -> str:
    """Pick the voice id that should be pre-selected.

    The custom cloned voice wins whenever it appears among the current
    choices (or when there are no choices at all); otherwise the first listed
    choice is used.
    """
    available_ids = [voice_id for _label, voice_id in list_voice_choices()]
    if not available_ids or CUSTOM_VOICE_ID in available_ids:
        return CUSTOM_VOICE_ID
    return available_ids[0]
|
||||
|
||||
|
||||
def voice_choice_labels() -> List[str]:
    """Return only the display labels of the current voice choices, in order."""
    return [label for label, _voice_id in list_voice_choices()]
|
||||
|
||||
|
||||
def label_to_voice_id(label: str) -> str:
    """Translate a dropdown display label back into its voice id.

    The first choice whose label equals ``label`` wins. When nothing matches,
    the result of ``default_voice_id()`` is returned instead.
    """
    matched = next(
        (voice_id for shown, voice_id in list_voice_choices() if shown == label),
        None,
    )
    return default_voice_id() if matched is None else matched
|
||||
|
||||
|
||||
def load_voice_payload(voice_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Load the ChatTTS speaker data for ``voice_id``.

    An empty ``voice_id`` or ``CUSTOM_VOICE_ID`` selects the user's cloned
    voice at ``SPEAKER_EMB_PATH``; any other id is looked up among the
    manifest presets and its ``file`` is resolved relative to ``VOICES_DIR``.

    Malformed manifest entries (non-dict items, a non-list ``presets`` value)
    are skipped instead of raising. A matching preset whose ``file`` is
    missing, not a string, or not present on disk is reported as "not
    generated yet"; an empty label falls back to the voice id in that message.

    Returns:
        ``(payload, None)`` on success, or ``(None, message)`` with a
        user-facing error message on failure.
    """
    if not voice_id or voice_id == CUSTOM_VOICE_ID:
        if not SPEAKER_EMB_PATH.is_file():
            return None, (
                "未找到锁定音色。请在「音色锁定」上传参考人声,"
                "或选择下方「预设」音色(需先运行 scripts/generate_voice_presets.sh)。"
            )
        return _load_payload_file(SPEAKER_EMB_PATH)

    presets = _read_manifest().get("presets")
    for preset in presets if isinstance(presets, list) else []:
        if not isinstance(preset, dict) or preset.get("id") != voice_id:
            continue

        rel = preset.get("file")
        # Guard against `"file": null` etc., which would make `/` raise TypeError.
        path = VOICES_DIR / rel if isinstance(rel, str) and rel else None
        if path is None or not path.is_file():
            label = preset.get("label") or voice_id
            return None, (
                f"预设音色「{label}」尚未生成。\n"
                "请在服务器执行: bash scripts/generate_voice_presets.sh"
            )
        return _load_payload_file(path)

    return None, f"未知音色 ID: {voice_id}"
|
||||
|
||||
|
||||
def _load_payload_file(path: Path) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
|
||||
try:
|
||||
payload = torch.load(path, map_location="cpu", weights_only=False)
|
||||
if isinstance(payload, torch.Tensor):
|
||||
return {"spk_emb": payload, "spk_smp": None, "txt_smp": ""}, None
|
||||
if isinstance(payload, dict):
|
||||
return payload, None
|
||||
return None, f"音色文件格式无效: {path.name}"
|
||||
except Exception as exc:
|
||||
return None, f"读取音色文件失败 ({path.name}): {exc}"
|
||||
Reference in New Issue
Block a user