Files
Trading_Studio/voice_presets.py
T
dekun eb71e28427 Add local GPU preset voices with dropdown selection.
Generate ChatTTS sample_random_speaker presets without cloud APIs; choose clone or preset in synthesize UI.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 17:28:17 +08:00

140 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Local GPU voice library (ChatTTS; no cloud API required).
- custom: the speaker_emb.pt cloned by the user on the "音色锁定" (voice lock) tab
- preset_*: built-in speakers generated with ChatTTS sample_random_speaker (scripts/generate_voice_presets.sh)
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import torch
from config import BASE_DIR, SPEAKER_EMB_PATH
logger = logging.getLogger(__name__)

# On-disk layout: <BASE_DIR>/voices holds manifest.json and a presets/ subdirectory.
VOICES_DIR = Path(BASE_DIR).joinpath("voices")
PRESETS_DIR = VOICES_DIR.joinpath("presets")
MANIFEST_PATH = VOICES_DIR.joinpath("manifest.json")

# Voice id that stands for the user's own cloned speaker embedding.
CUSTOM_VOICE_ID = "custom"

# Preset metadata written by the generation script (.pt files are not committed to Git).
# Each "file" value is a path relative to VOICES_DIR.
DEFAULT_MANIFEST = {
    "presets": [
        {"id": "preset_01", "label": "预设·沉稳男声", "file": "presets/preset_01.pt"},
        {"id": "preset_02", "label": "预设·青年男声", "file": "presets/preset_02.pt"},
        {"id": "preset_03", "label": "预设·温柔女声", "file": "presets/preset_03.pt"},
        {"id": "preset_04", "label": "预设·活泼女声", "file": "presets/preset_04.pt"},
        {"id": "preset_05", "label": "预设·中性旁白", "file": "presets/preset_05.pt"},
        {"id": "preset_06", "label": "预设·纪录片风", "file": "presets/preset_06.pt"},
    ],
}
def ensure_manifest() -> None:
    """Create the voice directories and seed manifest.json when it is absent.

    Both VOICES_DIR and PRESETS_DIR are created (including parents) if needed.
    An existing manifest file is left untouched; otherwise DEFAULT_MANIFEST is
    written as UTF-8 JSON with non-ASCII characters kept readable.
    """
    for directory in (VOICES_DIR, PRESETS_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    if MANIFEST_PATH.is_file():
        return

    serialized = json.dumps(DEFAULT_MANIFEST, ensure_ascii=False, indent=2)
    MANIFEST_PATH.write_text(serialized, encoding="utf-8")
def _read_manifest() -> Dict[str, Any]:
    """Load the voice manifest, falling back to the defaults when it is unusable.

    Returns:
        A dict whose "presets" value is a list containing only dict entries.
        When the file cannot be read or parsed, or its structure is not a
        JSON object with a list under "presets", a fresh deep copy of
        DEFAULT_MANIFEST is returned so callers can never mutate the shared
        module-level default.
    """
    import copy  # local import: keeps this block self-contained

    ensure_manifest()
    try:
        manifest = json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
    except Exception as exc:  # deliberate best-effort: any read/parse failure -> defaults
        logger.warning("读取 manifest 失败: %s", exc)
        return copy.deepcopy(DEFAULT_MANIFEST)

    # json.loads may legally return a list/str/number/None; callers rely on
    # dict.get("presets") yielding an iterable of dicts.
    presets = manifest.get("presets", []) if isinstance(manifest, dict) else None
    if not isinstance(presets, list):
        logger.warning("读取 manifest 失败: %s", "manifest 结构无效,使用默认清单")
        return copy.deepcopy(DEFAULT_MANIFEST)

    # Drop malformed entries instead of letting callers crash on `.get`.
    manifest["presets"] = [entry for entry in presets if isinstance(entry, dict)]
    return manifest
def list_voice_choices() -> List[Tuple[str, str]]:
    """Build the Gradio Dropdown options as (display label, voice_id) pairs.

    Only voices whose files exist on disk are listed: the cloned voice when
    SPEAKER_EMB_PATH is a file, followed by each manifest preset whose "file"
    (relative to VOICES_DIR) is present. When nothing is available, a single
    placeholder hint entry carrying CUSTOM_VOICE_ID is returned instead, so
    the result is never empty.
    """
    options: List[Tuple[str, str]] = []

    if SPEAKER_EMB_PATH.is_file():
        options.append(("我的锁定音色(声音克隆)", CUSTOM_VOICE_ID))

    for entry in _read_manifest().get("presets", []):
        preset_id = entry.get("id", "")
        display = entry.get("label", preset_id)
        relative_file = entry.get("file", "")
        # Entries lacking an id or a file path are skipped outright.
        if not (preset_id and relative_file):
            continue
        if (VOICES_DIR / relative_file).is_file():
            options.append((display, preset_id))

    if options:
        return options

    hint = "(请先在「音色锁定」上传人声,或运行 generate_voice_presets.sh"
    return [(hint, CUSTOM_VOICE_ID)]
def default_voice_id() -> str:
    """Return the voice id to preselect.

    CUSTOM_VOICE_ID wins whenever any listed choice carries it (or when the
    list is empty); otherwise the id of the first listed choice is used.
    """
    listed_ids = [voice_id for _label, voice_id in list_voice_choices()]
    if not listed_ids or CUSTOM_VOICE_ID in listed_ids:
        return CUSTOM_VOICE_ID
    return listed_ids[0]
def voice_choice_labels() -> List[str]:
    """Return just the display labels of the current voice choices, in order."""
    return [display for display, _voice_id in list_voice_choices()]
def label_to_voice_id(label: str) -> str:
    """Map a dropdown display label back to its voice id.

    The first choice whose label equals ``label`` wins; when no label
    matches, the result of default_voice_id() is returned.
    """
    not_found = object()  # private sentinel so any real id value is returned as-is
    matched = next(
        (voice_id for display, voice_id in list_voice_choices() if display == label),
        not_found,
    )
    if matched is not_found:
        return default_voice_id()
    return matched
def load_voice_payload(voice_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Load the ChatTTS speaker data for ``voice_id``.

    An empty id or CUSTOM_VOICE_ID selects the cloned voice at
    SPEAKER_EMB_PATH; any other id is looked up in the manifest presets.

    Returns:
        ``(payload, None)`` on success, or ``(None, message)`` with a
        user-facing error message when the voice file is missing, the id is
        unknown, or the file cannot be loaded.
    """
    wants_custom = (not voice_id) or voice_id == CUSTOM_VOICE_ID
    if wants_custom:
        if SPEAKER_EMB_PATH.is_file():
            return _load_payload_file(SPEAKER_EMB_PATH)
        return None, (
            "未找到锁定音色。请在「音色锁定」上传参考人声,"
            "或选择下方「预设」音色(需先运行 scripts/generate_voice_presets.sh)。"
        )

    # First manifest entry with a matching id, if any.
    entry = next(
        (item for item in _read_manifest().get("presets", []) if item.get("id") == voice_id),
        None,
    )
    if entry is None:
        return None, f"未知音色 ID: {voice_id}"

    preset_path = VOICES_DIR / entry.get("file", "")
    if preset_path.is_file():
        return _load_payload_file(preset_path)

    # The preset is declared in the manifest but its file is not on disk.
    return None, (
        f"预设音色「{entry.get('label', voice_id)}」尚未生成。\n"
        "请在服务器执行: bash scripts/generate_voice_presets.sh"
    )
def _load_payload_file(path: Path) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Read a speaker file with torch.load and normalise it to a payload dict.

    A dict is returned unchanged. A bare tensor is wrapped as
    ``{"spk_emb": tensor, "spk_smp": None, "txt_smp": ""}``. Any other object
    type, or any exception raised while loading, yields ``(None, message)``.
    """
    try:
        # NOTE(review): weights_only=False permits full pickle deserialization;
        # this is only safe for locally generated files — confirm that no
        # user-supplied .pt file can reach this call.
        loaded = torch.load(path, map_location="cpu", weights_only=False)
    except Exception as exc:
        return None, f"读取音色文件失败 ({path.name}): {exc}"

    if isinstance(loaded, dict):
        return loaded, None
    if isinstance(loaded, torch.Tensor):
        wrapped = {"spk_emb": loaded, "spk_smp": None, "txt_smp": ""}
        return wrapped, None
    return None, f"音色文件格式无效: {path.name}"