""" 本地 GPU 音色库(ChatTTS,无需云端 API) - custom:用户在「音色锁定」克隆的 speaker_emb.pt - preset_*:ChatTTS sample_random_speaker 生成的内置说话人(scripts/generate_voice_presets.sh) """ from __future__ import annotations import json import logging from pathlib import Path from typing import Any, Dict, List, Optional, Tuple import torch from config import BASE_DIR, SPEAKER_EMB_PATH logger = logging.getLogger(__name__) VOICES_DIR = Path(BASE_DIR) / "voices" PRESETS_DIR = VOICES_DIR / "presets" MANIFEST_PATH = VOICES_DIR / "manifest.json" CUSTOM_VOICE_ID = "custom" # 生成脚本写入的预设元数据(.pt 文件不入 Git) DEFAULT_MANIFEST = { "presets": [ {"id": "preset_01", "label": "预设·沉稳男声", "file": "presets/preset_01.pt"}, {"id": "preset_02", "label": "预设·青年男声", "file": "presets/preset_02.pt"}, {"id": "preset_03", "label": "预设·温柔女声", "file": "presets/preset_03.pt"}, {"id": "preset_04", "label": "预设·活泼女声", "file": "presets/preset_04.pt"}, {"id": "preset_05", "label": "预设·中性旁白", "file": "presets/preset_05.pt"}, {"id": "preset_06", "label": "预设·纪录片风", "file": "presets/preset_06.pt"}, ] } def ensure_manifest() -> None: VOICES_DIR.mkdir(parents=True, exist_ok=True) PRESETS_DIR.mkdir(parents=True, exist_ok=True) if not MANIFEST_PATH.is_file(): MANIFEST_PATH.write_text( json.dumps(DEFAULT_MANIFEST, ensure_ascii=False, indent=2), encoding="utf-8", ) def _read_manifest() -> Dict[str, Any]: ensure_manifest() try: return json.loads(MANIFEST_PATH.read_text(encoding="utf-8")) except Exception as exc: logger.warning("读取 manifest 失败: %s", exc) return DEFAULT_MANIFEST def list_voice_choices() -> List[Tuple[str, str]]: """ 返回 Gradio Dropdown 选项:(显示名, voice_id)。 仅列出磁盘上已存在的音色。 """ choices: List[Tuple[str, str]] = [] if SPEAKER_EMB_PATH.is_file(): choices.append(("我的锁定音色(声音克隆)", CUSTOM_VOICE_ID)) for preset in _read_manifest().get("presets", []): pid = preset.get("id", "") label = preset.get("label", pid) rel = preset.get("file", "") if pid and rel and (VOICES_DIR / rel).is_file(): choices.append((label, pid)) if not choices: choices.append( ( "(请先在「音色锁定」上传人声,或运行 generate_voice_presets.sh)", CUSTOM_VOICE_ID, ) ) return choices def default_voice_id() -> str: choices = list_voice_choices() if not choices: return CUSTOM_VOICE_ID for _label, vid in choices: if vid == CUSTOM_VOICE_ID: return CUSTOM_VOICE_ID return choices[0][1] def voice_choice_labels() -> List[str]: return [c[0] for c in list_voice_choices()] def label_to_voice_id(label: str) -> str: for lbl, vid in list_voice_choices(): if lbl == label: return vid return default_voice_id() def load_voice_payload(voice_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]: """按 voice_id 加载 ChatTTS 说话人数据。""" if voice_id == CUSTOM_VOICE_ID or not voice_id: if not SPEAKER_EMB_PATH.is_file(): return None, ( "未找到锁定音色。请在「音色锁定」上传参考人声," "或选择下方「预设」音色(需先运行 scripts/generate_voice_presets.sh)。" ) return _load_payload_file(SPEAKER_EMB_PATH) for preset in _read_manifest().get("presets", []): if preset.get("id") != voice_id: continue path = VOICES_DIR / preset.get("file", "") if not path.is_file(): return None, ( f"预设音色「{preset.get('label', voice_id)}」尚未生成。\n" f"请在服务器执行: bash scripts/generate_voice_presets.sh" ) return _load_payload_file(path) return None, f"未知音色 ID: {voice_id}" def _load_payload_file(path: Path) -> Tuple[Optional[Dict[str, Any]], Optional[str]]: try: payload = torch.load(path, map_location="cpu", weights_only=False) if isinstance(payload, torch.Tensor): return {"spk_emb": payload, "spk_smp": None, "txt_smp": ""}, None if isinstance(payload, dict): return payload, None return None, f"音色文件格式无效: {path.name}" except Exception as exc: return None, f"读取音色文件失败 ({path.name}): {exc}"