Add local GPU preset voices with dropdown selection.
Generate ChatTTS `sample_random_speaker` voice presets locally, without any cloud APIs; the synthesis UI now lets the user choose between the cloned (locked) voice and a preset voice.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -24,6 +24,7 @@ from config import (
|
||||
)
|
||||
from llm_service import check_ollama_health, polish_text
|
||||
from tts_service import generate_voice, save_fixed_speaker, speaker_is_ready
|
||||
from voice_presets import label_to_voice_id, voice_choice_labels
|
||||
from whisper_service import transcribe_audio
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -39,6 +40,12 @@ logging.basicConfig(
|
||||
)
|
||||
logger = logging.getLogger("trading_studio")
|
||||
|
||||
|
||||
def _default_voice_label() -> str:
    """Return the label used as the initial value of the voice dropdowns.

    The first entry reported by ``voice_choice_labels()`` is used; when that
    call yields nothing, a hard-coded fallback label is returned instead.
    """
    choices = voice_choice_labels()
    if not choices:
        # No labels available: fall back to the built-in label string.
        return "我的锁定音色(声音克隆)"
    return choices[0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 全局 UI 状态(Gradio State)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -117,12 +124,13 @@ def ui_check_ollama() -> str:
|
||||
# ---------------------------------------------------------------------------
|
||||
# 模块 4:ChatTTS 音频合成
|
||||
# ---------------------------------------------------------------------------
|
||||
def ui_synthesize(polished_text: str) -> tuple[str | None, str]:
|
||||
def ui_synthesize(polished_text: str, voice_label: str) -> tuple[str | None, str]:
|
||||
"""【TTS 合成】生成最终 wav 配音文件。"""
|
||||
if not polished_text or not polished_text.strip():
|
||||
return None, "请先完成 Gemma4 润色。"
|
||||
|
||||
ok, msg, wav_path = generate_voice(polished_text)
|
||||
voice_id = label_to_voice_id(voice_label)
|
||||
ok, msg, wav_path = generate_voice(polished_text, voice_id=voice_id)
|
||||
if ok and wav_path:
|
||||
return wav_path, f"✅ {msg}"
|
||||
return None, f"❌ {msg}"
|
||||
@@ -135,6 +143,7 @@ def ui_full_pipeline(
|
||||
audio_file,
|
||||
skip_polish: bool,
|
||||
manual_raw: str,
|
||||
voice_label: str,
|
||||
) -> tuple[str, str, str | None, str]:
|
||||
"""
|
||||
串联执行:识别 → 润色(可跳过)→ 合成。
|
||||
@@ -168,7 +177,8 @@ def ui_full_pipeline(
|
||||
logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。")
|
||||
|
||||
# Step 3: 合成
|
||||
ok, msg, wav_path = generate_voice(polished)
|
||||
voice_id = label_to_voice_id(voice_label)
|
||||
ok, msg, wav_path = generate_voice(polished, voice_id=voice_id)
|
||||
if not ok:
|
||||
return raw, polished, None, f"❌ 合成失败: {msg}\n" + "\n".join(logs)
|
||||
|
||||
@@ -939,11 +949,16 @@ def build_app() -> gr.Blocks:
|
||||
polish_log = gr.Textbox(label="润色日志", lines=2, interactive=False)
|
||||
|
||||
with gr.Column(scale=1):
|
||||
gr.Markdown("### Step 3 · ChatTTS 配音合成")
|
||||
gr.Markdown("### Step 3 · 本地 GPU 配音合成")
|
||||
gr.Markdown(
|
||||
"> 合成前会自动去掉 **Markdown**(`#`、`**`)、emoji、"
|
||||
"舞台提示(如前奏/转场)和文末「修改笔记」。"
|
||||
"也可手动删成纯口语文本再点合成。"
|
||||
"> 全部在 **本机显卡** 运行,无需微软/讯飞 API。"
|
||||
"可选「我的锁定音色」或预设男/女声;合成前会自动清洗 Markdown。"
|
||||
)
|
||||
tts_voice = gr.Dropdown(
|
||||
label="配音音色(本地 ChatTTS)",
|
||||
choices=voice_choice_labels(),
|
||||
value=_default_voice_label(),
|
||||
info="预设音色需先在服务器执行 bash scripts/generate_voice_presets.sh",
|
||||
)
|
||||
polished_text = gr.Textbox(
|
||||
label="润色配音稿(可编辑,支持含 Markdown,合成时自动清洗)",
|
||||
@@ -956,7 +971,11 @@ def build_app() -> gr.Blocks:
|
||||
|
||||
transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log])
|
||||
polish_btn.click(ui_polish, raw_text, [polished_text, polish_log])
|
||||
synth_btn.click(ui_synthesize, polished_text, [output_audio, synth_log])
|
||||
synth_btn.click(
|
||||
ui_synthesize,
|
||||
[polished_text, tts_voice],
|
||||
[output_audio, synth_log],
|
||||
)
|
||||
|
||||
# ---- Tab 3: 一键生产 ----
|
||||
with gr.Tab("🚀 一键生产"):
|
||||
@@ -979,6 +998,11 @@ def build_app() -> gr.Blocks:
|
||||
label="跳过 Gemma4 润色(仅测试 TTS)",
|
||||
value=False,
|
||||
)
|
||||
pipe_voice = gr.Dropdown(
|
||||
label="配音音色(本地 ChatTTS)",
|
||||
choices=voice_choice_labels(),
|
||||
value=_default_voice_label(),
|
||||
)
|
||||
pipeline_btn = gr.Button("▶ 启动全流程", variant="primary", size="lg")
|
||||
pipeline_log = gr.Textbox(label="流水线日志", lines=6, interactive=False)
|
||||
with gr.Row(elem_classes=["pipeline-output-row"]):
|
||||
@@ -988,7 +1012,7 @@ def build_app() -> gr.Blocks:
|
||||
|
||||
pipeline_btn.click(
|
||||
ui_full_pipeline,
|
||||
[pipe_audio, skip_polish_cb, pipe_manual],
|
||||
[pipe_audio, skip_polish_cb, pipe_manual, pipe_voice],
|
||||
[pipe_raw, pipe_polished, pipe_output, pipeline_log],
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user