Fix TTS synthesis UI stuck on loading state

Enable the Gradio queue, show an immediate "pending" status message, report per-segment progress, and return gr.update for the Audio component so that long syntheses keep showing logs and the result plays back correctly.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 18:02:34 +08:00
parent 97c11e08e0
commit 7c50b13c57
2 changed files with 62 additions and 11 deletions
+56 -11
View File
@@ -141,14 +141,40 @@ def _short_synth_log(msg: str, ok: bool) -> str:
return "✅ 配音完成。请用下方播放器试听、下载。"
def ui_synthesize(polished_text: str, voice_label: str) -> tuple[str, str | None]:
"""【TTS 合成】生成最终 wav 配音文件。先更新日志更新播放器,减轻闪屏"""
def ui_synth_pending(polished_text: str) -> tuple[str, dict]:
    """Give immediate feedback right after the synthesize button is clicked.

    This runs before the long synthesis step so that a long stretch without
    any log update is not mistaken for a hang.

    Args:
        polished_text: The text about to be synthesized. ``None`` or a
            whitespace-only string is treated as empty.

    Returns:
        A ``(log_message, audio_update)`` pair. The audio update always
        resets the player value to ``None``.
    """
    text = (polished_text or "").strip()
    if not text:
        return "请先完成 Gemma4 润色。", gr.update(value=None)

    # Estimate: one second per 10 characters, never less than 20 seconds.
    est_sec = max(20, len(text) // 10)
    # The lower and upper bounds need an explicit separator; without it the
    # two numbers run together (e.g. 20 and 65 would display as "2065").
    return (
        f"⏳ 配音合成中(约 {len(text)} 字,预计 {est_sec}~{est_sec + 45} 秒),请勿重复点击…",
        gr.update(value=None),
    )
def ui_synthesize(
    polished_text: str,
    voice_label: str,
    progress: gr.Progress = gr.Progress(),
) -> tuple[str, dict]:
    """[TTS synthesis] Generate the final wav voice-over file.

    Args:
        polished_text: The polished text to synthesize. Empty or
            whitespace-only input short-circuits with a hint message.
        voice_label: UI label of the selected voice; converted to a voice id
            via ``label_to_voice_id``.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            kept as-is so Gradio can inject the tracker for this event.

    Returns:
        A ``(log_message, audio_update)`` pair. The audio update carries the
        wav path on success and ``None`` otherwise.
    """
    if not polished_text or not polished_text.strip():
        return "请先完成 Gemma4 润色。", gr.update(value=None)

    voice_id = label_to_voice_id(voice_label)

    def _report_segment(seg: int, total: int) -> None:
        # Presumably ``seg`` is 1-based, so (seg - 1) segments are already done
        # when segment ``seg`` starts -- TODO confirm against generate_voice.
        # max(total, 1) avoids a division by zero when total is 0.
        progress((seg - 1) / max(total, 1), desc=f"ChatTTS 第 {seg}/{total} 段…")

    # Single synthesis call, with per-segment progress reporting wired in.
    ok, msg, wav_path = generate_voice(
        polished_text,
        voice_id=voice_id,
        progress_callback=_report_segment,
    )
    # Always answer with gr.update so the Audio component is explicitly set
    # (to the new file) or cleared (on failure).
    return _short_synth_log(msg, ok), gr.update(value=wav_path if ok else None)
# ---------------------------------------------------------------------------
@@ -173,10 +199,10 @@ def ui_full_pipeline(
else:
path = _save_upload(audio_file)
if not path:
return "", "", None, "❌ 请上传录音或手动填写转写文本。"
return "", "", gr.update(value=None), "❌ 请上传录音或手动填写转写文本。"
ok, result = transcribe_audio(path)
if not ok:
return "", "", None, f"❌ 识别失败: {result}"
return "", "", gr.update(value=None), f"❌ 识别失败: {result}"
raw = result
logs.append(f"✅ Whisper 识别完成({len(raw)} 字)。")
@@ -187,7 +213,7 @@ def ui_full_pipeline(
else:
ok, result = polish_text(raw)
if not ok:
return raw, "", None, f"❌ 润色失败: {result}\n" + "\n".join(logs)
return raw, "", gr.update(value=None), f"❌ 润色失败: {result}\n" + "\n".join(logs)
polished = result
logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。")
@@ -195,10 +221,10 @@ def ui_full_pipeline(
voice_id = label_to_voice_id(voice_label)
ok, msg, wav_path = generate_voice(polished, voice_id=voice_id)
if not ok:
return raw, polished, None, f"❌ 合成失败: {msg}\n" + "\n".join(logs)
return raw, polished, gr.update(value=None), f"❌ 合成失败: {msg}\n" + "\n".join(logs)
logs.append(f"{msg}")
return raw, polished, wav_path, "\n".join(logs)
return raw, polished, gr.update(value=wav_path), "\n".join(logs)
# ---------------------------------------------------------------------------
@@ -1116,10 +1142,15 @@ def build_app() -> gr.Blocks:
transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log])
polish_btn.click(ui_polish, raw_text, [polished_text, polish_log])
synth_btn.click(
ui_synth_pending,
[polished_text],
[synth_log, output_audio],
queue=True,
).then(
ui_synthesize,
[polished_text, tts_voice],
[synth_log, output_audio],
show_progress="hidden",
queue=True,
)
# ---- Tab 3: 一键生产 ----
@@ -1156,12 +1187,26 @@ def build_app() -> gr.Blocks:
pipe_polished = gr.Textbox(label="润色稿", lines=6)
pipe_output = gr.Audio(label="成品配音", type="filepath")
def ui_pipeline_pending(skip_polish: bool, manual_raw: str) -> tuple[str, dict]:
    """Return an immediate "pipeline running" status and clear the audio output.

    Args:
        skip_polish: Whether the polish step is skipped.
        manual_raw: Manually entered transcript. When it contains
            non-whitespace text it takes precedence over ``skip_polish``
            in choosing the status message.

    Returns:
        A ``(log_message, audio_update)`` pair. The audio update always
        resets the player value to ``None``.
    """
    has_manual_text = bool(manual_raw and manual_raw.strip())
    if has_manual_text:
        status = "⏳ 全流程运行中(识别/润色/合成),请稍候…"
    elif skip_polish:
        status = "⏳ 全流程运行中(识别→合成),请稍候…"
    else:
        status = "⏳ 全流程运行中(识别→润色→合成),请稍候…"
    return status, gr.update(value=None)
pipeline_btn.click(
ui_pipeline_pending,
[skip_polish_cb, pipe_manual],
[pipeline_log, pipe_output],
queue=True,
).then(
ui_full_pipeline,
[pipe_audio, skip_polish_cb, pipe_manual, pipe_voice],
[pipe_raw, pipe_polished, pipe_output, pipeline_log],
queue=True,
)
demo.queue(default_concurrency_limit=1)
return demo