Fix TTS synthesis UI stuck on loading state
Enable the Gradio queue, add immediate pending feedback and per-segment progress, and return gr.update for the Audio component so that long syntheses show logs and play back correctly.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -141,14 +141,40 @@ def _short_synth_log(msg: str, ok: bool) -> str:
|
||||
return "✅ 配音完成。请用下方播放器试听、下载。"
|
||||
|
||||
|
||||
def ui_synthesize(polished_text: str, voice_label: str) -> tuple[str, str | None]:
|
||||
"""【TTS 合成】生成最终 wav 配音文件。先更新日志再更新播放器,减轻闪屏。"""
|
||||
def ui_synth_pending(polished_text: str) -> tuple[str, dict]:
    """Give immediate feedback right after the synthesize button is clicked.

    This keeps a long stretch without log updates from being mistaken for a
    hang.

    Args:
        polished_text: The polished script text; ``None`` or whitespace-only
            input is treated as empty.

    Returns:
        A ``(log message, gr.update(value=None))`` pair. The message is either
        a prompt to finish polishing first (empty input) or a "synthesis in
        progress" notice carrying the character count and a rough time range.
    """
    cleaned = (polished_text or "").strip()
    if cleaned:
        # Rough estimate: one second per 10 characters, never below 20 seconds.
        est_sec = max(20, len(cleaned) // 10)
        notice = f"⏳ 配音合成中(约 {len(cleaned)} 字,预计 {est_sec}–{est_sec + 45} 秒),请勿重复点击…"
        return notice, gr.update(value=None)
    return "请先完成 Gemma4 润色。", gr.update(value=None)
|
||||
|
||||
|
||||
def ui_synthesize(
    polished_text: str,
    voice_label: str,
    progress: gr.Progress = gr.Progress(),
) -> tuple[str, dict]:
    """[TTS synthesis] Generate the final wav voice-over file.

    Args:
        polished_text: The polished script to synthesize.
        voice_label: Voice label, mapped to a voice id via
            ``label_to_voice_id``.
        progress: Gradio progress tracker; it is called once per segment
            through the callback handed to ``generate_voice``.

    Returns:
        A ``(log message, gr.update(...))`` pair. The update carries the wav
        path when ``generate_voice`` reports success and ``None`` otherwise
        (including when the input text is empty).
    """
    has_text = bool(polished_text) and bool(polished_text.strip())
    if not has_text:
        return "请先完成 Gemma4 润色。", gr.update(value=None)

    def _on_segment(seg: int, total: int) -> None:
        # Report the fraction of segments completed before this one starts;
        # max() keeps the division safe when total is 0.
        fraction = (seg - 1) / max(total, 1)
        progress(fraction, desc=f"ChatTTS 第 {seg}/{total} 段…")

    ok, msg, wav_path = generate_voice(
        polished_text,
        voice_id=label_to_voice_id(voice_label),
        progress_callback=_on_segment,
    )
    audio_value = wav_path if ok else None
    return _short_synth_log(msg, ok), gr.update(value=audio_value)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -173,10 +199,10 @@ def ui_full_pipeline(
|
||||
else:
|
||||
path = _save_upload(audio_file)
|
||||
if not path:
|
||||
return "", "", None, "❌ 请上传录音或手动填写转写文本。"
|
||||
return "", "", gr.update(value=None), "❌ 请上传录音或手动填写转写文本。"
|
||||
ok, result = transcribe_audio(path)
|
||||
if not ok:
|
||||
return "", "", None, f"❌ 识别失败: {result}"
|
||||
return "", "", gr.update(value=None), f"❌ 识别失败: {result}"
|
||||
raw = result
|
||||
logs.append(f"✅ Whisper 识别完成({len(raw)} 字)。")
|
||||
|
||||
@@ -187,7 +213,7 @@ def ui_full_pipeline(
|
||||
else:
|
||||
ok, result = polish_text(raw)
|
||||
if not ok:
|
||||
return raw, "", None, f"❌ 润色失败: {result}\n" + "\n".join(logs)
|
||||
return raw, "", gr.update(value=None), f"❌ 润色失败: {result}\n" + "\n".join(logs)
|
||||
polished = result
|
||||
logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。")
|
||||
|
||||
@@ -195,10 +221,10 @@ def ui_full_pipeline(
|
||||
voice_id = label_to_voice_id(voice_label)
|
||||
ok, msg, wav_path = generate_voice(polished, voice_id=voice_id)
|
||||
if not ok:
|
||||
return raw, polished, None, f"❌ 合成失败: {msg}\n" + "\n".join(logs)
|
||||
return raw, polished, gr.update(value=None), f"❌ 合成失败: {msg}\n" + "\n".join(logs)
|
||||
|
||||
logs.append(f"✅ {msg}")
|
||||
return raw, polished, wav_path, "\n".join(logs)
|
||||
return raw, polished, gr.update(value=wav_path), "\n".join(logs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1116,10 +1142,15 @@ def build_app() -> gr.Blocks:
|
||||
transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log])
|
||||
polish_btn.click(ui_polish, raw_text, [polished_text, polish_log])
|
||||
synth_btn.click(
|
||||
ui_synth_pending,
|
||||
[polished_text],
|
||||
[synth_log, output_audio],
|
||||
queue=True,
|
||||
).then(
|
||||
ui_synthesize,
|
||||
[polished_text, tts_voice],
|
||||
[synth_log, output_audio],
|
||||
show_progress="hidden",
|
||||
queue=True,
|
||||
)
|
||||
|
||||
# ---- Tab 3: 一键生产 ----
|
||||
@@ -1156,12 +1187,26 @@ def build_app() -> gr.Blocks:
|
||||
pipe_polished = gr.Textbox(label="润色稿", lines=6)
|
||||
pipe_output = gr.Audio(label="成品配音", type="filepath")
|
||||
|
||||
def ui_pipeline_pending(skip_polish: bool, manual_raw: str) -> tuple[str, dict]:
    """Return the "pipeline running" status shown before the full pipeline runs.

    Args:
        skip_polish: Whether the polish step is skipped.
        manual_raw: Manually entered transcript text; when it is non-blank the
            generic three-stage message is used regardless of ``skip_polish``.

    Returns:
        A ``(status message, gr.update(value=None))`` pair.
    """
    if manual_raw and manual_raw.strip():
        status = "⏳ 全流程运行中(识别/润色/合成),请稍候…"
    elif skip_polish:
        status = "⏳ 全流程运行中(识别→合成),请稍候…"
    else:
        status = "⏳ 全流程运行中(识别→润色→合成),请稍候…"
    return status, gr.update(value=None)
|
||||
|
||||
pipeline_btn.click(
|
||||
ui_pipeline_pending,
|
||||
[skip_polish_cb, pipe_manual],
|
||||
[pipeline_log, pipe_output],
|
||||
queue=True,
|
||||
).then(
|
||||
ui_full_pipeline,
|
||||
[pipe_audio, skip_polish_cb, pipe_manual, pipe_voice],
|
||||
[pipe_raw, pipe_polished, pipe_output, pipeline_log],
|
||||
queue=True,
|
||||
)
|
||||
|
||||
demo.queue(default_concurrency_limit=1)
|
||||
return demo
|
||||
|
||||
|
||||
|
||||
@@ -635,6 +635,7 @@ def _concat_wavs(
|
||||
def generate_voice(
|
||||
refined_text: str,
|
||||
voice_id: str = "custom",
|
||||
progress_callback=None,
|
||||
) -> Tuple[bool, str, Optional[str]]:
|
||||
"""
|
||||
使用 ChatTTS(本地 GPU)将润色稿合成为 wav。
|
||||
@@ -724,6 +725,11 @@ def generate_voice(
|
||||
for idx, chunk in enumerate(chunks, start=1):
|
||||
if not chunk or len(chunk) < 2:
|
||||
continue
|
||||
if progress_callback is not None:
|
||||
try:
|
||||
progress_callback(idx, len(chunks))
|
||||
except Exception:
|
||||
logger.debug("TTS 进度回调失败", exc_info=True)
|
||||
release_cuda_cache()
|
||||
chunk_infer = replace(params_infer_code, manual_seed=42 + idx)
|
||||
wavs = None
|
||||
|
||||
Reference in New Issue
Block a user