diff --git a/app.py b/app.py
index 68cbb07..0cf1002 100644
--- a/app.py
+++ b/app.py
@@ -25,7 +25,8 @@ from config import (
)
from llm_service import check_ollama_health, polish_text
from tts_service import generate_voice, save_fixed_speaker, speaker_is_ready
-from voice_presets import label_to_voice_id, voice_choice_labels
+from voice_history import list_voice_history
+from voice_presets import default_voice_label, label_to_voice_id, voice_choice_labels
from whisper_service import transcribe_audio
# ---------------------------------------------------------------------------
@@ -42,9 +43,35 @@ logging.basicConfig(
logger = logging.getLogger("trading_studio")
-def _default_voice_label() -> str:
- labels = voice_choice_labels()
- return labels[0] if labels else "我的锁定音色(声音克隆)"
+# ---------------------------------------------------------------------------
+# 配音历史
+# ---------------------------------------------------------------------------
+def ui_history_dropdown(select_path: str | None = None) -> dict:
+    """Refresh the history dropdown, optionally selecting a specific file.
+
+    Args:
+        select_path: Path of the entry to select, e.g. the wav that was just
+            synthesized. When omitted, or when it is not part of the current
+            history, the first (newest) entry is selected instead.
+
+    Returns:
+        A ``gr.update`` payload carrying the refreshed choices and the
+        selected value (``None`` when the history is empty).
+    """
+    choices = list_voice_history()
+    paths = [p for _, p in choices]
+    # list_voice_history() yields resolved absolute paths, so normalise the
+    # requested path the same way; a relative or unresolved path would
+    # otherwise never match and the selection would silently fall back.
+    wanted = str(Path(select_path).resolve()) if select_path else None
+    if wanted is not None and wanted in paths:
+        value = wanted
+    elif paths:
+        value = paths[0]
+    else:
+        value = None
+    return gr.update(choices=choices, value=value)
+
+
+def ui_history_play(filepath: str | None) -> dict:
+    """Load the selected history entry into the player.
+
+    The player is cleared when nothing is selected or the path no longer
+    points to an existing file.
+    """
+    playable = bool(filepath) and Path(filepath).is_file()
+    return gr.update(value=filepath if playable else None)
+
+
+def ui_initial_history() -> tuple[dict, dict]:
+    """Populate the history list on first page load and preselect the newest entry.
+
+    Returns:
+        A pair of updates: one for the history dropdown, one for the history
+        player (built by ``ui_history_play`` for the preselected path).
+    """
+    history = list_voice_history()
+    newest = history[0][1] if history else None
+    dropdown_update = gr.update(choices=history, value=newest)
+    return dropdown_update, ui_history_play(newest)
# ---------------------------------------------------------------------------
@@ -141,40 +168,42 @@ def _short_synth_log(msg: str, ok: bool) -> str:
return "✅ 配音完成。请用下方播放器试听、下载。"
-def ui_synth_pending(polished_text: str) -> tuple[str, dict]:
- """点击合成后立即反馈,避免长时间无日志更新被误认为卡死。"""
+def ui_synth_pending(polished_text: str) -> str:
+ """Update the log right after the synth button is clicked; the player is left untouched so the waveform component is not destroyed and rebuilt, which caused a visible flash."""
text = (polished_text or "").strip()
if not text:
- return "请先完成 Gemma4 润色。", gr.update(value=None)
+ return "请先完成 Gemma4 润色。"
est_sec = max(20, len(text) // 10)
return (
- f"⏳ 配音合成中(约 {len(text)} 字,预计 {est_sec}–{est_sec + 45} 秒),请勿重复点击…",
- gr.update(value=None),
+ f"⏳ 配音合成中(约 {len(text)} 字,预计 {est_sec}–{est_sec + 45} 秒),请勿重复点击或刷新页面…"
)
-def ui_synthesize(
- polished_text: str,
- voice_label: str,
- progress: gr.Progress = gr.Progress(),
-) -> tuple[str, dict]:
+def ui_synthesize(polished_text: str, voice_label: str) -> tuple[str, dict, dict, dict]:
"""【TTS 合成】生成最终 wav 配音文件。"""
+ # Every branch returns four values, in the order the click handler wires them:
+ # log text, output player update, history dropdown update, history player update.
if not polished_text or not polished_text.strip():
- return "请先完成 Gemma4 润色。", gr.update(value=None)
+ return (
+ "请先完成 Gemma4 润色。",
+ gr.update(value=None),
+ ui_history_dropdown(),
+ gr.update(value=None),
+ )
voice_id = label_to_voice_id(voice_label)
-
- def _report_segment(seg: int, total: int) -> None:
- progress((seg - 1) / max(total, 1), desc=f"ChatTTS 第 {seg}/{total} 段…")
-
- ok, msg, wav_path = generate_voice(
- polished_text,
- voice_id=voice_id,
- progress_callback=_report_segment,
- )
+ ok, msg, wav_path = generate_voice(polished_text, voice_id=voice_id)
if ok:
- return _short_synth_log(msg, ok), gr.update(value=wav_path)
- return _short_synth_log(msg, ok), gr.update(value=None)
+ # Success: refresh the history list and ask it to select the new wav.
+ return (
+ _short_synth_log(msg, ok),
+ gr.update(value=wav_path),
+ ui_history_dropdown(wav_path),
+ gr.update(value=wav_path),
+ )
+ # Failure: clear both players; the history list is refreshed without a requested selection.
+ return (
+ _short_synth_log(msg, ok),
+ gr.update(value=None),
+ ui_history_dropdown(),
+ gr.update(value=None),
+ )
# ---------------------------------------------------------------------------
@@ -185,7 +214,7 @@ def ui_full_pipeline(
skip_polish: bool,
manual_raw: str,
voice_label: str,
-) -> tuple[str, str, str | None, str]:
+) -> tuple[str, str, dict, str, dict, dict]:
"""
串联执行:识别 → 润色(可跳过)→ 合成。
返回 (raw, polished, wav_path, log)
@@ -199,10 +228,10 @@ def ui_full_pipeline(
else:
path = _save_upload(audio_file)
if not path:
- return "", "", gr.update(value=None), "❌ 请上传录音或手动填写转写文本。"
+ return "", "", gr.update(value=None), "❌ 请上传录音或手动填写转写文本。", ui_history_dropdown(), gr.update(value=None)
ok, result = transcribe_audio(path)
if not ok:
- return "", "", gr.update(value=None), f"❌ 识别失败: {result}"
+ return "", "", gr.update(value=None), f"❌ 识别失败: {result}", ui_history_dropdown(), gr.update(value=None)
raw = result
logs.append(f"✅ Whisper 识别完成({len(raw)} 字)。")
@@ -213,7 +242,7 @@ def ui_full_pipeline(
else:
ok, result = polish_text(raw)
if not ok:
- return raw, "", gr.update(value=None), f"❌ 润色失败: {result}\n" + "\n".join(logs)
+ return raw, "", gr.update(value=None), f"❌ 润色失败: {result}\n" + "\n".join(logs), ui_history_dropdown(), gr.update(value=None)
polished = result
logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。")
@@ -221,10 +250,17 @@ def ui_full_pipeline(
voice_id = label_to_voice_id(voice_label)
ok, msg, wav_path = generate_voice(polished, voice_id=voice_id)
if not ok:
- return raw, polished, gr.update(value=None), f"❌ 合成失败: {msg}\n" + "\n".join(logs)
+ return raw, polished, gr.update(value=None), f"❌ 合成失败: {msg}\n" + "\n".join(logs), ui_history_dropdown(), gr.update(value=None)
logs.append(f"✅ {msg}")
- return raw, polished, gr.update(value=wav_path), "\n".join(logs)
+ return (
+ raw,
+ polished,
+ gr.update(value=wav_path),
+ "\n".join(logs),
+ ui_history_dropdown(wav_path),
+ gr.update(value=wav_path),
+ )
# ---------------------------------------------------------------------------
@@ -918,6 +954,25 @@ gradio-app,
.gradio-container .waveform-container {
background: #1a2332 !important;
}
+/* 成品播放器:去掉 Gradio 默认 focus 白框,减轻合成完成时闪一下 */
+.gradio-container .tts-output-audio,
+.gradio-container .tts-output-audio .audio-container {
+    border: 1px solid #374151 !important;
+    background: #1a2332 !important;
+    /* No size/paint containment: with only a min-height set, "strict" sizes
+       the box as if it were empty and clips anything taller than 120px. */
+    contain: layout style;
+    min-height: 120px;
+}
+.gradio-container .tts-output-audio button,
+.gradio-container .tts-output-audio button:focus,
+.gradio-container .tts-output-audio button:focus-visible {
+ outline: none !important;
+ box-shadow: none !important;
+ border-color: #4b5563 !important;
+}
+.gradio-container .tts-output-audio .wrap,
+.gradio-container .tts-output-audio .controls {
+ background: #1a2332 !important;
+}
.gradio-container .pipeline-step-card textarea {
contain: layout style;
}
@@ -1061,38 +1116,48 @@ def build_app() -> gr.Blocks:
)
with gr.Tabs():
- # ---- Tab 1: 音色锁定 ----
- with gr.Tab("🎙️ 音色锁定"):
+ # ---- Tab 1: 一键生产(默认首页)----
+ with gr.Tab("🚀 一键生产"):
gr.HTML(MIC_HINT_HTML)
- gr.HTML(
- f'
'
- f'上传 10-30 秒 干净人声样本,系统将提取 Speaker Embedding '
- f'并保存至 {SPEAKER_EMB_PATH.name},'
- f'后续合成 100% 还原音色。'
- f"
"
+ gr.Markdown(
+ "上传碎碎念录音,系统自动完成 **识别 → 润色 → 合成** 全流程。"
)
with gr.Row():
- spk_audio = gr.Audio(
- label="参考人声(碎碎念盲录样本)",
+ pipe_audio = gr.Audio(
+ label="复盘录音",
type="filepath",
sources=["upload", "microphone"],
)
- spk_transcript = gr.Textbox(
- label="参考音频精确转写(强烈建议填写,与录音一致,避免合成报错)",
- placeholder="示例:今天开了三单,第一单手贱提前平了,第二单…",
- info="请尽量与参考音频内容完全一致,可提升音色还原度",
- lines=6,
- elem_classes=["bright-input"],
+ pipe_manual = gr.Textbox(
+ label="或手动输入转写(跳过识别)",
+ lines=4,
+ placeholder="若已有转写文本,可直接粘贴,留空则走 Whisper 识别",
)
- lock_btn = gr.Button("🔒 锁定音色", variant="primary")
- lock_log = gr.Textbox(label="锁定结果", lines=4, interactive=False)
- lock_btn.click(
- ui_lock_speaker,
- [spk_audio, spk_transcript],
- [lock_log, speaker_status],
+ skip_polish_cb = gr.Checkbox(
+ label="跳过 Gemma4 润色(仅测试 TTS)",
+ value=False,
+ )
+ pipe_voice = gr.Radio(
+ label="配音音色(本地 ChatTTS)",
+ choices=voice_choice_labels(),
+ value=default_voice_label(),
+ elem_classes=["voice-radio"],
+ )
+ pipeline_btn = gr.Button("▶ 启动全流程", variant="primary", size="lg")
+ pipeline_log = gr.Textbox(label="流水线日志", lines=6, interactive=False)
+ with gr.Row(elem_classes=["pipeline-output-row"]):
+ pipe_raw = gr.Textbox(label="转写原文", lines=6)
+ pipe_polished = gr.Textbox(label="润色稿", lines=6)
+ pipe_output = gr.Audio(
+ label="成品配音",
+ type="filepath",
+ interactive=False,
+ show_download_button=True,
+ show_share_button=False,
+ elem_classes=["tts-output-audio"],
)
- # ---- Tab 2: 分步操作(纵向三步,避免三栏挤在一起)----
+ # ---- Tab 2: 分步流水线 ----
with gr.Tab("🔧 分步流水线"):
gr.HTML(MIC_HINT_HTML)
with gr.Column(elem_classes=["pipeline-flow"]):
@@ -1126,7 +1191,7 @@ def build_app() -> gr.Blocks:
tts_voice = gr.Radio(
label="配音音色(本地 ChatTTS)",
choices=voice_choice_labels(),
- value=_default_voice_label(),
+ value=default_voice_label(),
info="预设音色:bash scripts/generate_voice_presets.sh",
elem_classes=["voice-radio"],
)
@@ -1137,74 +1202,101 @@ def build_app() -> gr.Blocks:
)
synth_btn = gr.Button("🔊 合成配音 WAV", variant="primary")
synth_log = gr.Textbox(label="合成日志", lines=3, interactive=False)
- output_audio = gr.Audio(label="成品配音", type="filepath")
+ output_audio = gr.Audio(
+ label="成品配音",
+ type="filepath",
+ interactive=False,
+ show_download_button=True,
+ show_share_button=False,
+ elem_classes=["tts-output-audio"],
+ )
transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log])
polish_btn.click(ui_polish, raw_text, [polished_text, polish_log])
- synth_btn.click(
- ui_synth_pending,
- [polished_text],
- [synth_log, output_audio],
- queue=True,
- ).then(
- ui_synthesize,
- [polished_text, tts_voice],
- [synth_log, output_audio],
- queue=True,
- )
- # ---- Tab 3: 一键生产 ----
- with gr.Tab("🚀 一键生产"):
+ # ---- Tab 3: 音色锁定 ----
+ with gr.Tab("🎙️ 音色锁定"):
gr.HTML(MIC_HINT_HTML)
- gr.Markdown(
- "上传碎碎念录音,系统自动完成 **识别 → 润色 → 合成** 全流程。"
+ gr.HTML(
+ f''
+ f'上传 10-30 秒 干净人声样本,系统将提取 Speaker Embedding '
+ f'并保存至 {SPEAKER_EMB_PATH.name},'
+ f'后续合成 100% 还原音色。'
+ f"
"
)
with gr.Row():
- pipe_audio = gr.Audio(
- label="复盘录音",
+ spk_audio = gr.Audio(
+ label="参考人声(碎碎念盲录样本)",
type="filepath",
sources=["upload", "microphone"],
)
- pipe_manual = gr.Textbox(
- label="或手动输入转写(跳过识别)",
- lines=4,
- placeholder="若已有转写文本,可直接粘贴,留空则走 Whisper 识别",
+ spk_transcript = gr.Textbox(
+ label="参考音频精确转写(强烈建议填写,与录音一致,避免合成报错)",
+ placeholder="示例:今天开了三单,第一单手贱提前平了,第二单…",
+ info="请尽量与参考音频内容完全一致,可提升音色还原度",
+ lines=6,
+ elem_classes=["bright-input"],
)
- skip_polish_cb = gr.Checkbox(
- label="跳过 Gemma4 润色(仅测试 TTS)",
- value=False,
+ lock_btn = gr.Button("🔒 锁定音色", variant="primary")
+ lock_log = gr.Textbox(label="锁定结果", lines=4, interactive=False)
+ lock_btn.click(
+ ui_lock_speaker,
+ [spk_audio, spk_transcript],
+ [lock_log, speaker_status],
)
- pipe_voice = gr.Radio(
- label="配音音色(本地 ChatTTS)",
- choices=voice_choice_labels(),
- value=_default_voice_label(),
- elem_classes=["voice-radio"],
- )
- pipeline_btn = gr.Button("▶ 启动全流程", variant="primary", size="lg")
- pipeline_log = gr.Textbox(label="流水线日志", lines=6, interactive=False)
- with gr.Row(elem_classes=["pipeline-output-row"]):
- pipe_raw = gr.Textbox(label="转写原文", lines=6)
- pipe_polished = gr.Textbox(label="润色稿", lines=6)
- pipe_output = gr.Audio(label="成品配音", type="filepath")
- def ui_pipeline_pending(skip_polish: bool, manual_raw: str) -> tuple[str, dict]:
- if manual_raw and manual_raw.strip():
- return "⏳ 全流程运行中(识别/润色/合成),请稍候…", gr.update(value=None)
- if skip_polish:
- return "⏳ 全流程运行中(识别→合成),请稍候…", gr.update(value=None)
- return "⏳ 全流程运行中(识别→润色→合成),请稍候…", gr.update(value=None)
-
- pipeline_btn.click(
- ui_pipeline_pending,
- [skip_polish_cb, pipe_manual],
- [pipeline_log, pipe_output],
- queue=True,
- ).then(
- ui_full_pipeline,
- [pipe_audio, skip_polish_cb, pipe_manual, pipe_voice],
- [pipe_raw, pipe_polished, pipe_output, pipeline_log],
- queue=True,
+ with gr.Accordion("📂 配音历史(本地保留,可随时试听下载)", open=True):
+ with gr.Row():
+ history_select = gr.Dropdown(
+ label="历史配音",
+ choices=list_voice_history(),
+ value=None,
+ interactive=True,
+ scale=4,
)
+ history_refresh_btn = gr.Button("🔄 刷新", scale=0, min_width=100)
+ history_player = gr.Audio(
+ label="历史试听 / 下载",
+ type="filepath",
+ interactive=False,
+ show_download_button=True,
+ show_share_button=False,
+ elem_classes=["tts-output-audio"],
+ )
+
+ history_refresh_btn.click(ui_history_dropdown, outputs=[history_select])
+ history_select.change(ui_history_play, history_select, history_player)
+ demo.load(ui_initial_history, outputs=[history_select, history_player])
+
+ def ui_pipeline_pending(skip_polish: bool, manual_raw: str) -> str:
+     """Return the "pipeline running" log line shown right after the button click.
+
+     A non-blank manual transcript takes precedence over the skip-polish flag
+     when choosing which stage list to display.
+     """
+     has_manual_text = bool(manual_raw and manual_raw.strip())
+     if has_manual_text:
+         message = "⏳ 全流程运行中(识别/润色/合成),请稍候,勿刷新页面…"
+     elif skip_polish:
+         message = "⏳ 全流程运行中(识别→合成),请稍候,勿刷新页面…"
+     else:
+         message = "⏳ 全流程运行中(识别→润色→合成),请稍候,勿刷新页面…"
+     return message
+
+ pipeline_btn.click(
+ ui_pipeline_pending,
+ [skip_polish_cb, pipe_manual],
+ [pipeline_log],
+ queue=True,
+ ).then(
+ ui_full_pipeline,
+ [pipe_audio, skip_polish_cb, pipe_manual, pipe_voice],
+ [pipe_raw, pipe_polished, pipe_output, pipeline_log, history_select, history_player],
+ queue=True,
+ )
+ synth_btn.click(
+ ui_synth_pending,
+ [polished_text],
+ [synth_log],
+ queue=True,
+ ).then(
+ ui_synthesize,
+ [polished_text, tts_voice],
+ [synth_log, output_audio, history_select, history_player],
+ queue=True,
+ )
demo.queue(default_concurrency_limit=1)
return demo
diff --git a/tts_service.py b/tts_service.py
index 586906c..8a1d81c 100644
--- a/tts_service.py
+++ b/tts_service.py
@@ -635,7 +635,6 @@ def _concat_wavs(
def generate_voice(
refined_text: str,
voice_id: str = "custom",
- progress_callback=None,
) -> Tuple[bool, str, Optional[str]]:
"""
使用 ChatTTS(本地 GPU)将润色稿合成为 wav。
@@ -725,11 +724,6 @@ def generate_voice(
for idx, chunk in enumerate(chunks, start=1):
if not chunk or len(chunk) < 2:
continue
- if progress_callback is not None:
- try:
- progress_callback(idx, len(chunks))
- except Exception:
- logger.debug("TTS 进度回调失败", exc_info=True)
release_cuda_cache()
chunk_infer = replace(params_infer_code, manual_seed=42 + idx)
wavs = None
diff --git a/voice_history.py b/voice_history.py
new file mode 100644
index 0000000..080f734
--- /dev/null
+++ b/voice_history.py
@@ -0,0 +1,51 @@
+"""
+本地配音历史:扫描 outputs/ 下已生成的 wav,供 Gradio 下拉试听与下载。
+文件不会被自动删除,重启服务后仍可访问。
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime
+from pathlib import Path
+from typing import List, Tuple
+
+from config import OUTPUT_DIR
+
+logger = logging.getLogger(__name__)
+
+HISTORY_MAX_ITEMS = 50
+VOICEOVER_GLOB = "voiceover_*.wav"
+
+
+def list_voice_history(limit: int = HISTORY_MAX_ITEMS) -> List[Tuple[str, str]]:
+    """
+    Return Gradio Dropdown choices for previously generated voice-overs.
+
+    Scans ``OUTPUT_DIR`` for files matching ``VOICEOVER_GLOB`` and returns
+    ``(label, absolute_path)`` tuples, newest first by modification time.
+
+    Args:
+        limit: Maximum number of entries to return.
+
+    Returns:
+        At most ``limit`` tuples; an empty list when ``OUTPUT_DIR`` does not
+        exist or nothing matches. Files that cannot be stat'ed are skipped.
+    """
+    if not OUTPUT_DIR.is_dir():
+        return []
+
+    # Stat every file exactly once, inside the guard. A file removed between
+    # glob() and stat(), or a dangling symlink, must not crash the listing;
+    # an unguarded stat() in the sort key would raise before any filtering.
+    entries: List[Tuple[float, int, Path]] = []
+    for path in OUTPUT_DIR.glob(VOICEOVER_GLOB):
+        try:
+            st = path.stat()
+        except OSError:
+            logger.debug("跳过不可读历史文件: %s", path)
+            continue
+        entries.append((st.st_mtime, st.st_size, path))
+
+    # Sort on mtime only, then cut to `limit` after unreadable files are gone
+    # so the caller still gets up to `limit` usable entries.
+    entries.sort(key=lambda entry: entry[0], reverse=True)
+
+    choices: List[Tuple[str, str]] = []
+    for mtime, size, path in entries[:limit]:
+        ts = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M")
+        size_mb = size / (1024 * 1024)
+        label = f"{ts} · {path.name} ({size_mb:.1f} MB)"
+        choices.append((label, str(path.resolve())))
+    return choices
+
+
+def latest_voice_path() -> str | None:
+    """Return the path of the most recent voice-over, or None when there is no history."""
+    newest = list_voice_history(limit=1)
+    if not newest:
+        return None
+    _label, path = newest[0]
+    return path
diff --git a/voice_presets.py b/voice_presets.py
index d1067f8..33da74e 100644
--- a/voice_presets.py
+++ b/voice_presets.py
@@ -22,6 +22,8 @@ PRESETS_DIR = VOICES_DIR / "presets"
MANIFEST_PATH = VOICES_DIR / "manifest.json"
CUSTOM_VOICE_ID = "custom"
+DEFAULT_PRESET_VOICE_ID = "preset_01"
+DEFAULT_PRESET_VOICE_LABEL = "预设·沉稳男声"
# 生成脚本写入的预设元数据(.pt 文件不入 Git)
DEFAULT_MANIFEST = {
@@ -85,13 +87,24 @@ def list_voice_choices() -> List[Tuple[str, str]]:
def default_voice_id() -> str:
+ """Return the default voice id: DEFAULT_PRESET_VOICE_ID when nothing is listed or when it is listed, else the first non-custom voice, else the first listed choice."""
choices = list_voice_choices()
if not choices:
- return CUSTOM_VOICE_ID
+ return DEFAULT_PRESET_VOICE_ID
for _label, vid in choices:
- if vid == CUSTOM_VOICE_ID:
- return CUSTOM_VOICE_ID
+ if vid == DEFAULT_PRESET_VOICE_ID:
+ return vid
+ for _label, vid in choices:
+ if vid != CUSTOM_VOICE_ID:
+ return vid
return choices[0][1]
+def default_voice_label() -> str:
+    """Return the label of the default voice for the UI radio groups.
+
+    Uses the same preference order as ``default_voice_id`` so the default
+    label and the default id always name the same voice: the default preset
+    if listed, else the first non-custom voice, else the first listed voice.
+
+    Returns:
+        The chosen label, or ``DEFAULT_PRESET_VOICE_LABEL`` when no voice is
+        listed at all.
+    """
+    # Take one snapshot of the choices so every lookup below sees the same list.
+    choices = list_voice_choices()
+    if not choices:
+        return DEFAULT_PRESET_VOICE_LABEL
+    for lbl, vid in choices:
+        if vid == DEFAULT_PRESET_VOICE_ID:
+            return lbl
+    for lbl, vid in choices:
+        if vid != CUSTOM_VOICE_ID:
+            return lbl
+    return choices[0][0]
+
+
def voice_choice_labels() -> List[str]:
return [c[0] for c in list_voice_choices()]