Add voice history, default preset voice, and one-click tab
Keep synthesized wav files browsable with playback and download, default to preset steady male voice, show one-click pipeline as the first tab, and reduce post-synthesis UI flicker. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -25,7 +25,8 @@ from config import (
|
|||||||
)
|
)
|
||||||
from llm_service import check_ollama_health, polish_text
|
from llm_service import check_ollama_health, polish_text
|
||||||
from tts_service import generate_voice, save_fixed_speaker, speaker_is_ready
|
from tts_service import generate_voice, save_fixed_speaker, speaker_is_ready
|
||||||
from voice_presets import label_to_voice_id, voice_choice_labels
|
from voice_history import list_voice_history
|
||||||
|
from voice_presets import default_voice_label, label_to_voice_id, voice_choice_labels
|
||||||
from whisper_service import transcribe_audio
|
from whisper_service import transcribe_audio
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -42,9 +43,35 @@ logging.basicConfig(
|
|||||||
logger = logging.getLogger("trading_studio")
|
logger = logging.getLogger("trading_studio")
|
||||||
|
|
||||||
|
|
||||||
def _default_voice_label() -> str:
|
# ---------------------------------------------------------------------------
|
||||||
labels = voice_choice_labels()
|
# 配音历史
|
||||||
return labels[0] if labels else "我的锁定音色(声音克隆)"
|
# ---------------------------------------------------------------------------
|
||||||
|
def ui_history_dropdown(select_path: str | None = None) -> dict:
    """Rebuild the history dropdown, optionally pre-selecting one entry.

    Args:
        select_path: Path to select if it is present among the current
            history entries (callers pass the freshly synthesized wav).

    Returns:
        A ``gr.update`` payload with the refreshed choices and the selected
        value: ``select_path`` when it is listed, otherwise the first listed
        entry, or ``None`` when there is no history at all.
    """
    entries = list_voice_history()
    known_paths = [entry_path for _label, entry_path in entries]

    if not known_paths:
        selected = None
    elif select_path and select_path in known_paths:
        selected = select_path
    else:
        # Requested path missing (or not given): fall back to the first entry.
        selected = known_paths[0]

    return gr.update(choices=entries, value=selected)
|
||||||
|
|
||||||
|
|
||||||
|
def ui_history_play(filepath: str | None) -> dict:
    """Load the selected history entry into the audio player.

    Args:
        filepath: Value of the history dropdown; may be ``None`` or empty.

    Returns:
        A ``gr.update`` whose value is ``filepath`` when it names an existing
        regular file, and ``None`` otherwise (which clears the player).
    """
    playable = bool(filepath) and Path(filepath).is_file()
    return gr.update(value=filepath if playable else None)
|
||||||
|
|
||||||
|
|
||||||
|
def ui_initial_history() -> tuple[dict, dict]:
    """Populate the history widgets on first page load.

    Returns:
        A pair ``(dropdown_update, player_update)``: the dropdown receives the
        current history choices with the first entry selected, and the player
        is loaded with that same entry (or cleared when there is none).
    """
    entries = list_voice_history()
    # Each entry is a (label, path) pair; the first one is pre-selected.
    newest = entries[0][1] if entries else None

    dropdown_update = gr.update(choices=entries, value=newest)
    return dropdown_update, ui_history_play(newest)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -141,40 +168,42 @@ def _short_synth_log(msg: str, ok: bool) -> str:
|
|||||||
return "✅ 配音完成。请用下方播放器试听、下载。"
|
return "✅ 配音完成。请用下方播放器试听、下载。"
|
||||||
|
|
||||||
|
|
||||||
def ui_synth_pending(polished_text: str) -> tuple[str, dict]:
|
def ui_synth_pending(polished_text: str) -> str:
|
||||||
"""点击合成后立即反馈,避免长时间无日志更新被误认为卡死。"""
|
"""点击合成后立即更新日志;不触碰播放器,避免波形组件销毁重建导致闪屏。"""
|
||||||
text = (polished_text or "").strip()
|
text = (polished_text or "").strip()
|
||||||
if not text:
|
if not text:
|
||||||
return "请先完成 Gemma4 润色。", gr.update(value=None)
|
return "请先完成 Gemma4 润色。"
|
||||||
est_sec = max(20, len(text) // 10)
|
est_sec = max(20, len(text) // 10)
|
||||||
return (
|
return (
|
||||||
f"⏳ 配音合成中(约 {len(text)} 字,预计 {est_sec}–{est_sec + 45} 秒),请勿重复点击…",
|
f"⏳ 配音合成中(约 {len(text)} 字,预计 {est_sec}–{est_sec + 45} 秒),请勿重复点击或刷新页面…"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def ui_synthesize(polished_text: str, voice_label: str) -> tuple[str, dict, dict, dict]:
|
||||||
|
"""【TTS 合成】生成最终 wav 配音文件。"""
|
||||||
|
if not polished_text or not polished_text.strip():
|
||||||
|
return (
|
||||||
|
"请先完成 Gemma4 润色。",
|
||||||
|
gr.update(value=None),
|
||||||
|
ui_history_dropdown(),
|
||||||
gr.update(value=None),
|
gr.update(value=None),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def ui_synthesize(
|
|
||||||
polished_text: str,
|
|
||||||
voice_label: str,
|
|
||||||
progress: gr.Progress = gr.Progress(),
|
|
||||||
) -> tuple[str, dict]:
|
|
||||||
"""【TTS 合成】生成最终 wav 配音文件。"""
|
|
||||||
if not polished_text or not polished_text.strip():
|
|
||||||
return "请先完成 Gemma4 润色。", gr.update(value=None)
|
|
||||||
|
|
||||||
voice_id = label_to_voice_id(voice_label)
|
voice_id = label_to_voice_id(voice_label)
|
||||||
|
ok, msg, wav_path = generate_voice(polished_text, voice_id=voice_id)
|
||||||
def _report_segment(seg: int, total: int) -> None:
|
|
||||||
progress((seg - 1) / max(total, 1), desc=f"ChatTTS 第 {seg}/{total} 段…")
|
|
||||||
|
|
||||||
ok, msg, wav_path = generate_voice(
|
|
||||||
polished_text,
|
|
||||||
voice_id=voice_id,
|
|
||||||
progress_callback=_report_segment,
|
|
||||||
)
|
|
||||||
if ok:
|
if ok:
|
||||||
return _short_synth_log(msg, ok), gr.update(value=wav_path)
|
return (
|
||||||
return _short_synth_log(msg, ok), gr.update(value=None)
|
_short_synth_log(msg, ok),
|
||||||
|
gr.update(value=wav_path),
|
||||||
|
ui_history_dropdown(wav_path),
|
||||||
|
gr.update(value=wav_path),
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
_short_synth_log(msg, ok),
|
||||||
|
gr.update(value=None),
|
||||||
|
ui_history_dropdown(),
|
||||||
|
gr.update(value=None),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -185,7 +214,7 @@ def ui_full_pipeline(
|
|||||||
skip_polish: bool,
|
skip_polish: bool,
|
||||||
manual_raw: str,
|
manual_raw: str,
|
||||||
voice_label: str,
|
voice_label: str,
|
||||||
) -> tuple[str, str, str | None, str]:
|
) -> tuple[str, str, dict, str, dict, dict]:
|
||||||
"""
|
"""
|
||||||
串联执行:识别 → 润色(可跳过)→ 合成。
|
串联执行:识别 → 润色(可跳过)→ 合成。
|
||||||
返回 (raw, polished, wav_path, log)
|
返回 (raw, polished, wav_path, log)
|
||||||
@@ -199,10 +228,10 @@ def ui_full_pipeline(
|
|||||||
else:
|
else:
|
||||||
path = _save_upload(audio_file)
|
path = _save_upload(audio_file)
|
||||||
if not path:
|
if not path:
|
||||||
return "", "", gr.update(value=None), "❌ 请上传录音或手动填写转写文本。"
|
return "", "", gr.update(value=None), "❌ 请上传录音或手动填写转写文本。", ui_history_dropdown(), gr.update(value=None)
|
||||||
ok, result = transcribe_audio(path)
|
ok, result = transcribe_audio(path)
|
||||||
if not ok:
|
if not ok:
|
||||||
return "", "", gr.update(value=None), f"❌ 识别失败: {result}"
|
return "", "", gr.update(value=None), f"❌ 识别失败: {result}", ui_history_dropdown(), gr.update(value=None)
|
||||||
raw = result
|
raw = result
|
||||||
logs.append(f"✅ Whisper 识别完成({len(raw)} 字)。")
|
logs.append(f"✅ Whisper 识别完成({len(raw)} 字)。")
|
||||||
|
|
||||||
@@ -213,7 +242,7 @@ def ui_full_pipeline(
|
|||||||
else:
|
else:
|
||||||
ok, result = polish_text(raw)
|
ok, result = polish_text(raw)
|
||||||
if not ok:
|
if not ok:
|
||||||
return raw, "", gr.update(value=None), f"❌ 润色失败: {result}\n" + "\n".join(logs)
|
return raw, "", gr.update(value=None), f"❌ 润色失败: {result}\n" + "\n".join(logs), ui_history_dropdown(), gr.update(value=None)
|
||||||
polished = result
|
polished = result
|
||||||
logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。")
|
logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。")
|
||||||
|
|
||||||
@@ -221,10 +250,17 @@ def ui_full_pipeline(
|
|||||||
voice_id = label_to_voice_id(voice_label)
|
voice_id = label_to_voice_id(voice_label)
|
||||||
ok, msg, wav_path = generate_voice(polished, voice_id=voice_id)
|
ok, msg, wav_path = generate_voice(polished, voice_id=voice_id)
|
||||||
if not ok:
|
if not ok:
|
||||||
return raw, polished, gr.update(value=None), f"❌ 合成失败: {msg}\n" + "\n".join(logs)
|
return raw, polished, gr.update(value=None), f"❌ 合成失败: {msg}\n" + "\n".join(logs), ui_history_dropdown(), gr.update(value=None)
|
||||||
|
|
||||||
logs.append(f"✅ {msg}")
|
logs.append(f"✅ {msg}")
|
||||||
return raw, polished, gr.update(value=wav_path), "\n".join(logs)
|
return (
|
||||||
|
raw,
|
||||||
|
polished,
|
||||||
|
gr.update(value=wav_path),
|
||||||
|
"\n".join(logs),
|
||||||
|
ui_history_dropdown(wav_path),
|
||||||
|
gr.update(value=wav_path),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -918,6 +954,25 @@ gradio-app,
|
|||||||
.gradio-container .waveform-container {
|
.gradio-container .waveform-container {
|
||||||
background: #1a2332 !important;
|
background: #1a2332 !important;
|
||||||
}
|
}
|
||||||
|
/* 成品播放器:去掉 Gradio 默认 focus 白框,减轻合成完成时闪一下 */
|
||||||
|
.gradio-container .tts-output-audio,
|
||||||
|
.gradio-container .tts-output-audio .audio-container {
|
||||||
|
border: 1px solid #374151 !important;
|
||||||
|
background: #1a2332 !important;
|
||||||
|
contain: strict;
|
||||||
|
min-height: 120px;
|
||||||
|
}
|
||||||
|
.gradio-container .tts-output-audio button,
|
||||||
|
.gradio-container .tts-output-audio button:focus,
|
||||||
|
.gradio-container .tts-output-audio button:focus-visible {
|
||||||
|
outline: none !important;
|
||||||
|
box-shadow: none !important;
|
||||||
|
border-color: #4b5563 !important;
|
||||||
|
}
|
||||||
|
.gradio-container .tts-output-audio .wrap,
|
||||||
|
.gradio-container .tts-output-audio .controls {
|
||||||
|
background: #1a2332 !important;
|
||||||
|
}
|
||||||
.gradio-container .pipeline-step-card textarea {
|
.gradio-container .pipeline-step-card textarea {
|
||||||
contain: layout style;
|
contain: layout style;
|
||||||
}
|
}
|
||||||
@@ -1061,38 +1116,48 @@ def build_app() -> gr.Blocks:
|
|||||||
)
|
)
|
||||||
|
|
||||||
with gr.Tabs():
|
with gr.Tabs():
|
||||||
# ---- Tab 1: 音色锁定 ----
|
# ---- Tab 1: 一键生产(默认首页)----
|
||||||
with gr.Tab("🎙️ 音色锁定"):
|
with gr.Tab("🚀 一键生产"):
|
||||||
gr.HTML(MIC_HINT_HTML)
|
gr.HTML(MIC_HINT_HTML)
|
||||||
gr.HTML(
|
gr.Markdown(
|
||||||
f'<div class="hint-box">'
|
"上传碎碎念录音,系统自动完成 **识别 → 润色 → 合成** 全流程。"
|
||||||
f'上传 <strong>10-30 秒</strong> 干净人声样本,系统将提取 Speaker Embedding '
|
|
||||||
f'并保存至 <span class="file-tag">{SPEAKER_EMB_PATH.name}</span>,'
|
|
||||||
f'后续合成 <strong>100% 还原音色</strong>。'
|
|
||||||
f"</div>"
|
|
||||||
)
|
)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
spk_audio = gr.Audio(
|
pipe_audio = gr.Audio(
|
||||||
label="参考人声(碎碎念盲录样本)",
|
label="复盘录音",
|
||||||
type="filepath",
|
type="filepath",
|
||||||
sources=["upload", "microphone"],
|
sources=["upload", "microphone"],
|
||||||
)
|
)
|
||||||
spk_transcript = gr.Textbox(
|
pipe_manual = gr.Textbox(
|
||||||
label="参考音频精确转写(强烈建议填写,与录音一致,避免合成报错)",
|
label="或手动输入转写(跳过识别)",
|
||||||
placeholder="示例:今天开了三单,第一单手贱提前平了,第二单…",
|
lines=4,
|
||||||
info="请尽量与参考音频内容完全一致,可提升音色还原度",
|
placeholder="若已有转写文本,可直接粘贴,留空则走 Whisper 识别",
|
||||||
lines=6,
|
|
||||||
elem_classes=["bright-input"],
|
|
||||||
)
|
)
|
||||||
lock_btn = gr.Button("🔒 锁定音色", variant="primary")
|
skip_polish_cb = gr.Checkbox(
|
||||||
lock_log = gr.Textbox(label="锁定结果", lines=4, interactive=False)
|
label="跳过 Gemma4 润色(仅测试 TTS)",
|
||||||
lock_btn.click(
|
value=False,
|
||||||
ui_lock_speaker,
|
)
|
||||||
[spk_audio, spk_transcript],
|
pipe_voice = gr.Radio(
|
||||||
[lock_log, speaker_status],
|
label="配音音色(本地 ChatTTS)",
|
||||||
|
choices=voice_choice_labels(),
|
||||||
|
value=default_voice_label(),
|
||||||
|
elem_classes=["voice-radio"],
|
||||||
|
)
|
||||||
|
pipeline_btn = gr.Button("▶ 启动全流程", variant="primary", size="lg")
|
||||||
|
pipeline_log = gr.Textbox(label="流水线日志", lines=6, interactive=False)
|
||||||
|
with gr.Row(elem_classes=["pipeline-output-row"]):
|
||||||
|
pipe_raw = gr.Textbox(label="转写原文", lines=6)
|
||||||
|
pipe_polished = gr.Textbox(label="润色稿", lines=6)
|
||||||
|
pipe_output = gr.Audio(
|
||||||
|
label="成品配音",
|
||||||
|
type="filepath",
|
||||||
|
interactive=False,
|
||||||
|
show_download_button=True,
|
||||||
|
show_share_button=False,
|
||||||
|
elem_classes=["tts-output-audio"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# ---- Tab 2: 分步操作(纵向三步,避免三栏挤在一起)----
|
# ---- Tab 2: 分步流水线 ----
|
||||||
with gr.Tab("🔧 分步流水线"):
|
with gr.Tab("🔧 分步流水线"):
|
||||||
gr.HTML(MIC_HINT_HTML)
|
gr.HTML(MIC_HINT_HTML)
|
||||||
with gr.Column(elem_classes=["pipeline-flow"]):
|
with gr.Column(elem_classes=["pipeline-flow"]):
|
||||||
@@ -1126,7 +1191,7 @@ def build_app() -> gr.Blocks:
|
|||||||
tts_voice = gr.Radio(
|
tts_voice = gr.Radio(
|
||||||
label="配音音色(本地 ChatTTS)",
|
label="配音音色(本地 ChatTTS)",
|
||||||
choices=voice_choice_labels(),
|
choices=voice_choice_labels(),
|
||||||
value=_default_voice_label(),
|
value=default_voice_label(),
|
||||||
info="预设音色:bash scripts/generate_voice_presets.sh",
|
info="预设音色:bash scripts/generate_voice_presets.sh",
|
||||||
elem_classes=["voice-radio"],
|
elem_classes=["voice-radio"],
|
||||||
)
|
)
|
||||||
@@ -1137,72 +1202,99 @@ def build_app() -> gr.Blocks:
|
|||||||
)
|
)
|
||||||
synth_btn = gr.Button("🔊 合成配音 WAV", variant="primary")
|
synth_btn = gr.Button("🔊 合成配音 WAV", variant="primary")
|
||||||
synth_log = gr.Textbox(label="合成日志", lines=3, interactive=False)
|
synth_log = gr.Textbox(label="合成日志", lines=3, interactive=False)
|
||||||
output_audio = gr.Audio(label="成品配音", type="filepath")
|
output_audio = gr.Audio(
|
||||||
|
label="成品配音",
|
||||||
|
type="filepath",
|
||||||
|
interactive=False,
|
||||||
|
show_download_button=True,
|
||||||
|
show_share_button=False,
|
||||||
|
elem_classes=["tts-output-audio"],
|
||||||
|
)
|
||||||
|
|
||||||
transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log])
|
transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log])
|
||||||
polish_btn.click(ui_polish, raw_text, [polished_text, polish_log])
|
polish_btn.click(ui_polish, raw_text, [polished_text, polish_log])
|
||||||
synth_btn.click(
|
|
||||||
ui_synth_pending,
|
|
||||||
[polished_text],
|
|
||||||
[synth_log, output_audio],
|
|
||||||
queue=True,
|
|
||||||
).then(
|
|
||||||
ui_synthesize,
|
|
||||||
[polished_text, tts_voice],
|
|
||||||
[synth_log, output_audio],
|
|
||||||
queue=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# ---- Tab 3: 一键生产 ----
|
# ---- Tab 3: 音色锁定 ----
|
||||||
with gr.Tab("🚀 一键生产"):
|
with gr.Tab("🎙️ 音色锁定"):
|
||||||
gr.HTML(MIC_HINT_HTML)
|
gr.HTML(MIC_HINT_HTML)
|
||||||
gr.Markdown(
|
gr.HTML(
|
||||||
"上传碎碎念录音,系统自动完成 **识别 → 润色 → 合成** 全流程。"
|
f'<div class="hint-box">'
|
||||||
|
f'上传 <strong>10-30 秒</strong> 干净人声样本,系统将提取 Speaker Embedding '
|
||||||
|
f'并保存至 <span class="file-tag">{SPEAKER_EMB_PATH.name}</span>,'
|
||||||
|
f'后续合成 <strong>100% 还原音色</strong>。'
|
||||||
|
f"</div>"
|
||||||
)
|
)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
pipe_audio = gr.Audio(
|
spk_audio = gr.Audio(
|
||||||
label="复盘录音",
|
label="参考人声(碎碎念盲录样本)",
|
||||||
type="filepath",
|
type="filepath",
|
||||||
sources=["upload", "microphone"],
|
sources=["upload", "microphone"],
|
||||||
)
|
)
|
||||||
pipe_manual = gr.Textbox(
|
spk_transcript = gr.Textbox(
|
||||||
label="或手动输入转写(跳过识别)",
|
label="参考音频精确转写(强烈建议填写,与录音一致,避免合成报错)",
|
||||||
lines=4,
|
placeholder="示例:今天开了三单,第一单手贱提前平了,第二单…",
|
||||||
placeholder="若已有转写文本,可直接粘贴,留空则走 Whisper 识别",
|
info="请尽量与参考音频内容完全一致,可提升音色还原度",
|
||||||
|
lines=6,
|
||||||
|
elem_classes=["bright-input"],
|
||||||
)
|
)
|
||||||
skip_polish_cb = gr.Checkbox(
|
lock_btn = gr.Button("🔒 锁定音色", variant="primary")
|
||||||
label="跳过 Gemma4 润色(仅测试 TTS)",
|
lock_log = gr.Textbox(label="锁定结果", lines=4, interactive=False)
|
||||||
value=False,
|
lock_btn.click(
|
||||||
|
ui_lock_speaker,
|
||||||
|
[spk_audio, spk_transcript],
|
||||||
|
[lock_log, speaker_status],
|
||||||
)
|
)
|
||||||
pipe_voice = gr.Radio(
|
|
||||||
label="配音音色(本地 ChatTTS)",
|
|
||||||
choices=voice_choice_labels(),
|
|
||||||
value=_default_voice_label(),
|
|
||||||
elem_classes=["voice-radio"],
|
|
||||||
)
|
|
||||||
pipeline_btn = gr.Button("▶ 启动全流程", variant="primary", size="lg")
|
|
||||||
pipeline_log = gr.Textbox(label="流水线日志", lines=6, interactive=False)
|
|
||||||
with gr.Row(elem_classes=["pipeline-output-row"]):
|
|
||||||
pipe_raw = gr.Textbox(label="转写原文", lines=6)
|
|
||||||
pipe_polished = gr.Textbox(label="润色稿", lines=6)
|
|
||||||
pipe_output = gr.Audio(label="成品配音", type="filepath")
|
|
||||||
|
|
||||||
def ui_pipeline_pending(skip_polish: bool, manual_raw: str) -> tuple[str, dict]:
|
with gr.Accordion("📂 配音历史(本地保留,可随时试听下载)", open=True):
|
||||||
|
with gr.Row():
|
||||||
|
history_select = gr.Dropdown(
|
||||||
|
label="历史配音",
|
||||||
|
choices=list_voice_history(),
|
||||||
|
value=None,
|
||||||
|
interactive=True,
|
||||||
|
scale=4,
|
||||||
|
)
|
||||||
|
history_refresh_btn = gr.Button("🔄 刷新", scale=0, min_width=100)
|
||||||
|
history_player = gr.Audio(
|
||||||
|
label="历史试听 / 下载",
|
||||||
|
type="filepath",
|
||||||
|
interactive=False,
|
||||||
|
show_download_button=True,
|
||||||
|
show_share_button=False,
|
||||||
|
elem_classes=["tts-output-audio"],
|
||||||
|
)
|
||||||
|
|
||||||
|
history_refresh_btn.click(ui_history_dropdown, outputs=[history_select])
|
||||||
|
history_select.change(ui_history_play, history_select, history_player)
|
||||||
|
demo.load(ui_initial_history, outputs=[history_select, history_player])
|
||||||
|
|
||||||
|
def ui_pipeline_pending(skip_polish: bool, manual_raw: str) -> str:
|
||||||
if manual_raw and manual_raw.strip():
|
if manual_raw and manual_raw.strip():
|
||||||
return "⏳ 全流程运行中(识别/润色/合成),请稍候…", gr.update(value=None)
|
return "⏳ 全流程运行中(识别/润色/合成),请稍候,勿刷新页面…"
|
||||||
if skip_polish:
|
if skip_polish:
|
||||||
return "⏳ 全流程运行中(识别→合成),请稍候…", gr.update(value=None)
|
return "⏳ 全流程运行中(识别→合成),请稍候,勿刷新页面…"
|
||||||
return "⏳ 全流程运行中(识别→润色→合成),请稍候…", gr.update(value=None)
|
return "⏳ 全流程运行中(识别→润色→合成),请稍候,勿刷新页面…"
|
||||||
|
|
||||||
pipeline_btn.click(
|
pipeline_btn.click(
|
||||||
ui_pipeline_pending,
|
ui_pipeline_pending,
|
||||||
[skip_polish_cb, pipe_manual],
|
[skip_polish_cb, pipe_manual],
|
||||||
[pipeline_log, pipe_output],
|
[pipeline_log],
|
||||||
queue=True,
|
queue=True,
|
||||||
).then(
|
).then(
|
||||||
ui_full_pipeline,
|
ui_full_pipeline,
|
||||||
[pipe_audio, skip_polish_cb, pipe_manual, pipe_voice],
|
[pipe_audio, skip_polish_cb, pipe_manual, pipe_voice],
|
||||||
[pipe_raw, pipe_polished, pipe_output, pipeline_log],
|
[pipe_raw, pipe_polished, pipe_output, pipeline_log, history_select, history_player],
|
||||||
|
queue=True,
|
||||||
|
)
|
||||||
|
synth_btn.click(
|
||||||
|
ui_synth_pending,
|
||||||
|
[polished_text],
|
||||||
|
[synth_log],
|
||||||
|
queue=True,
|
||||||
|
).then(
|
||||||
|
ui_synthesize,
|
||||||
|
[polished_text, tts_voice],
|
||||||
|
[synth_log, output_audio, history_select, history_player],
|
||||||
queue=True,
|
queue=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -635,7 +635,6 @@ def _concat_wavs(
|
|||||||
def generate_voice(
|
def generate_voice(
|
||||||
refined_text: str,
|
refined_text: str,
|
||||||
voice_id: str = "custom",
|
voice_id: str = "custom",
|
||||||
progress_callback=None,
|
|
||||||
) -> Tuple[bool, str, Optional[str]]:
|
) -> Tuple[bool, str, Optional[str]]:
|
||||||
"""
|
"""
|
||||||
使用 ChatTTS(本地 GPU)将润色稿合成为 wav。
|
使用 ChatTTS(本地 GPU)将润色稿合成为 wav。
|
||||||
@@ -725,11 +724,6 @@ def generate_voice(
|
|||||||
for idx, chunk in enumerate(chunks, start=1):
|
for idx, chunk in enumerate(chunks, start=1):
|
||||||
if not chunk or len(chunk) < 2:
|
if not chunk or len(chunk) < 2:
|
||||||
continue
|
continue
|
||||||
if progress_callback is not None:
|
|
||||||
try:
|
|
||||||
progress_callback(idx, len(chunks))
|
|
||||||
except Exception:
|
|
||||||
logger.debug("TTS 进度回调失败", exc_info=True)
|
|
||||||
release_cuda_cache()
|
release_cuda_cache()
|
||||||
chunk_infer = replace(params_infer_code, manual_seed=42 + idx)
|
chunk_infer = replace(params_infer_code, manual_seed=42 + idx)
|
||||||
wavs = None
|
wavs = None
|
||||||
|
|||||||
@@ -0,0 +1,51 @@
|
|||||||
|
"""
|
||||||
|
本地配音历史:扫描 outputs/ 下已生成的 wav,供 Gradio 下拉试听与下载。
|
||||||
|
文件不会被自动删除,重启服务后仍可访问。
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Tuple
|
||||||
|
|
||||||
|
from config import OUTPUT_DIR
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
HISTORY_MAX_ITEMS = 50
|
||||||
|
VOICEOVER_GLOB = "voiceover_*.wav"
|
||||||
|
|
||||||
|
|
||||||
|
def list_voice_history(limit: int = HISTORY_MAX_ITEMS) -> List[Tuple[str, str]]:
    """Return Gradio dropdown choices for previously generated voice-overs.

    Scans ``OUTPUT_DIR`` for files matching ``VOICEOVER_GLOB`` and lists them
    newest first by modification time.

    Args:
        limit: Maximum number of entries to return.

    Returns:
        A list of ``(display_label, absolute_path)`` tuples. The list is empty
        when ``OUTPUT_DIR`` is not a directory or nothing matches.
    """
    if not OUTPUT_DIR.is_dir():
        return []

    # Stat every candidate exactly once, inside the OSError guard. Sorting
    # with ``key=lambda p: p.stat().st_mtime`` would raise if a file vanished
    # between glob() and stat(), taking the whole listing down with it.
    stamped = []
    for path in OUTPUT_DIR.glob(VOICEOVER_GLOB):
        try:
            st = path.stat()
        except OSError:
            logger.debug("跳过不可读历史文件: %s", path)
            continue
        stamped.append((st.st_mtime, st.st_size, path))

    # Newest first. Slice only after unreadable files were dropped, so the
    # result still holds up to ``limit`` usable entries.
    stamped.sort(key=lambda item: item[0], reverse=True)

    choices: List[Tuple[str, str]] = []
    for mtime, size, path in stamped[:limit]:
        ts = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M")
        size_mb = size / (1024 * 1024)
        label = f"{ts} · {path.name} ({size_mb:.1f} MB)"
        choices.append((label, str(path.resolve())))
    return choices
|
||||||
|
|
||||||
|
|
||||||
|
def latest_voice_path() -> str | None:
    """Return the path of the most recent voice-over, or ``None`` if none exist."""
    newest = list_voice_history(limit=1)
    if not newest:
        return None
    _label, path = newest[0]
    return path
|
||||||
+16
-3
@@ -22,6 +22,8 @@ PRESETS_DIR = VOICES_DIR / "presets"
|
|||||||
MANIFEST_PATH = VOICES_DIR / "manifest.json"
|
MANIFEST_PATH = VOICES_DIR / "manifest.json"
|
||||||
|
|
||||||
CUSTOM_VOICE_ID = "custom"
|
CUSTOM_VOICE_ID = "custom"
|
||||||
|
DEFAULT_PRESET_VOICE_ID = "preset_01"
|
||||||
|
DEFAULT_PRESET_VOICE_LABEL = "预设·沉稳男声"
|
||||||
|
|
||||||
# 生成脚本写入的预设元数据(.pt 文件不入 Git)
|
# 生成脚本写入的预设元数据(.pt 文件不入 Git)
|
||||||
DEFAULT_MANIFEST = {
|
DEFAULT_MANIFEST = {
|
||||||
@@ -85,13 +87,24 @@ def list_voice_choices() -> List[Tuple[str, str]]:
|
|||||||
def default_voice_id() -> str:
|
def default_voice_id() -> str:
|
||||||
choices = list_voice_choices()
|
choices = list_voice_choices()
|
||||||
if not choices:
|
if not choices:
|
||||||
return CUSTOM_VOICE_ID
|
return DEFAULT_PRESET_VOICE_ID
|
||||||
for _label, vid in choices:
|
for _label, vid in choices:
|
||||||
if vid == CUSTOM_VOICE_ID:
|
if vid == DEFAULT_PRESET_VOICE_ID:
|
||||||
return CUSTOM_VOICE_ID
|
return vid
|
||||||
|
for _label, vid in choices:
|
||||||
|
if vid != CUSTOM_VOICE_ID:
|
||||||
|
return vid
|
||||||
return choices[0][1]
|
return choices[0][1]
|
||||||
|
|
||||||
|
|
||||||
|
def default_voice_label() -> str:
    """Return the label that voice selectors should show as pre-selected.

    Preference order: the label of the choice whose id equals
    ``DEFAULT_PRESET_VOICE_ID``; otherwise the first available label;
    otherwise the constant ``DEFAULT_PRESET_VOICE_LABEL``.
    """
    not_found = object()
    preset_label = next(
        (
            label
            for label, voice_id in list_voice_choices()
            if voice_id == DEFAULT_PRESET_VOICE_ID
        ),
        not_found,
    )
    if preset_label is not not_found:
        return preset_label

    available = voice_choice_labels()
    if available:
        return available[0]
    return DEFAULT_PRESET_VOICE_LABEL
|
||||||
|
|
||||||
|
|
||||||
def voice_choice_labels() -> List[str]:
|
def voice_choice_labels() -> List[str]:
|
||||||
return [c[0] for c in list_voice_choices()]
|
return [c[0] for c in list_voice_choices()]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user