Fix Gradio 6.0 theme/css warning and refresh speaker status after lock.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 14:17:26 +08:00
parent 4a4f40fac4
commit fc96f834a0
+386 -378
View File
@@ -1,378 +1,386 @@
"""
Trading Studio — automated voice-over system for trading-review videos.

Gradio web console: speaker lock → Whisper transcription → Gemma4 polishing → ChatTTS synthesis.
"""
from __future__ import annotations

import logging
import shutil
import sys
import uuid
from pathlib import Path

import gradio as gr

from config import (
    GIT_REPO_URL,
    HOST,
    MODEL_NAME,
    OLLAMA_URL,
    PORT,
    SPEAKER_EMB_PATH,
    UPLOAD_DIR,
)
from llm_service import check_ollama_health, polish_text
from tts_service import generate_voice, save_fixed_speaker, speaker_is_ready
from whisper_service import transcribe_audio
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Records go to stdout and to a UTF-8 log file in the working directory.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler("trading_studio.log", encoding="utf-8"),
    ],
)
logger = logging.getLogger("trading_studio")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# 全局 UI 状态(Gradio State # 全局 UI 状态(Gradio State
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# raw_transcript / polished_script 在流水线中传递 # raw_transcript / polished_script 在流水线中传递
def _save_upload(upload_file) -> str | None: def _save_upload(upload_file) -> str | None:
"""将 Gradio 上传文件复制到本地 uploads 目录,返回持久化路径。""" """将 Gradio 上传文件复制到本地 uploads 目录,返回持久化路径。"""
if upload_file is None: if upload_file is None:
return None return None
src = Path(upload_file) src = Path(upload_file)
if not src.exists(): if not src.exists():
return None return None
dest = UPLOAD_DIR / f"{uuid.uuid4().hex}_{src.name}" dest = UPLOAD_DIR / f"{uuid.uuid4().hex}_{src.name}"
shutil.copy2(src, dest) shutil.copy2(src, dest)
return str(dest) return str(dest)
# ---------------------------------------------------------------------------
# Module 1: speaker voice lock
# ---------------------------------------------------------------------------
def ui_lock_speaker(audio_file, sample_transcript: str) -> tuple[str, str]:
    """Lock the speaker voice from a reference recording.

    Persists the upload, hands it to ``save_fixed_speaker`` together with the
    optional transcript, and re-reads the speaker status so the status box can
    be refreshed by the same click.

    Args:
        audio_file: Path of the uploaded reference audio, or ``None``.
        sample_transcript: Transcript of the reference audio; ``None``/empty
            is passed on as an empty string.

    Returns:
        ``(result message, current speaker status text)``.
    """
    path = _save_upload(audio_file)
    if not path:
        return "请上传 10-30 秒干净参考人声(wav/mp3 均可)。", ui_speaker_status()
    ok, msg = save_fixed_speaker(path, sample_transcript or "")
    # NOTE(review): the failure branch adds no prefix as visible in the
    # source — confirm no status glyph was lost in extraction.
    result = msg if ok else f"{msg}"
    return result, ui_speaker_status()
def ui_speaker_status() -> str:
    """Return the current speaker status text for the status box.

    Returns:
        The message from ``speaker_is_ready``; prefixed with a warning sign
        when the speaker is not ready.
    """
    ok, msg = speaker_is_ready()
    return f"{msg}" if ok else f"⚠️ {msg}"
# ---------------------------------------------------------------------------
# Module 2: fast audio transcription
# ---------------------------------------------------------------------------
def ui_transcribe(audio_file) -> tuple[str, str]:
    """Transcribe an uploaded recording with Whisper.

    Args:
        audio_file: Path of the uploaded recording, or ``None``.

    Returns:
        ``(transcript, status log)``. The transcript is empty when no file
        was uploaded or transcription failed; the log then carries the prompt
        or the error text returned by ``transcribe_audio``.
    """
    path = _save_upload(audio_file)
    if not path:
        return "", "请上传待识别的碎碎念录音。"

    ok, result = transcribe_audio(path)
    if ok:
        return result, f"✅ 识别完成,共 {len(result)} 字。"
    return "", f"{result}"
# ---------------------------------------------------------------------------
# Module 3: Gemma4 discipline review
# ---------------------------------------------------------------------------
def ui_polish(raw_text: str) -> tuple[str, str]:
    """Polish the raw transcript through the LLM.

    Args:
        raw_text: Transcript to polish; empty or whitespace-only input is
            rejected without calling the LLM.

    Returns:
        ``(polished text, status log)``. The polished text is empty when the
        input was blank or ``polish_text`` reported a failure.
    """
    if not raw_text or not raw_text.strip():
        return "", "请先完成语音识别或手动粘贴转写文本。"

    ok, result = polish_text(raw_text)
    if ok:
        return result, f"✅ Gemma4 润色完成,共 {len(result)} 字。"
    return "", f"{result}"
def ui_check_ollama() -> str:
    """Return the health-check message of the remote Ollama node.

    Returns:
        The message reported by ``check_ollama_health``.
    """
    ok, msg = check_ollama_health()
    # NOTE(review): both branches render identically as visible in the
    # source — confirm whether status glyphs were lost in extraction.
    return f"{msg}" if ok else f"{msg}"
# ---------------------------------------------------------------------------
# Module 4: ChatTTS audio synthesis
# ---------------------------------------------------------------------------
def ui_synthesize(polished_text: str) -> tuple[str | None, str]:
    """Synthesize the final voice-over from the polished script.

    Args:
        polished_text: Script to synthesize; empty or whitespace-only input
            is rejected without calling the TTS service.

    Returns:
        ``(wav path or None, status log)``. The path is ``None`` when the
        input was blank, synthesis failed, or no path was produced.
    """
    if not polished_text or not polished_text.strip():
        return None, "请先完成 Gemma4 润色。"

    ok, msg, wav_path = generate_voice(polished_text)
    if ok and wav_path:
        # NOTE(review): leading space kept as visible in the source — a
        # status glyph may have been lost in extraction; confirm.
        return wav_path, f" {msg}"
    return None, f"{msg}"
# ---------------------------------------------------------------------------
# One-click pipeline
# ---------------------------------------------------------------------------
def ui_full_pipeline(
    audio_file,
    skip_polish: bool,
    manual_raw: str,
) -> tuple[str, str, str | None, str]:
    """Run transcription → polishing (optional) → synthesis in one go.

    Args:
        audio_file: Path of the uploaded recording, or ``None``. Only used
            when ``manual_raw`` is blank.
        skip_polish: When true, the raw transcript is synthesized as-is.
        manual_raw: Manually supplied transcript; when non-blank it replaces
            the Whisper step.

    Returns:
        ``(raw, polished, wav_path, log)``. On failure the stages that did
        not complete are returned as empty strings / ``None`` and the log
        starts with the error, followed by the log lines gathered so far.
    """
    logs: list[str] = []

    # Step 1: transcription (or the manually supplied transcript).
    if manual_raw and manual_raw.strip():
        raw = manual_raw.strip()
        logs.append(f"使用手动输入转写稿({len(raw)} 字)。")
    else:
        path = _save_upload(audio_file)
        if not path:
            return "", "", None, "❌ 请上传录音或手动填写转写文本。"
        ok, result = transcribe_audio(path)
        if not ok:
            return "", "", None, f"❌ 识别失败: {result}"
        raw = result
        logs.append(f"✅ Whisper 识别完成({len(raw)} 字)。")

    # Step 2: polishing.
    if skip_polish:
        polished = raw
        logs.append("已跳过 LLM 润色,直接使用原文。")
    else:
        ok, result = polish_text(raw)
        if not ok:
            return raw, "", None, f"❌ 润色失败: {result}\n" + "\n".join(logs)
        polished = result
        logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。")

    # Step 3: synthesis.
    ok, msg, wav_path = generate_voice(polished)
    if not ok:
        return raw, polished, None, f"❌ 合成失败: {msg}\n" + "\n".join(logs)

    logs.append(f"{msg}")
    return raw, polished, wav_path, "\n".join(logs)
# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
# Stylesheet handed to Gradio: dark gradient background, bordered panels,
# monospace status bars, and a hidden footer.
CUSTOM_CSS = """
/* 硬核暗黑科技风 */
.gradio-container {
    background: linear-gradient(160deg, #0a0a0f 0%, #12121a 40%, #0d0d12 100%) !important;
    color: #c8c8d0 !important;
}
.dark-panel {
    border: 1px solid #2a2a35;
    border-radius: 8px;
    padding: 16px;
    background: rgba(18, 18, 26, 0.85);
    margin-bottom: 12px;
}
h1, h2, h3 { color: #e8e8f0 !important; letter-spacing: 0.05em; }
.status-bar {
    font-family: 'Consolas', 'Monaco', monospace;
    font-size: 0.85em;
    color: #7a7a90;
}
footer { visibility: hidden; }
"""
def build_theme() -> gr.themes.Base:
    """Build the dark Monochrome theme for the app.

    The theme is built separately from the Blocks layout because, per the
    original note, Gradio 6.0 expects it to be passed to ``launch()``.

    Returns:
        A ``gr.themes.Monochrome`` instance with slate/gray hues, custom
        fonts, and dark background/border/button colour overrides.
    """
    return gr.themes.Monochrome(
        primary_hue="slate",
        secondary_hue="gray",
        neutral_hue="slate",
        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
        font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "Consolas", "monospace"],
    ).set(
        body_background_fill="#0a0a0f",
        body_background_fill_dark="#0a0a0f",
        block_background_fill="#12121a",
        block_background_fill_dark="#12121a",
        block_border_color="#2a2a35",
        block_label_text_color="#9090a0",
        input_background_fill="#1a1a24",
        button_primary_background_fill="#3a3a50",
        button_primary_background_fill_hover="#4a4a60",
    )
def build_app() -> gr.Blocks:
    """Build the main Gradio interface.

    Lays out a header, a status row (Ollama node / speaker status / refresh
    button) and three tabs: speaker lock, step-by-step pipeline, and one-click
    production. Theme and CSS are not set here; they are supplied when the app
    is launched.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """

    def refresh_status() -> tuple[str, str]:
        """Return the current (Ollama status, speaker status) texts."""
        return ui_check_ollama(), ui_speaker_status()

    with gr.Blocks(
        title="Trading Studio | 交易复盘配音中控",
    ) as demo:
        gr.Markdown(
            f"""
            # ⚡ Trading Studio
            **本地量化交易复盘 → B 站配音生产流水线**

            `Whisper(CUDA)` → `Gemma4 @ {OLLAMA_URL}` → `ChatTTS(CUDA)`

            > 仓库: [{GIT_REPO_URL}]({GIT_REPO_URL})
            """,
            elem_classes=["dark-panel"],
        )

        with gr.Row():
            ollama_status = gr.Textbox(
                label="Ollama 节点",
                value="检测中...",
                interactive=False,
                scale=3,
                elem_classes=["status-bar"],
            )
            speaker_status = gr.Textbox(
                label="音色状态",
                value="检测中...",
                interactive=False,
                scale=2,
                elem_classes=["status-bar"],
            )
            refresh_btn = gr.Button("🔄 刷新状态", scale=1)

        refresh_btn.click(
            fn=refresh_status,
            outputs=[ollama_status, speaker_status],
        )

        with gr.Tabs():
            # ---- Tab 1: speaker lock ----
            with gr.Tab("🎙️ 音色锁定"):
                gr.Markdown(
                    "上传 **10-30 秒** 干净人声样本,系统将提取 Speaker Embedding "
                    f"并保存至 `{SPEAKER_EMB_PATH.name}`,后续合成 100% 还原音色。"
                )
                with gr.Row():
                    spk_audio = gr.Audio(
                        label="参考人声(碎碎念盲录样本)",
                        type="filepath",
                        sources=["upload", "microphone"],
                    )
                    spk_transcript = gr.Textbox(
                        label="参考音频精确转写(可选,提升还原度)",
                        placeholder="尽量与参考音频内容完全一致...",
                        lines=6,
                    )
                lock_btn = gr.Button("🔒 锁定音色", variant="primary")
                lock_log = gr.Textbox(label="锁定结果", lines=4, interactive=False)
                # The handler also returns the speaker status, so the status
                # box updates right after locking.
                lock_btn.click(
                    ui_lock_speaker,
                    [spk_audio, spk_transcript],
                    [lock_log, speaker_status],
                )

            # ---- Tab 2: step-by-step pipeline ----
            with gr.Tab("🔧 分步流水线"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Step 1 · 音频极速识别")
                        rec_audio = gr.Audio(
                            label="交易复盘碎碎念录音",
                            type="filepath",
                            sources=["upload", "microphone"],
                        )
                        transcribe_btn = gr.Button("⚡ Faster-Whisper 识别", variant="primary")
                        transcribe_log = gr.Textbox(label="识别日志", lines=2, interactive=False)

                    with gr.Column(scale=1):
                        gr.Markdown("### Step 2 · Gemma4 纪律审判")
                        raw_text = gr.Textbox(
                            label="转写原文(可编辑)",
                            lines=10,
                            placeholder="识别结果将显示在此,也可手动粘贴...",
                        )
                        polish_btn = gr.Button("⚖️ 远程 Gemma4 严厉润色", variant="primary")
                        polish_log = gr.Textbox(label="润色日志", lines=2, interactive=False)

                    with gr.Column(scale=1):
                        gr.Markdown("### Step 3 · ChatTTS 配音合成")
                        polished_text = gr.Textbox(
                            label="润色配音稿(可编辑)",
                            lines=10,
                            placeholder="润色结果将显示在此...",
                        )
                        synth_btn = gr.Button("🔊 合成配音 WAV", variant="primary")
                        synth_log = gr.Textbox(label="合成日志", lines=2, interactive=False)
                        output_audio = gr.Audio(label="成品配音", type="filepath")

                transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log])
                polish_btn.click(ui_polish, raw_text, [polished_text, polish_log])
                synth_btn.click(ui_synthesize, polished_text, [output_audio, synth_log])

            # ---- Tab 3: one-click production ----
            with gr.Tab("🚀 一键生产"):
                gr.Markdown(
                    "上传碎碎念录音,系统自动完成 **识别 → 润色 → 合成** 全流程。"
                )
                with gr.Row():
                    pipe_audio = gr.Audio(
                        label="复盘录音",
                        type="filepath",
                        sources=["upload", "microphone"],
                    )
                    pipe_manual = gr.Textbox(
                        label="或手动输入转写(跳过识别)",
                        lines=4,
                        placeholder="若已有转写文本,可直接粘贴,留空则走 Whisper 识别",
                    )
                skip_polish_cb = gr.Checkbox(
                    label="跳过 Gemma4 润色(仅测试 TTS)",
                    value=False,
                )
                pipeline_btn = gr.Button("▶ 启动全流程", variant="primary", size="lg")
                pipeline_log = gr.Textbox(label="流水线日志", lines=6, interactive=False)
                with gr.Row():
                    pipe_raw = gr.Textbox(label="转写原文", lines=6)
                    pipe_polished = gr.Textbox(label="润色稿", lines=6)
                pipe_output = gr.Audio(label="成品配音", type="filepath")

                pipeline_btn.click(
                    ui_full_pipeline,
                    [pipe_audio, skip_polish_cb, pipe_manual],
                    [pipe_raw, pipe_polished, pipe_output, pipeline_log],
                )

        # Fill both status boxes once when the page loads.
        demo.load(
            fn=refresh_status,
            outputs=[ollama_status, speaker_status],
        )

    return demo
def main() -> None:
    """Entry point: build the app and start the Gradio server.

    The theme and custom CSS are passed to ``launch()`` rather than to
    ``gr.Blocks`` (the commit this file comes from moved them there to fix a
    Gradio 6.0 warning). The ``outputs`` directory next to this file is added
    to ``allowed_paths``.
    """
    logger.info("Trading Studio 启动中... HOST=%s PORT=%s", HOST, PORT)
    app = build_app()
    app.launch(
        server_name=HOST,
        server_port=PORT,
        share=False,
        show_error=True,
        theme=build_theme(),
        css=CUSTOM_CSS,
        allowed_paths=[str(Path(__file__).resolve().parent / "outputs")],
    )
# Start the server only when this file is run as a script.
if __name__ == "__main__":
    main()