""" Trading Studio — 自动化交易复盘视频配音系统 Gradio Web 中控:音色锁定 → Whisper 识别 → Gemma4 润色 → ChatTTS 合成 """ from __future__ import annotations import logging import shutil import sys import uuid from pathlib import Path import gradio as gr from config import ( GIT_REPO_URL, HOST, MODEL_NAME, OLLAMA_URL, PORT, SPEAKER_EMB_PATH, UPLOAD_DIR, ) from llm_service import check_ollama_health, polish_text from tts_service import generate_voice, save_fixed_speaker, speaker_is_ready from whisper_service import transcribe_audio # --------------------------------------------------------------------------- # 日志 # --------------------------------------------------------------------------- logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", handlers=[ logging.StreamHandler(sys.stdout), logging.FileHandler("trading_studio.log", encoding="utf-8"), ], ) logger = logging.getLogger("trading_studio") # --------------------------------------------------------------------------- # 全局 UI 状态(Gradio State) # --------------------------------------------------------------------------- # raw_transcript / polished_script 在流水线中传递 def _save_upload(upload_file) -> str | None: """将 Gradio 上传文件复制到本地 uploads 目录,返回持久化路径。""" if upload_file is None: return None src = Path(upload_file) if not src.exists(): return None dest = UPLOAD_DIR / f"{uuid.uuid4().hex}_{src.name}" shutil.copy2(src, dest) return str(dest) # --------------------------------------------------------------------------- # 模块 1:音色锁定 # --------------------------------------------------------------------------- def ui_lock_speaker(audio_file, sample_transcript: str) -> tuple[str, str]: """【音色锁定】从参考人声提取并保存 Speaker Embedding。""" path = _save_upload(audio_file) if not path: return "请上传 10-30 秒干净参考人声(wav/mp3 均可)。", ui_speaker_status() ok, msg = save_fixed_speaker(path, sample_transcript or "") result = msg if ok else f"❌ {msg}" return result, ui_speaker_status() def ui_speaker_status() -> str: """刷新音色状态。""" ok, msg = speaker_is_ready() return f"✅ {msg}" if ok else f"⚠️ {msg}" # --------------------------------------------------------------------------- # 模块 2:音频极速识别 # --------------------------------------------------------------------------- def ui_transcribe(audio_file) -> tuple[str, str]: """【Whisper 识别】返回 (转写文本, 状态日志)。""" path = _save_upload(audio_file) if not path: return "", "请上传待识别的碎碎念录音。" ok, result = transcribe_audio(path) if ok: return result, f"✅ 识别完成,共 {len(result)} 字。" return "", f"❌ {result}" # --------------------------------------------------------------------------- # 模块 3:Gemma4 纪律审判 # --------------------------------------------------------------------------- def ui_polish(raw_text: str) -> tuple[str, str]: """【LLM 润色】对转写稿进行严厉自我反思式润色。""" if not raw_text or not raw_text.strip(): return "", "请先完成语音识别或手动粘贴转写文本。" ok, result = polish_text(raw_text) if ok: return result, f"✅ Gemma4 润色完成,共 {len(result)} 字。" return "", f"❌ {result}" def ui_check_ollama() -> str: """检测远程 Ollama 节点状态。""" ok, msg = check_ollama_health() return f"✅ {msg}" if ok else f"❌ {msg}" # --------------------------------------------------------------------------- # 模块 4:ChatTTS 音频合成 # --------------------------------------------------------------------------- def ui_synthesize(polished_text: str) -> tuple[str | None, str]: """【TTS 合成】生成最终 wav 配音文件。""" if not polished_text or not polished_text.strip(): return None, "请先完成 Gemma4 润色。" ok, msg, wav_path = generate_voice(polished_text) if ok and wav_path: return wav_path, f"✅ {msg}" return None, f"❌ {msg}" # --------------------------------------------------------------------------- # 一键流水线 # --------------------------------------------------------------------------- def ui_full_pipeline( audio_file, skip_polish: bool, manual_raw: str, ) -> tuple[str, str, str | None, str]: """ 串联执行:识别 → 润色(可跳过)→ 合成。 返回 (raw, polished, wav_path, log) """ logs: list[str] = [] # Step 1: 识别 if manual_raw and manual_raw.strip(): raw = manual_raw.strip() logs.append(f"使用手动输入转写稿({len(raw)} 字)。") else: path = _save_upload(audio_file) if not path: return "", "", None, "❌ 请上传录音或手动填写转写文本。" ok, result = transcribe_audio(path) if not ok: return "", "", None, f"❌ 识别失败: {result}" raw = result logs.append(f"✅ Whisper 识别完成({len(raw)} 字)。") # Step 2: 润色 if skip_polish: polished = raw logs.append("已跳过 LLM 润色,直接使用原文。") else: ok, result = polish_text(raw) if not ok: return raw, "", None, f"❌ 润色失败: {result}\n" + "\n".join(logs) polished = result logs.append(f"✅ Gemma4 润色完成({len(polished)} 字)。") # Step 3: 合成 ok, msg, wav_path = generate_voice(polished) if not ok: return raw, polished, None, f"❌ 合成失败: {msg}\n" + "\n".join(logs) logs.append(f"✅ {msg}") return raw, polished, wav_path, "\n".join(logs) # --------------------------------------------------------------------------- # Gradio 界面 # --------------------------------------------------------------------------- CUSTOM_CSS = """ /* 硬核暗黑科技风 */ .gradio-container { background: linear-gradient(160deg, #0a0a0f 0%, #12121a 40%, #0d0d12 100%) !important; color: #c8c8d0 !important; } .dark-panel { border: 1px solid #2a2a35; border-radius: 8px; padding: 16px; background: rgba(18, 18, 26, 0.85); margin-bottom: 12px; } h1, h2, h3 { color: #e8e8f0 !important; letter-spacing: 0.05em; } .status-bar { font-family: 'Consolas', 'Monaco', monospace; font-size: 0.85em; color: #7a7a90; } footer { visibility: hidden; } """ def build_theme() -> gr.themes.Base: """构建暗黑科技风 Monochrome 主题(Gradio 6.0 需在 launch() 传入)。""" return gr.themes.Monochrome( primary_hue="slate", secondary_hue="gray", neutral_hue="slate", font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "Consolas", "monospace"], ).set( body_background_fill="#0a0a0f", body_background_fill_dark="#0a0a0f", block_background_fill="#12121a", block_background_fill_dark="#12121a", block_border_color="#2a2a35", block_label_text_color="#9090a0", input_background_fill="#1a1a24", button_primary_background_fill="#3a3a50", button_primary_background_fill_hover="#4a4a60", ) def build_app() -> gr.Blocks: """构建 Gradio 主界面。""" with gr.Blocks( title="Trading Studio | 交易复盘配音中控", ) as demo: gr.Markdown( f""" # ⚡ Trading Studio **本地量化交易复盘 → B 站配音生产流水线** `Whisper(CUDA)` → `Gemma4 @ {OLLAMA_URL}` → `ChatTTS(CUDA)` > 仓库: [{GIT_REPO_URL}]({GIT_REPO_URL}) """, elem_classes=["dark-panel"], ) with gr.Row(): ollama_status = gr.Textbox( label="Ollama 节点", value="检测中...", interactive=False, scale=3, elem_classes=["status-bar"], ) speaker_status = gr.Textbox( label="音色状态", value="检测中...", interactive=False, scale=2, elem_classes=["status-bar"], ) refresh_btn = gr.Button("🔄 刷新状态", scale=1) refresh_btn.click( fn=lambda: (ui_check_ollama(), ui_speaker_status()), outputs=[ollama_status, speaker_status], ) with gr.Tabs(): # ---- Tab 1: 音色锁定 ---- with gr.Tab("🎙️ 音色锁定"): gr.Markdown( "上传 **10-30 秒** 干净人声样本,系统将提取 Speaker Embedding " f"并保存至 `{SPEAKER_EMB_PATH.name}`,后续合成 100% 还原音色。" ) with gr.Row(): spk_audio = gr.Audio( label="参考人声(碎碎念盲录样本)", type="filepath", sources=["upload", "microphone"], ) spk_transcript = gr.Textbox( label="参考音频精确转写(可选,提升还原度)", placeholder="尽量与参考音频内容完全一致...", lines=6, ) lock_btn = gr.Button("🔒 锁定音色", variant="primary") lock_log = gr.Textbox(label="锁定结果", lines=4, interactive=False) lock_btn.click( ui_lock_speaker, [spk_audio, spk_transcript], [lock_log, speaker_status], ) # ---- Tab 2: 分步操作 ---- with gr.Tab("🔧 分步流水线"): with gr.Row(): with gr.Column(scale=1): gr.Markdown("### Step 1 · 音频极速识别") rec_audio = gr.Audio( label="交易复盘碎碎念录音", type="filepath", sources=["upload", "microphone"], ) transcribe_btn = gr.Button("⚡ Faster-Whisper 识别", variant="primary") transcribe_log = gr.Textbox(label="识别日志", lines=2, interactive=False) with gr.Column(scale=1): gr.Markdown("### Step 2 · Gemma4 纪律审判") raw_text = gr.Textbox( label="转写原文(可编辑)", lines=10, placeholder="识别结果将显示在此,也可手动粘贴...", ) polish_btn = gr.Button("⚖️ 远程 Gemma4 严厉润色", variant="primary") polish_log = gr.Textbox(label="润色日志", lines=2, interactive=False) with gr.Column(scale=1): gr.Markdown("### Step 3 · ChatTTS 配音合成") polished_text = gr.Textbox( label="润色配音稿(可编辑)", lines=10, placeholder="润色结果将显示在此...", ) synth_btn = gr.Button("🔊 合成配音 WAV", variant="primary") synth_log = gr.Textbox(label="合成日志", lines=2, interactive=False) output_audio = gr.Audio(label="成品配音", type="filepath") transcribe_btn.click(ui_transcribe, rec_audio, [raw_text, transcribe_log]) polish_btn.click(ui_polish, raw_text, [polished_text, polish_log]) synth_btn.click(ui_synthesize, polished_text, [output_audio, synth_log]) # ---- Tab 3: 一键生产 ---- with gr.Tab("🚀 一键生产"): gr.Markdown( "上传碎碎念录音,系统自动完成 **识别 → 润色 → 合成** 全流程。" ) with gr.Row(): pipe_audio = gr.Audio( label="复盘录音", type="filepath", sources=["upload", "microphone"], ) pipe_manual = gr.Textbox( label="或手动输入转写(跳过识别)", lines=4, placeholder="若已有转写文本,可直接粘贴,留空则走 Whisper 识别", ) skip_polish_cb = gr.Checkbox( label="跳过 Gemma4 润色(仅测试 TTS)", value=False, ) pipeline_btn = gr.Button("▶ 启动全流程", variant="primary", size="lg") pipeline_log = gr.Textbox(label="流水线日志", lines=6, interactive=False) with gr.Row(): pipe_raw = gr.Textbox(label="转写原文", lines=6) pipe_polished = gr.Textbox(label="润色稿", lines=6) pipe_output = gr.Audio(label="成品配音", type="filepath") pipeline_btn.click( ui_full_pipeline, [pipe_audio, skip_polish_cb, pipe_manual], [pipe_raw, pipe_polished, pipe_output, pipeline_log], ) demo.load( fn=lambda: (ui_check_ollama(), ui_speaker_status()), outputs=[ollama_status, speaker_status], ) return demo def main() -> None: """主入口:启动 Gradio 服务。""" logger.info("Trading Studio 启动中... HOST=%s PORT=%s", HOST, PORT) app = build_app() app.launch( server_name=HOST, server_port=PORT, share=False, show_error=True, theme=build_theme(), css=CUSTOM_CSS, allowed_paths=[str(Path(__file__).resolve().parent / "outputs")], ) if __name__ == "__main__": main()