Fix inconsistent voice across TTS segments

Use the same manual_seed for every chunk and normalize per-segment peaks before concatenation so long voiceovers no longer sound like different speakers between segments.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-12 18:46:25 +08:00
parent 4255cf7cd7
commit 541df29722
2 changed files with 44 additions and 20 deletions
@@ -142,6 +142,10 @@ TTS_TEMPERATURE = 0.3
 TTS_TOP_P = 0.7
 TTS_TOP_K = 20
 TTS_SPEED_PROMPT = "[speed_5]"
+# When stitching multiple segments together, every segment must use the same random seed; otherwise the voice sounds like a different speaker.
+TTS_MANUAL_SEED = _env_int("TTS_MANUAL_SEED", 42)
+# Silence gap inserted between segments (seconds)
+TTS_SEGMENT_PAUSE_SEC = 0.35

 # Max characters per single TTS chunk (overlong scripts are split by sentence, synthesized chunk by chunk, then concatenated; <=200 recommended for 8 GB VRAM)
 TTS_MAX_CHARS_PER_CHUNK = _env_int("TTS_MAX_CHARS_PER_CHUNK", 200)