Fix inconsistent voice across TTS segments

Use the same manual_seed for every chunk and normalize per-segment peaks before concat so long voiceovers no longer sound like different speakers between segments.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 18:46:25 +08:00
parent 4255cf7cd7
commit 541df29722
2 changed files with 44 additions and 20 deletions
+4
View File
@@ -142,6 +142,10 @@ TTS_TEMPERATURE = 0.3
TTS_TOP_P = 0.7
TTS_TOP_K = 20
TTS_SPEED_PROMPT = "[speed_5]"
# When multiple segments are concatenated, every segment must use the same
# random seed; otherwise the voice timbre sounds like "a different person".
# NOTE(review): _env_int is defined outside this view; presumably it reads the
# named environment variable and falls back to the given default — confirm.
TTS_MANUAL_SEED = _env_int("TTS_MANUAL_SEED", 42)
# Silence gap between segments (seconds).
TTS_SEGMENT_PAUSE_SEC = 0.35
# Maximum number of characters per single TTS chunk (overlong scripts are
# split by sentence, synthesized chunk by chunk, then concatenated; <=200 is
# recommended for 8 GB of VRAM).
TTS_MAX_CHARS_PER_CHUNK = _env_int("TTS_MAX_CHARS_PER_CHUNK", 200)