Allow saving voiceover at adjusted playback speed

Add a save button that exports WAV at the current slider speed using Web Audio, matching what the user hears during preview.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-12 18:58:00 +08:00
parent 1acba0349c
commit 54523e39af
+157 -9
View File
@@ -62,11 +62,11 @@ def ui_history_dropdown(select_path: str | None = None) -> dict:
def _voice_player_html(wav_path: str | None) -> str:
"""带播放控件语速滑块的 HTML 播放器(语速仅影响试听,不改变 WAV 文件)"""
"""带播放控件语速滑块与按语速保存下载的 HTML 播放器。"""
if not wav_path:
return (
'<div class="tts-player-wrap tts-player-empty">'
"<p>合成完成后可在此试听,拖动下方滑块调节播放语速(0.5x2.0x)。</p>"
"<p>合成完成后可在此试听,拖动滑块调节语速(0.5x~2.0x),点「保存」下载。</p>"
"</div>"
)
path = Path(wav_path)
@@ -79,18 +79,23 @@ def _voice_player_html(wav_path: str | None) -> str:
name = path.name
src = f"/outputs/{quote(name)}"
return f"""
<div class="tts-player-wrap">
<div class="tts-player-wrap" data-filename="{name}">
<div class="tts-player-title">🎧 {name}</div>
<audio class="tts-audio-el" controls preload="metadata" src="{src}"></audio>
<audio class="tts-audio-el" controls preload="metadata" src="{src}"
onloadedmetadata="(function(a){{var sl=a.closest('.tts-player-wrap').querySelector('.tts-speed-slider'); if(sl){{a.playbackRate=parseFloat(sl.value);}}}})(this)"></audio>
<div class="tts-speed-row">
<span class="tts-speed-label-text">播放语速</span>
<input type="range" class="tts-speed-slider" min="0.5" max="2.0" step="0.05" value="1"
aria-label="播放语速"
oninput="(function(el){{var w=el.closest('.tts-player-wrap'); var a=w&&w.querySelector('audio'); if(a){{a.playbackRate=parseFloat(el.value);}} var s=w&&w.querySelector('.tts-speed-val'); if(s){{s.textContent=parseFloat(el.value).toFixed(2)+'x';}}}})(this)">
oninput="ttsSyncSpeed(this)">
<span class="tts-speed-val">1.00x</span>
<a class="tts-dl-btn" href="{src}" download="{name}">⬇ 下载 WAV</a>
</div>
<p class="tts-player-tip">语速仅用于试听,下载的 WAV 仍为原速。</p>
<div class="tts-action-row">
<button type="button" class="tts-save-btn" onclick="ttsSaveAtSpeed(this)">💾 按当前语速保存</button>
<a class="tts-dl-btn" href="{src}" download="{name}">⬇ 原速下载</a>
<span class="tts-save-status"></span>
</div>
<p class="tts-player-tip">试听语速与保存文件一致;「原速下载」获取未变速的原始 WAV。</p>
</div>
"""
@@ -203,8 +208,8 @@ def _short_synth_log(msg: str, ok: bool) -> str:
segs = re.search(r"\s*(\d+)\s*段", msg)
if chars:
seg_note = f"{segs.group(1)} 段拼接" if segs else ""
return f"✅ 配音完成({chars.group(1)}{seg_note})。请用下方播放器试听调节语速下载。"
return "✅ 配音完成。请用下方播放器试听调节语速下载。"
return f"✅ 配音完成({chars.group(1)}{seg_note})。请用下方播放器试听调节语速后点「保存」下载。"
return "✅ 配音完成。请用下方播放器试听调节语速后点「保存」下载。"
def ui_synth_pending(polished_text: str) -> str:
@@ -423,6 +428,126 @@ PWA_HEAD = """
});
})();
</script>
<script>
(function () {
// Encode an AudioBuffer-like object as a 16-bit PCM WAV file.
// Reads numberOfChannels, sampleRate, length and getChannelData(c) from
// `buffer`, interleaves the channels, and returns a Blob of type "audio/wav".
function audioBufferToWav(buffer) {
  var PCM_FORMAT = 1; // WAVE format tag for uncompressed PCM
  var BITS_PER_SAMPLE = 16;
  var HEADER_BYTES = 44; // RIFF header + "fmt " chunk + "data" chunk header

  var channelCount = buffer.numberOfChannels;
  var rate = buffer.sampleRate;
  var frameCount = buffer.length;
  var bytesPerFrame = (channelCount * BITS_PER_SAMPLE) / 8;
  var bytesPerSecond = rate * bytesPerFrame;
  var payloadBytes = frameCount * bytesPerFrame;

  var bytes = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  var out = new DataView(bytes);

  // Write an ASCII tag one byte per character starting at `at`.
  function putAscii(at, text) {
    text.split("").forEach(function (ch, k) {
      out.setUint8(at + k, ch.charCodeAt(0));
    });
  }

  // RIFF container header; every multi-byte field is little-endian.
  putAscii(0, "RIFF");
  out.setUint32(4, 36 + payloadBytes, true);
  putAscii(8, "WAVE");

  // "fmt " chunk (16 bytes of format description).
  putAscii(12, "fmt ");
  out.setUint32(16, 16, true);
  out.setUint16(20, PCM_FORMAT, true);
  out.setUint16(22, channelCount, true);
  out.setUint32(24, rate, true);
  out.setUint32(28, bytesPerSecond, true);
  out.setUint16(32, bytesPerFrame, true);
  out.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk header, followed by the interleaved samples.
  putAscii(36, "data");
  out.setUint32(40, payloadBytes, true);

  // Fetch each channel's sample array once, before the per-frame loop.
  var channels = [];
  for (var ch = 0; ch < channelCount; ch++) {
    channels.push(buffer.getChannelData(ch));
  }

  var cursor = HEADER_BYTES;
  for (var frame = 0; frame < frameCount; frame++) {
    for (var k = 0; k < channelCount; k++) {
      // Clamp to [-1, 1], then scale asymmetrically so -1 maps to -32768
      // and +1 maps to 32767.
      var clamped = Math.max(-1, Math.min(1, channels[k][frame]));
      var scale = clamped < 0 ? 0x8000 : 0x7fff;
      out.setInt16(cursor, clamped * scale, true);
      cursor += 2;
    }
  }
  return new Blob([bytes], { type: "audio/wav" });
}
// Slider input handler: apply the slider's value as the playback rate of the
// <audio> element in the same .tts-player-wrap and refresh the "1.00x" label.
// Does nothing when the slider is not inside a player wrapper.
window.ttsSyncSpeed = function (slider) {
  var root = slider.closest(".tts-player-wrap");
  if (root) {
    var rate = parseFloat(slider.value);
    var player = root.querySelector("audio");
    var readout = root.querySelector(".tts-speed-val");
    if (player) {
      player.playbackRate = rate;
    }
    if (readout) {
      readout.textContent = rate.toFixed(2) + "x";
    }
  }
};
// Save-button handler: download the player's audio re-rendered at the
// slider's current speed as "<stem>_<speed>x.wav".
//
// `btn` is the clicked button; the audio element, slider and status label are
// looked up inside the enclosing .tts-player-wrap. At (approximately) 1.00x
// the original file is downloaded untouched. Progress and failures are shown
// in the .tts-save-status element; errors are also logged to the console.
//
// NOTE(review): AudioBufferSourceNode.playbackRate resamples the audio, so
// pitch shifts together with speed, whereas <audio> elements normally keep
// pitch constant when playbackRate changes. The saved file may therefore not
// sound identical to the preview - confirm whether that is acceptable.
window.ttsSaveAtSpeed = async function (btn) {
  var wrap = btn.closest(".tts-player-wrap");
  if (!wrap) return;
  var audio = wrap.querySelector("audio");
  var slider = wrap.querySelector(".tts-speed-slider");
  var status = wrap.querySelector(".tts-save-status");
  var speed = parseFloat(slider ? slider.value : "1");
  // A missing or unparsable slider value would make the render length NaN
  // below; fall back to original speed instead of failing.
  if (!isFinite(speed) || speed <= 0) speed = 1;
  var src = audio ? audio.currentSrc || audio.src : "";
  var baseName = wrap.getAttribute("data-filename") || "voiceover.wav";

  // Start a browser download of `href` under `filename` via a temporary link.
  function triggerDownload(href, filename) {
    var link = document.createElement("a");
    link.href = href;
    link.download = filename;
    document.body.appendChild(link);
    link.click();
    link.remove();
  }

  if (!src) {
    if (status) status.textContent = "无音频";
    return;
  }
  btn.disabled = true;
  if (status) status.textContent = "正在生成…";
  try {
    // No re-rendering needed at original speed: hand out the source file.
    if (Math.abs(speed - 1.0) < 0.001) {
      triggerDownload(src, baseName);
      if (status) status.textContent = "已保存(原速)";
      return;
    }
    var resp = await fetch(src);
    if (!resp.ok) throw new Error("HTTP " + resp.status);
    var buf = await resp.arrayBuffer();
    var Ctx = window.AudioContext || window.webkitAudioContext;
    if (!Ctx) throw new Error("浏览器不支持 AudioContext");
    // The realtime context is only used for decoding. Browsers cap how many
    // contexts may be open, so it is always closed, even when decoding or
    // rendering throws.
    var ctx = new Ctx();
    var rendered;
    try {
      var decoded = await ctx.decodeAudioData(buf);
      // Playing N frames at `speed` yields N / speed output frames.
      var newLen = Math.max(1, Math.ceil(decoded.length / speed));
      var offline = new OfflineAudioContext(
        decoded.numberOfChannels,
        newLen,
        decoded.sampleRate
      );
      var source = offline.createBufferSource();
      source.buffer = decoded;
      source.playbackRate.value = speed;
      source.connect(offline.destination);
      source.start(0);
      rendered = await offline.startRendering();
    } finally {
      if (typeof ctx.close === "function") {
        try {
          await ctx.close();
        } catch (closeErr) {
          // A failed close must not mask the real error or a finished render.
          console.warn("ttsSaveAtSpeed: AudioContext close failed", closeErr);
        }
      }
    }
    var blob = audioBufferToWav(rendered);
    var url = URL.createObjectURL(blob);
    // The doubled backslash is intentional: this script presumably lives in a
    // non-raw Python string, which collapses it to a single regex escape.
    var stem = baseName.replace(/\\.wav$/i, "");
    var tag = speed.toFixed(2).replace(".", "");
    var dlName = stem + "_" + tag + "x.wav";
    triggerDownload(url, dlName);
    // Revoke later rather than immediately: some browsers begin the download
    // asynchronously after click(), and an early revoke can cancel it.
    setTimeout(function () {
      URL.revokeObjectURL(url);
    }, 1000);
    if (status) status.textContent = "已保存 " + speed.toFixed(2) + "x";
  } catch (err) {
    if (status) status.textContent = "保存失败";
    console.error("ttsSaveAtSpeed", err);
  } finally {
    btn.disabled = false;
  }
};
})();
</script>
"""
MIC_HINT_HTML = """
@@ -1042,6 +1167,29 @@ gradio-app,
font-weight: 600 !important;
min-width: 48px !important;
}
/* Row under the speed slider holding the save button, the original-speed
   download link and the save status text; wraps when space runs out. */
.tts-action-row {
display: flex !important;
flex-wrap: wrap !important;
align-items: center !important;
gap: 10px 12px !important;
margin-top: 10px !important;
}
/* Primary "save at current speed" button: white text on blue. */
.tts-save-btn {
color: #ffffff !important;
background: #2563eb !important;
border: none !important;
padding: 8px 14px !important;
border-radius: 6px !important;
font-size: 0.88rem !important;
cursor: pointer !important;
}
/* Darker blue on hover. */
.tts-save-btn:hover { background: #1d4ed8 !important; }
/* Disabled state: dimmed, with a wait cursor. */
.tts-save-btn:disabled { opacity: 0.6 !important; cursor: wait !important; }
/* Inline status message next to the buttons, in light green. */
.tts-save-status {
color: #86efac !important;
font-size: 0.82rem !important;
/* NOTE(review): the only declaration here without !important - confirm intended. */
min-width: 80px;
}
.tts-dl-btn {
color: #ffffff !important;
background: #374151 !important;