Fix AI output showing UTF-8 bytes as hex escapes instead of Chinese.
Decode <0xE5><0xA7><0xA4> style model output to proper characters; add prompt rule to use normal Chinese text. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -1,4 +1,9 @@
|
||||
import type { AiRequestBody } from "@/lib/ai/types";
|
||||
import { decodeHexByteEscapes } from "@/lib/ai/decode-text";
|
||||
|
||||
/**
 * Passes model output to the update callback after running it through
 * `decodeHexByteEscapes`, so callers never receive raw `<0xNN>` byte escapes.
 *
 * @param text - Text accumulated from the model so far.
 * @param onUpdate - Callback that receives the decoded text.
 */
function emitText(text: string, onUpdate: (text: string) => void) {
  const readable = decodeHexByteEscapes(text);
  onUpdate(readable);
}
|
||||
|
||||
function parseApiError(text: string, status: number): string {
|
||||
const trimmed = text.trim();
|
||||
@@ -41,11 +46,11 @@ export async function streamAiCompletion(
|
||||
break;
|
||||
}
|
||||
text += decoder.decode(value, { stream: true });
|
||||
onUpdate(text);
|
||||
emitText(text, onUpdate);
|
||||
}
|
||||
|
||||
text += decoder.decode();
|
||||
onUpdate(text);
|
||||
emitText(text, onUpdate);
|
||||
|
||||
if (!text.trim()) {
|
||||
throw new Error("AI 返回内容为空,请检查模型配置或稍后重试");
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
/** Matches one or more consecutive `<0xNN>` byte tokens (hex digits in either case). */
const HEX_BYTE_RUN = /(?:<0x[0-9a-f]{2}>)+/gi;

/** Every token has the fixed shape `<0xNN>`: six characters, hex digits at offsets 3-4. */
const HEX_BYTE_TOKEN_LENGTH = 6;
const HEX_DIGITS_OFFSET = 3;

/**
 * Non-fatal decoder: malformed byte sequences become U+FFFD instead of throwing.
 * It is only ever used without `stream: true`, so it keeps no state between calls.
 */
const LENIENT_UTF8_DECODER = new TextDecoder("utf-8", { fatal: false });

/**
 * Some models emit UTF-8 encoded characters as `<0xE5><0xA7><0xA4>`-style byte
 * escapes; this restores them to the characters they encode.
 *
 * Each run of consecutive tokens is decoded as a single UTF-8 byte sequence, so
 * multi-byte characters split across several tokens are reassembled. Bytes that
 * do not form valid UTF-8 decode to U+FFFD. Text outside the tokens is returned
 * unchanged.
 *
 * @example
 * decodeHexByteEscapes("<0xE5><0xA7><0xA4>"); // "姤"
 *
 * @param text - Model output that may contain `<0xNN>` byte escapes.
 * @returns The text with every run of byte escapes replaced by its decoded form.
 */
export function decodeHexByteEscapes(text: string): string {
  return text.replace(HEX_BYTE_RUN, (run) => {
    // The pattern guarantees `run` is a whole number of fixed-width tokens.
    const bytes = new Uint8Array(run.length / HEX_BYTE_TOKEN_LENGTH);
    for (let i = 0; i < bytes.length; i++) {
      const digitsStart = i * HEX_BYTE_TOKEN_LENGTH + HEX_DIGITS_OFFSET;
      bytes[i] = parseInt(run.slice(digitsStart, digitsStart + 2), 16);
    }
    // A lenient decode equals a strict decode for valid input and is the
    // desired best-effort result for invalid input, so one pass is enough.
    return LENIENT_UTF8_DECODER.decode(bytes);
  });
}
|
||||
Reference in New Issue
Block a user