Fix AI output showing UTF-8 bytes as hex escapes instead of Chinese.

Decode <0xE5><0xA7><0xA4>-style byte escapes in model output into the characters they encode, and add a prompt rule telling the model to write normal Chinese text.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-10 23:10:09 +08:00
parent b38b69cb71
commit 206673fd90
3 changed files with 36 additions and 5 deletions
@@ -1,4 +1,9 @@
 import type { AiRequestBody } from "@/lib/ai/types";
+import { decodeHexByteEscapes } from "@/lib/ai/decode-text";
+
+function emitText(text: string, onUpdate: (text: string) => void) { // decode <0xHH> byte escapes in `text`, then publish it
+  onUpdate(decodeHexByteEscapes(text)); // onUpdate always receives the output of decodeHexByteEscapes
+}

 function parseApiError(text: string, status: number): string {
  const trimmed = text.trim();
@@ -41,11 +46,11 @@ export async function streamAiCompletion(
      break;
    }
    text += decoder.decode(value, { stream: true });
-    onUpdate(text);
+    emitText(text, onUpdate);
  }

  text += decoder.decode();
-  onUpdate(text);
+  emitText(text, onUpdate);

  if (!text.trim()) {
    throw new Error("AI 返回内容为空，请检查模型配置或稍后重试");
@@ -0,0 +1,20 @@
+/**
+ * Some models emit UTF-8 Chinese characters as byte escapes of the form
+ * <0xE5><0xA7><0xA4>; this restores them to normal text.
+ * Example: <0xE5><0xA7><0xA4> → 姤
+ *
+ * Each maximal run of adjacent <0xHH> tokens is decoded as a single UTF-8 byte
+ * sequence. Text outside such runs is returned unchanged.
+ *
+ * @param text - Text that may contain <0xHH> byte escapes.
+ * @returns The text with every run of escapes replaced by its decoded form.
+ *
+ * NOTE(review): a run that is not valid UTF-8 (for example one that ends in
+ * the middle of a multi-byte character) is decoded by the non-fatal decoder,
+ * which substitutes U+FFFD for the malformed bytes instead of keeping the
+ * original escape text. Confirm that this lossy fallback is intended.
+ */
+export function decodeHexByteEscapes(text: string): string {
+  return text.replace(/(?:<0x[0-9A-Fa-f]{2}>)+/gi, (match) => { // match = one whole run of adjacent escapes
+    const parts = match.match(/<0x([0-9A-Fa-f]{2})>/gi); // split the run into individual <0xHH> tokens
+    if (!parts?.length) { // defensive: the outer pattern already requires at least one token
+      return match;
+    }
+    const bytes = Uint8Array.from(
+      parts.map((part) => parseInt(part.slice(3, 5), 16)), // characters 3-4 of "<0xHH>" are the two hex digits
+    );
+    try {
+      return new TextDecoder("utf-8", { fatal: true }).decode(bytes); // strict decode: throws on malformed UTF-8
+    } catch {
+      return new TextDecoder("utf-8", { fatal: false }).decode(bytes); // lenient decode: malformed bytes become U+FFFD
+    }
+  });
+}