fix: prevent AI coach chat replies from truncating mid-sentence

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-11 00:44:08 +08:00
parent 0e2e360ccf
commit 6169fee7b9
5 changed files with 221 additions and 36 deletions
@@ -7,7 +7,7 @@ from __future__ import annotations

 import base64
 import os
-from typing import List, Optional, Sequence
+from typing import List, Optional, Sequence, Tuple

 import requests

@@ -19,7 +19,12 @@ def _env_str(name: str, default: str = "") -> str:
    return str(v).strip()


-def _ai_timeout_seconds(*, image_count: int = 0) -> int:
+def _ai_timeout_seconds(*, image_count: int = 0, chat: bool = False) -> int:
+    if chat:
+        try:
+            return max(30, int(_env_str("CHAT_AI_TIMEOUT_SECONDS", "300") or "300"))
+        except ValueError:
+            return 300
    if image_count > 0:
        try:
            return max(30, int(_env_str("AI_REVIEW_TIMEOUT_SECONDS", "300") or "300"))
@@ -104,6 +109,68 @@ def _openai_chat_url() -> str:
    return f"{base}/chat/completions"


+def _openai_message_text(msg: dict) -> str:
+    """Return the stripped message text, or stripped ``reasoning_content`` if it is empty."""
+    body = msg.get("content")
+    if isinstance(body, list):
+        # Multi-part content: keep only the "text" parts, concatenated in order.
+        body = "".join(
+            str(item.get("text") or "")
+            for item in body
+            if isinstance(item, dict) and item.get("type") == "text"
+        )
+    cleaned = str(body or "").strip()
+    return cleaned or str(msg.get("reasoning_content") or "").strip()
+
+
+def _apply_max_tokens(body: dict, max_tokens: int | None) -> None:
+    """Write a positive limit into *body* as ``max_tokens`` and ``max_completion_tokens``."""
+    if max_tokens is None or not max_tokens > 0:
+        return
+    body["max_tokens"] = body["max_completion_tokens"] = int(max_tokens)
+
+
+def _openai_chat_completion(
+    messages: list[dict],
+    *,
+    temperature: float,
+    max_tokens: int | None = None,
+    image_count: int = 0,
+    chat: bool = False,
+) -> Tuple[str, str]:
+    """POST *messages* to the chat-completions URL and return ``(text, finish_reason)``.
+
+    A missing API key, an empty ``choices`` list or empty text is reported as an error
+    message in the text slot; HTTP errors propagate from ``raise_for_status``.
+    """
+    key = _openai_api_key()
+    if not key:
+        return "AI 调用失败：未配置 OPENAI_API_KEY（请在当前实例目录 .env 中设置，修改后需重启服务）", "error"
+    payload: dict = {
+        "model": _openai_model(),
+        "messages": messages,
+        "temperature": temperature,
+        "stream": False,
+    }
+    _apply_max_tokens(payload, max_tokens)
+    resp = requests.post(
+        _openai_chat_url(),
+        headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
+        json=payload,
+        timeout=_ai_timeout_seconds(image_count=image_count, chat=chat),
+    )
+    resp.raise_for_status()
+    choices = resp.json().get("choices") or []
+    if not choices:
+        return "AI 生成失败：响应无 choices", "error"
+    first = choices[0] or {}
+    finish = first.get("finish_reason")
+    text = _openai_message_text(first.get("message") or {})
+    if not text:
+        return "AI 生成失败：空内容", finish or "error"
+    return text, str(finish or "")
+
+
 def _generate_openai(
    prompt: str,
    images: List[tuple],
@@ -111,13 +178,6 @@ def _generate_openai(
    *,
    max_tokens: int | None = None,
 ) -> str:
-    api_key = _openai_api_key()
-    if not api_key:
-        return "AI 调用失败：未配置 OPENAI_API_KEY（请在当前实例目录 .env 中设置，修改后需重启服务）"
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json",
-    }
    if images:
        content: List[dict] = [{"type": "text", "text": prompt}]
        for b64, mime in images:
@@ -130,27 +190,13 @@ def _generate_openai(
        messages = [{"role": "user", "content": content}]
    else:
        messages = [{"role": "user", "content": prompt}]
-    body: dict = {
-        "model": _openai_model(),
-        "messages": messages,
-        "temperature": temperature,
-        "stream": False,
-    }
-    if max_tokens is not None and max_tokens > 0:
-        body["max_tokens"] = int(max_tokens)
-    r = requests.post(
-        _openai_chat_url(),
-        headers=headers,
-        json=body,
-        timeout=_ai_timeout_seconds(image_count=len(images)),
+    text, _reason = _openai_chat_completion(
+        messages,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        image_count=len(images),
    )
-    r.raise_for_status()
-    data = r.json()
-    choices = data.get("choices") or []
-    if not choices:
-        return "AI 生成失败：响应无 choices"
-    msg = choices[0].get("message") or {}
-    return (msg.get("content") or "").strip() or "AI 生成失败：空内容"
+    return text


 def _generate_ollama(
@@ -159,7 +205,8 @@ def _generate_ollama(
    temperature: float,
    *,
    max_tokens: int | None = None,
-) -> str:
+    chat: bool = False,
+) -> Tuple[str, str]:
    options: dict = {"temperature": temperature}
    if max_tokens is not None and max_tokens > 0:
        options["num_predict"] = int(max_tokens)
@@ -171,9 +218,15 @@ def _generate_ollama(
    }
    if images:
        payload["images"] = [b64 for b64, _mime in images]
-    r = requests.post(_ollama_api(), json=payload, timeout=_ai_timeout_seconds(image_count=len(images)))
+    r = requests.post(
+        _ollama_api(),
+        json=payload,
+        timeout=_ai_timeout_seconds(image_count=len(images), chat=chat),
+    )
    r.raise_for_status()
-    return (r.json().get("response") or "").strip() or "AI 生成失败"
+    data = r.json()
+    text = (data.get("response") or "").strip() or "AI 生成失败"
+    return text, str(data.get("done_reason") or "")


 def ai_generate(
@@ -189,7 +242,115 @@ def ai_generate(
    try:
        if _use_openai():
            return _generate_openai(prompt, images, temperature, max_tokens=max_tokens)
-        return _generate_ollama(prompt, images, temperature, max_tokens=max_tokens)
+        text, _reason = _generate_ollama(prompt, images, temperature, max_tokens=max_tokens)
+        return text
+    except requests.HTTPError as e:
+        detail = ""
+        try:
+            detail = (e.response.text or "")[:500]
+        except Exception:
+            pass
+        prov = "OpenAI" if _use_openai() else "Ollama"
+        return f"AI 调用失败（{prov} HTTP {e.response.status_code if e.response else '?'}）：{detail or str(e)}"
+    except Exception as e:
+        prov = "OpenAI" if _use_openai() else "Ollama"
+        return f"AI 调用失败（{prov}）：{str(e)}"
+
+
+_CHAT_CONTINUE_USER = (  # follow-up message asking the model to resume a cut-off reply
+    "你上一条回复在中途截断了。请从断点处继续写完，不要重复已写内容，"
+    "保持同一语气，写完给出完整结尾。"
+)
+_CHAT_END_CHARS = "。！？.!?\"」』）)>】"  # final characters that mark a reply as finished
+
+
+def _looks_truncated(text: str) -> bool:
+    """Guess whether *text* stops mid-sentence; texts under 48 chars never qualify."""
+    t = (text or "").rstrip()
+    if len(t) < 48:
+        return False
+    # The ellipsis test must come first: "." is in _CHAT_END_CHARS and would mask "...".
+    if t.endswith(("…", "...")):
+        return True
+    return t[-1] not in _CHAT_END_CHARS + "，、,;；:：\n"
+
+
+def _should_continue(reason: str, chunk: str) -> bool:
+    """Return True if *reason* is ``"length"`` or *chunk* looks truncated."""
+    hit_limit = reason == "length"
+    return hit_limit or _looks_truncated(chunk)
+
+
+def ai_generate_chat(
+    *,
+    system: str,
+    user: str,
+    temperature: float = 0.5,
+    images_b64: Optional[Sequence[str]] = None,
+    max_tokens: int = 8192,
+    max_continuations: int = 3,
+) -> str:
+    """聊天专用：system/user 分消息；输出触顶时自动续写。"""
+    images = _collect_images(None, images_b64)
+    try:
+        if _use_openai():
+            messages: list[dict] = [
+                {"role": "system", "content": system.strip()},
+            ]
+            if images:
+                content: List[dict] = [{"type": "text", "text": user.strip()}]
+                for b64, mime in images:
+                    content.append(
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": f"data:{mime};base64,{b64}"},
+                        }
+                    )
+                messages.append({"role": "user", "content": content})
+            else:
+                messages.append({"role": "user", "content": user.strip()})
+
+            parts: list[str] = []
+            for _ in range(max(1, int(max_continuations) + 1)):
+                chunk, reason = _openai_chat_completion(
+                    messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    image_count=len(images),
+                    chat=True,
+                )
+                if chunk.startswith("AI 调用失败") or chunk.startswith("AI 生成失败"):
+                    return chunk if not parts else "".join(parts)
+                parts.append(chunk)
+                if not _should_continue(reason, chunk):
+                    break
+                messages.append({"role": "assistant", "content": chunk})
+                messages.append({"role": "user", "content": _CHAT_CONTINUE_USER})
+            return "".join(parts).strip() or "AI 生成失败：空内容"
+
+        prompt = f"{system.strip()}\n\n---\n\n{user.strip()}"
+        parts = []
+        current_prompt = prompt
+        for _ in range(max(1, int(max_continuations) + 1)):
+            chunk, reason = _generate_ollama(
+                current_prompt,
+                images if not parts else [],
+                temperature,
+                max_tokens=max_tokens,
+                chat=True,
+            )
+            if chunk.startswith("AI 生成失败") and not parts:
+                return chunk
+            parts.append(chunk)
+            if not _should_continue(reason, chunk):
+                break
+            tail = chunk[-400:] if len(chunk) > 400 else chunk
+            current_prompt = (
+                f"{prompt}\n\n{''.join(parts)}\n\n"
+                f"{_CHAT_CONTINUE_USER}\n\n"
+                f"（已写结尾片段供衔接：…{tail}）"
+            )
+        return "".join(parts).strip() or "AI 生成失败"
    except requests.HTTPError as e:
        detail = ""
        try: