fix: prevent AI coach chat replies from truncating mid-sentence
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
+194
-33
@@ -7,7 +7,7 @@ from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import os
|
||||
from typing import List, Optional, Sequence
|
||||
from typing import List, Optional, Sequence, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
@@ -19,7 +19,12 @@ def _env_str(name: str, default: str = "") -> str:
|
||||
return str(v).strip()
|
||||
|
||||
|
||||
def _ai_timeout_seconds(*, image_count: int = 0) -> int:
|
||||
def _ai_timeout_seconds(*, image_count: int = 0, chat: bool = False) -> int:
|
||||
if chat:
|
||||
try:
|
||||
return max(30, int(_env_str("CHAT_AI_TIMEOUT_SECONDS", "300") or "300"))
|
||||
except ValueError:
|
||||
return 300
|
||||
if image_count > 0:
|
||||
try:
|
||||
return max(30, int(_env_str("AI_REVIEW_TIMEOUT_SECONDS", "300") or "300"))
|
||||
@@ -104,6 +109,68 @@ def _openai_chat_url() -> str:
|
||||
return f"{base}/chat/completions"
|
||||
|
||||
|
||||
def _openai_message_text(msg: dict) -> str:
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for part in content:
|
||||
if isinstance(part, dict) and part.get("type") == "text":
|
||||
parts.append(str(part.get("text") or ""))
|
||||
content = "".join(parts)
|
||||
text = str(content or "").strip()
|
||||
if not text:
|
||||
text = str(msg.get("reasoning_content") or "").strip()
|
||||
return text
|
||||
|
||||
|
||||
def _apply_max_tokens(body: dict, max_tokens: int | None) -> None:
|
||||
if max_tokens is not None and max_tokens > 0:
|
||||
mt = int(max_tokens)
|
||||
body["max_tokens"] = mt
|
||||
body["max_completion_tokens"] = mt
|
||||
|
||||
|
||||
def _openai_chat_completion(
    messages: list[dict],
    *,
    temperature: float,
    max_tokens: int | None = None,
    image_count: int = 0,
    chat: bool = False,
) -> Tuple[str, str]:
    """Send one non-streaming chat-completions request.

    Args:
        messages: Chat messages placed in the request body as-is.
        temperature: Sampling temperature placed in the request body.
        max_tokens: Optional output cap, applied via ``_apply_max_tokens``.
        image_count: Passed to ``_ai_timeout_seconds`` to pick the timeout.
        chat: Passed to ``_ai_timeout_seconds`` to pick the timeout.

    Returns:
        A ``(text, finish_reason)`` pair, both always ``str``. On a missing
        API key or a response without choices, ``text`` is an error message
        and the reason is ``"error"``. On an empty message, ``text`` is an
        error message and the reason is the reported finish reason (or
        ``"error"`` if none was reported).

    Raises:
        requests.HTTPError: from ``raise_for_status()`` on an HTTP error
            status. Other exceptions from ``requests.post`` or ``r.json()``
            are not caught here.
    """
    api_key = _openai_api_key()
    if not api_key:
        return "AI 调用失败:未配置 OPENAI_API_KEY(请在当前实例目录 .env 中设置,修改后需重启服务)", "error"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    body: dict = {
        "model": _openai_model(),
        "messages": messages,
        "temperature": temperature,
        "stream": False,
    }
    _apply_max_tokens(body, max_tokens)
    r = requests.post(
        _openai_chat_url(),
        headers=headers,
        json=body,
        timeout=_ai_timeout_seconds(image_count=image_count, chat=chat),
    )
    r.raise_for_status()
    data = r.json()
    choices = data.get("choices") or []
    if not choices:
        return "AI 生成失败:响应无 choices", "error"
    # A null first choice is treated like an empty one.
    choice = choices[0] or {}
    msg = choice.get("message") or {}
    text = _openai_message_text(msg)
    if not text:
        # Coerce to str so both return paths honour the Tuple[str, str]
        # contract even if the server sends a non-string finish_reason.
        return "AI 生成失败:空内容", str(choice.get("finish_reason") or "error")
    return text, str(choice.get("finish_reason") or "")
|
||||
|
||||
|
||||
def _generate_openai(
|
||||
prompt: str,
|
||||
images: List[tuple],
|
||||
@@ -111,13 +178,6 @@ def _generate_openai(
|
||||
*,
|
||||
max_tokens: int | None = None,
|
||||
) -> str:
|
||||
api_key = _openai_api_key()
|
||||
if not api_key:
|
||||
return "AI 调用失败:未配置 OPENAI_API_KEY(请在当前实例目录 .env 中设置,修改后需重启服务)"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
if images:
|
||||
content: List[dict] = [{"type": "text", "text": prompt}]
|
||||
for b64, mime in images:
|
||||
@@ -130,27 +190,13 @@ def _generate_openai(
|
||||
messages = [{"role": "user", "content": content}]
|
||||
else:
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
body: dict = {
|
||||
"model": _openai_model(),
|
||||
"messages": messages,
|
||||
"temperature": temperature,
|
||||
"stream": False,
|
||||
}
|
||||
if max_tokens is not None and max_tokens > 0:
|
||||
body["max_tokens"] = int(max_tokens)
|
||||
r = requests.post(
|
||||
_openai_chat_url(),
|
||||
headers=headers,
|
||||
json=body,
|
||||
timeout=_ai_timeout_seconds(image_count=len(images)),
|
||||
text, _reason = _openai_chat_completion(
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
image_count=len(images),
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
choices = data.get("choices") or []
|
||||
if not choices:
|
||||
return "AI 生成失败:响应无 choices"
|
||||
msg = choices[0].get("message") or {}
|
||||
return (msg.get("content") or "").strip() or "AI 生成失败:空内容"
|
||||
return text
|
||||
|
||||
|
||||
def _generate_ollama(
|
||||
@@ -159,7 +205,8 @@ def _generate_ollama(
|
||||
temperature: float,
|
||||
*,
|
||||
max_tokens: int | None = None,
|
||||
) -> str:
|
||||
chat: bool = False,
|
||||
) -> Tuple[str, str]:
|
||||
options: dict = {"temperature": temperature}
|
||||
if max_tokens is not None and max_tokens > 0:
|
||||
options["num_predict"] = int(max_tokens)
|
||||
@@ -171,9 +218,15 @@ def _generate_ollama(
|
||||
}
|
||||
if images:
|
||||
payload["images"] = [b64 for b64, _mime in images]
|
||||
r = requests.post(_ollama_api(), json=payload, timeout=_ai_timeout_seconds(image_count=len(images)))
|
||||
r = requests.post(
|
||||
_ollama_api(),
|
||||
json=payload,
|
||||
timeout=_ai_timeout_seconds(image_count=len(images), chat=chat),
|
||||
)
|
||||
r.raise_for_status()
|
||||
return (r.json().get("response") or "").strip() or "AI 生成失败"
|
||||
data = r.json()
|
||||
text = (data.get("response") or "").strip() or "AI 生成失败"
|
||||
return text, str(data.get("done_reason") or "")
|
||||
|
||||
|
||||
def ai_generate(
|
||||
@@ -189,7 +242,115 @@ def ai_generate(
|
||||
try:
|
||||
if _use_openai():
|
||||
return _generate_openai(prompt, images, temperature, max_tokens=max_tokens)
|
||||
return _generate_ollama(prompt, images, temperature, max_tokens=max_tokens)
|
||||
text, _reason = _generate_ollama(prompt, images, temperature, max_tokens=max_tokens)
|
||||
return text
|
||||
except requests.HTTPError as e:
|
||||
detail = ""
|
||||
try:
|
||||
detail = (e.response.text or "")[:500]
|
||||
except Exception:
|
||||
pass
|
||||
prov = "OpenAI" if _use_openai() else "Ollama"
|
||||
return f"AI 调用失败({prov} HTTP {e.response.status_code if e.response else '?'}):{detail or str(e)}"
|
||||
except Exception as e:
|
||||
prov = "OpenAI" if _use_openai() else "Ollama"
|
||||
return f"AI 调用失败({prov}):{str(e)}"
|
||||
|
||||
|
||||
_CHAT_CONTINUE_USER = (
|
||||
"你上一条回复在中途截断了。请从断点处继续写完,不要重复已写内容,"
|
||||
"保持同一语气,写完给出完整结尾。"
|
||||
)
|
||||
_CHAT_END_CHARS = "。!?.!?\"」』))>】"
|
||||
|
||||
|
||||
def _looks_truncated(text: str) -> bool:
|
||||
t = (text or "").rstrip()
|
||||
if len(t) < 48:
|
||||
return False
|
||||
if t[-1] in _CHAT_END_CHARS:
|
||||
return False
|
||||
if t.endswith("…") or t.endswith("..."):
|
||||
return True
|
||||
return t[-1] not in ",、,;;::\n"
|
||||
|
||||
|
||||
def _should_continue(reason: str, chunk: str) -> bool:
|
||||
if reason == "length":
|
||||
return True
|
||||
return _looks_truncated(chunk)
|
||||
|
||||
|
||||
def ai_generate_chat(
|
||||
*,
|
||||
system: str,
|
||||
user: str,
|
||||
temperature: float = 0.5,
|
||||
images_b64: Optional[Sequence[str]] = None,
|
||||
max_tokens: int = 8192,
|
||||
max_continuations: int = 3,
|
||||
) -> str:
|
||||
"""聊天专用:system/user 分消息;输出触顶时自动续写。"""
|
||||
images = _collect_images(None, images_b64)
|
||||
try:
|
||||
if _use_openai():
|
||||
messages: list[dict] = [
|
||||
{"role": "system", "content": system.strip()},
|
||||
]
|
||||
if images:
|
||||
content: List[dict] = [{"type": "text", "text": user.strip()}]
|
||||
for b64, mime in images:
|
||||
content.append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:{mime};base64,{b64}"},
|
||||
}
|
||||
)
|
||||
messages.append({"role": "user", "content": content})
|
||||
else:
|
||||
messages.append({"role": "user", "content": user.strip()})
|
||||
|
||||
parts: list[str] = []
|
||||
for _ in range(max(1, int(max_continuations) + 1)):
|
||||
chunk, reason = _openai_chat_completion(
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
image_count=len(images),
|
||||
chat=True,
|
||||
)
|
||||
if chunk.startswith("AI 调用失败") or chunk.startswith("AI 生成失败"):
|
||||
return chunk if not parts else "".join(parts)
|
||||
parts.append(chunk)
|
||||
if not _should_continue(reason, chunk):
|
||||
break
|
||||
messages.append({"role": "assistant", "content": chunk})
|
||||
messages.append({"role": "user", "content": _CHAT_CONTINUE_USER})
|
||||
return "".join(parts).strip() or "AI 生成失败:空内容"
|
||||
|
||||
prompt = f"{system.strip()}\n\n---\n\n{user.strip()}"
|
||||
parts = []
|
||||
current_prompt = prompt
|
||||
for _ in range(max(1, int(max_continuations) + 1)):
|
||||
chunk, reason = _generate_ollama(
|
||||
current_prompt,
|
||||
images if not parts else [],
|
||||
temperature,
|
||||
max_tokens=max_tokens,
|
||||
chat=True,
|
||||
)
|
||||
if chunk.startswith("AI 生成失败") and not parts:
|
||||
return chunk
|
||||
parts.append(chunk)
|
||||
if not _should_continue(reason, chunk):
|
||||
break
|
||||
tail = chunk[-400:] if len(chunk) > 400 else chunk
|
||||
current_prompt = (
|
||||
f"{prompt}\n\n{''.join(parts)}\n\n"
|
||||
f"{_CHAT_CONTINUE_USER}\n\n"
|
||||
f"(已写结尾片段供衔接:…{tail})"
|
||||
)
|
||||
return "".join(parts).strip() or "AI 生成失败"
|
||||
except requests.HTTPError as e:
|
||||
detail = ""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user