From 401ee2f130f8475afd7e8f10cca11a712fbc58f5 Mon Sep 17 00:00:00 2001
From: dekun <dekun@local>
Date: Thu, 11 Jun 2026 06:27:06 +0800
Subject: [PATCH] fix: stabilize AI coach chat against truncation and empty
 replies

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 ai_client.py                         | 60 +++++++++++++++++++++-------
 manual_trading_hub/.env.example      |  7 +++-
 manual_trading_hub/hub_ai/chat.py    | 26 ++++++++++--
 manual_trading_hub/hub_ai/config.py  |  7 ++--
 manual_trading_hub/hub_ai/context.py |  2 +-
 manual_trading_hub/static/app.css    |  4 ++
 manual_trading_hub/static/app.js     | 37 +++++++++++++----
 7 files changed, 111 insertions(+), 32 deletions(-)

diff --git a/ai_client.py b/ai_client.py
index 71cc991..3ab2a88 100644
--- a/ai_client.py
+++ b/ai_client.py
@@ -124,11 +124,12 @@ def _openai_message_text(msg: dict) -> str:
     return text
 
 
-def _apply_max_tokens(body: dict, max_tokens: int | None) -> None:
+def _apply_max_tokens(body: dict, max_tokens: int | None, *, chat: bool = False) -> None:
     if max_tokens is not None and max_tokens > 0:
         mt = int(max_tokens)
         body["max_tokens"] = mt
-        body["max_completion_tokens"] = mt
+        if not chat:  # chat requests carry only "max_tokens"; other callers keep sending both keys
+            body["max_completion_tokens"] = mt
 
 
 def _openai_chat_completion(
@@ -152,7 +153,7 @@ def _openai_chat_completion(
         "temperature": temperature,
         "stream": False,
     }
-    _apply_max_tokens(body, max_tokens)
+    _apply_max_tokens(body, max_tokens, chat=chat)
     r = requests.post(
         _openai_chat_url(),
         headers=headers,
@@ -167,9 +168,27 @@ def _openai_chat_completion(
     choice = choices[0] or {}
     msg = choice.get("message") or {}
     text = _openai_message_text(msg)
+    finish = str(choice.get("finish_reason") or "")
+    if not text and chat and max_tokens:  # one extra request when a capped chat call returned empty text
+        retry_body = dict(body)  # shallow copy of the first request's payload
+        retry_body.pop("max_completion_tokens", None)  # NOTE(review): looks like a no-op — _apply_max_tokens skips this key when chat=True, so the retry resends the same payload; confirm intent
+        r2 = requests.post(
+            _openai_chat_url(),
+            headers=headers,
+            json=retry_body,
+            timeout=_ai_timeout_seconds(image_count=image_count, chat=chat),
+        )
+        r2.raise_for_status()  # an HTTP error on the retry propagates to the caller
+        data2 = r2.json()
+        choices2 = data2.get("choices") or []
+        if choices2:
+            msg2 = (choices2[0] or {}).get("message") or {}
+            text2 = _openai_message_text(msg2)
+            if text2:  # only a non-empty retry replaces the first (empty) result
+                return text2, str((choices2[0] or {}).get("finish_reason") or finish)
     if not text:
-        return "AI 生成失败：空内容", choice.get("finish_reason") or "error"
-    return text, str(choice.get("finish_reason") or "")
+        return "AI 生成失败：空内容", finish or "error"
+    return text, finish
 
 
 def _generate_openai(
@@ -264,7 +283,7 @@ _CHAT_CONTINUE_USER = (
 )
 _CHAT_END_CHARS = "。！？.!?\"」』）)>】"
 _INCOMPLETE_TAIL_RE = re.compile(
-    r"(会不会|是不是|够不够|能不能|要不要|如何|怎么|什么|哪里|多少|对吗|怎么样|"
+    r"(不会|不能|没有|会不会|是不是|够不够|能不能|要不要|如何|怎么|什么|哪里|多少|对吗|怎么样|"
     r"这个\.\.\.|这个…|\.\.\.\d+\.|\d+\.)$"
 )
 
@@ -291,7 +310,7 @@ def _should_continue(reason: str, full_text: str) -> bool:
 
 
 def _chat_continue_message(full_text: str) -> str:
-    tail = full_text[-900:] if len(full_text) > 900 else full_text
+    tail = full_text[-500:] if len(full_text) > 500 else full_text
     return (
         f"{_CHAT_CONTINUE_USER}\n\n"
         f"已写到最后这几句：\n「{tail}」\n\n"
@@ -299,6 +318,14 @@ def _chat_continue_message(full_text: str) -> str:
     )
 
 
+def _chat_continue_system(system: str) -> str:
+    """Return the stripped system prompt followed by the continuation-mode output rules."""
+    base = system.strip()
+    rule_scope = "【续写模式】只输出断点后的剩余内容，不要重复前文；"
+    rule_shape = "列表每条单独一行；必须以句号、问号或感叹号收尾。"
+    return f"{base}\n\n{rule_scope}{rule_shape}"
+
+
 def ai_generate_chat(
     *,
     system: str,
@@ -306,9 +333,9 @@ def ai_generate_chat(
     temperature: float = 0.5,
     images_b64: Optional[Sequence[str]] = None,
     max_tokens: int = 8192,
-    max_continuations: int = 3,
+    max_continuations: int = 4,
 ) -> str:
-    """聊天专用：system/user 分消息；输出触顶时自动续写（携带已写全文）。"""
+    """聊天专用：system/user 分消息；输出触顶时轻量续写（不重复巨型上下文）。"""
     images = _collect_images(None, images_b64)
     max_rounds = max(1, int(max_continuations) + 1)
     try:
@@ -336,7 +363,7 @@ def ai_generate_chat(
                     messages,
                     temperature=temperature,
                     max_tokens=max_tokens,
-                    image_count=len(images),
+                    image_count=len(images) if attempt == 0 else 0,
                     chat=True,
                 )
                 if chunk.startswith("AI 调用失败") or chunk.startswith("AI 生成失败"):
@@ -346,8 +373,7 @@ def ai_generate_chat(
                 if not _should_continue(reason, full) or attempt >= max_rounds - 1:
                     break
                 messages = [
-                    {"role": "system", "content": system.strip()},
-                    base_user_msg,
+                    {"role": "system", "content": _chat_continue_system(system)},
                     {"role": "assistant", "content": full},
                     {"role": "user", "content": _chat_continue_message(full)},
                 ]
@@ -356,12 +382,14 @@ def ai_generate_chat(
         prompt = f"{system.strip()}\n\n---\n\n{user.strip()}"
         parts: list[str] = []
         for attempt in range(max_rounds):
-            current_prompt = prompt
             if parts:
                 full = "".join(parts)
                 current_prompt = (
-                    f"{prompt}\n\n【你已写道】\n{full}\n\n{_chat_continue_message(full)}"
+                    f"{_chat_continue_system(system)}\n\n"
+                    f"【你已写道】\n{full}\n\n{_chat_continue_message(full)}"
                 )
+            else:
+                current_prompt = prompt
             chunk, reason = _generate_ollama(
                 current_prompt,
                 images if not parts else [],
@@ -371,11 +399,13 @@ def ai_generate_chat(
             )
             if chunk.startswith("AI 生成失败") and not parts:
                 return chunk
+            if chunk.startswith("AI 生成失败"):
+                break
             parts.append(chunk)
             full = "".join(parts)
             if not _should_continue(reason, full) or attempt >= max_rounds - 1:
                 break
-        return "".join(parts).strip() or "AI 生成失败"
+        return "".join(parts).strip() or "AI 生成失败：空内容"
     except requests.HTTPError as e:
         detail = ""
         try:
diff --git a/manual_trading_hub/.env.example b/manual_trading_hub/.env.example
index 8ec4b43..8f4e1d2 100644
--- a/manual_trading_hub/.env.example
+++ b/manual_trading_hub/.env.example
@@ -85,9 +85,12 @@ HUB_TRUST_LAN=true
 # 与四实例相同变量名；默认 OpenAI 兼容网关（改 AI_PROVIDER=ollama 可走本机 Ollama）
 # 详见 manual_trading_hub/AI教练说明.md 与仓库根 AI复盘与模型配置说明.md
 AI_TIMEOUT_SECONDS=120
-# AI 教练聊天：单次输出 token 上限与截断自动续写次数（默认 8192 / 3）
+# AI 教练聊天（默认：输出 8192 token、续写 4 次、快照约 2 万字符、历史单条 1500 字）
 # CHAT_MAX_OUTPUT_TOKENS=8192
-# CHAT_MAX_CONTINUATIONS=8
+# CHAT_MAX_CONTINUATIONS=4
+# CHAT_CONTEXT_MAX_CHARS=20000
+# CHAT_SUMMARY_EXCERPT_MAX_CHARS=2000
+# CHAT_HISTORY_MAX_CHARS_PER_MSG=1500
 # CHAT_AI_TIMEOUT_SECONDS=300
 
 # AI 提供方：openai（默认，OpenAI 兼容网关）| ollama（本机 Ollama）
diff --git a/manual_trading_hub/hub_ai/chat.py b/manual_trading_hub/hub_ai/chat.py
index ceb7470..76af86c 100644
--- a/manual_trading_hub/hub_ai/chat.py
+++ b/manual_trading_hub/hub_ai/chat.py
@@ -7,6 +7,7 @@ from hub_ai.attachments import parse_chat_attachments
 from hub_ai.client import generate_text, model_label
 from hub_ai.config import (
     CHAT_CONTEXT_MAX_CHARS,
+    CHAT_HISTORY_MAX_CHARS_PER_MSG,
     CHAT_MAX_CONTINUATIONS,
     CHAT_MAX_HISTORY_TURNS,
     CHAT_MAX_OUTPUT_TOKENS,
@@ -25,17 +26,31 @@ from hub_ai.store import (
 )
 
 
-def _history_lines(messages: list[dict], max_turns: int = CHAT_MAX_HISTORY_TURNS) -> str:
+def _is_ai_error_reply(text: str) -> bool:
+    """Return True when *text*, ignoring surrounding whitespace, starts with an AI failure prefix."""
+    return (text or "").strip().startswith(("AI 调用失败", "AI 生成失败"))
+
+
+def _history_lines(  # render recent user/assistant turns as "role：content" lines for the prompt
+    messages: list[dict],
+    max_turns: int = CHAT_MAX_HISTORY_TURNS,
+    *,
+    max_chars_per_msg: int = CHAT_HISTORY_MAX_CHARS_PER_MSG,  # per-message cap; <= 0 disables truncation
+) -> str:
     rows = [m for m in (messages or []) if m.get("role") in ("user", "assistant")]
     rows = rows[-max_turns * 2 :]
     lines = []
     for m in rows:
         role = "用户" if m.get("role") == "user" else "搭档"
-        content = m.get("content") or ""
+        content = str(m.get("content") or "").strip()
+        if m.get("role") == "assistant" and _is_ai_error_reply(content):
+            continue  # failed AI replies are left out of the replayed history
         att = m.get("attachments") or []
         if att:
             names = "、".join(str(a.get("name") or "附件") for a in att[:3])
             content = f"{content} [附件: {names}]".strip()
+        if 0 < max_chars_per_msg < len(content):  # a non-positive cap must not slice from the end
+            content = content[: max_chars_per_msg - 1].rstrip() + "…"  # the ellipsis counts toward the cap
         lines.append(f"{role}：{content}")
     return "\n".join(lines)
 
@@ -79,7 +94,10 @@ def send_chat_message(
     day = ctx["trading_day"]
     session = ensure_active_session(trading_day=day)
     sid = session["id"]
-    history = _history_lines(session.get("messages") or [])
+    history = _history_lines(
+        session.get("messages") or [],
+        max_chars_per_msg=CHAT_HISTORY_MAX_CHARS_PER_MSG,
+    )
 
     append_chat_message(
         sid,
@@ -110,7 +128,7 @@ def send_chat_message(
         max_tokens=CHAT_MAX_OUTPUT_TOKENS,
         max_continuations=CHAT_MAX_CONTINUATIONS,
     )
-    if reply.startswith("AI 调用失败"):
+    if _is_ai_error_reply(reply):
         return {"ok": False, "msg": reply, "session_id": sid}
 
     session = append_chat_message(sid, "assistant", reply)
diff --git a/manual_trading_hub/hub_ai/config.py b/manual_trading_hub/hub_ai/config.py
index 8608db4..a1a84e3 100644
--- a/manual_trading_hub/hub_ai/config.py
+++ b/manual_trading_hub/hub_ai/config.py
@@ -17,9 +17,10 @@ SUMMARY_TEMPERATURE = 0.15
 CHAT_TEMPERATURE = 0.5
 CHAT_MAX_HISTORY_TURNS = 40
 CHAT_MAX_OUTPUT_TOKENS = _int_env("CHAT_MAX_OUTPUT_TOKENS", 8192)
-CHAT_MAX_CONTINUATIONS = _int_env("CHAT_MAX_CONTINUATIONS", 8)
-CHAT_CONTEXT_MAX_CHARS = 128_000
-CHAT_SUMMARY_EXCERPT_MAX_CHARS = 8000
+CHAT_MAX_CONTINUATIONS = _int_env("CHAT_MAX_CONTINUATIONS", 4)
+CHAT_CONTEXT_MAX_CHARS = _int_env("CHAT_CONTEXT_MAX_CHARS", 20_000)
+CHAT_SUMMARY_EXCERPT_MAX_CHARS = _int_env("CHAT_SUMMARY_EXCERPT_MAX_CHARS", 2000)
+CHAT_HISTORY_MAX_CHARS_PER_MSG = _int_env("CHAT_HISTORY_MAX_CHARS_PER_MSG", 1500)
 SUMMARY_RETENTION_DAYS = 90
 CHAT_SESSION_RETENTION_DAYS = 60
 FUND_HISTORY_DAYS = 180
diff --git a/manual_trading_hub/hub_ai/context.py b/manual_trading_hub/hub_ai/context.py
index 6961fc2..3877b07 100644
--- a/manual_trading_hub/hub_ai/context.py
+++ b/manual_trading_hub/hub_ai/context.py
@@ -767,7 +767,7 @@ def format_chat_context_for_chat(
     max_chars: int = CHAT_CONTEXT_MAX_CHARS,
 ) -> str:
     overview = format_chat_position_overview(payload)
-    body = str(payload.get("text") or "").strip() or format_context_text(payload)
+    body = format_chat_context_slim(payload)
     text = overview + "\n\n" + body
     if len(text) <= max_chars:
         return text
diff --git a/manual_trading_hub/static/app.css b/manual_trading_hub/static/app.css
index 0c4950c..f2c6594 100644
--- a/manual_trading_hub/static/app.css
+++ b/manual_trading_hub/static/app.css
@@ -4327,6 +4327,10 @@ body.hub-page-ai #page-ai {
   font-style: italic;
   animation: ai-think-pulse 1.2s ease-in-out infinite;
 }
+.ai-bubble-error { /* added by renderAiChatRow to non-user, non-thinking bubbles whose text starts with an AI failure prefix */
+  border-color: color-mix(in srgb, var(--red) 55%, var(--border-soft));
+  color: var(--red);
+}
 @keyframes ai-think-pulse {
   0%,
   100% {
diff --git a/manual_trading_hub/static/app.js b/manual_trading_hub/static/app.js
index 9ed658f..d31240b 100644
--- a/manual_trading_hub/static/app.js
+++ b/manual_trading_hub/static/app.js
@@ -990,10 +990,7 @@
       btn.classList.toggle("is-active", on);
       btn.setAttribute("aria-selected", on ? "true" : "false");
     });
-    if (mobile && active === "chat") {
-      const box = document.getElementById("ai-chat-messages");
-      if (box) requestAnimationFrame(() => { box.scrollTop = box.scrollHeight; });
-    }
+    if (mobile && active === "chat") scrollAiChatToEnd();
   }
 
   function initAiMobileTabs() {
@@ -3271,12 +3268,34 @@
     ].join("");
   }
 
+  function scrollAiChatToEnd() { // scroll #ai-chat-messages so its last .ai-msg-row is in view
+    const box = document.getElementById("ai-chat-messages");
+    if (!box) return;
+    const run = () => {
+      box.scrollTop = box.scrollHeight; // jump the container itself to the bottom first
+      const rows = box.querySelectorAll(".ai-msg-row");
+      const last = rows[rows.length - 1];
+      if (last && last.scrollIntoView) {
+        try {
+          last.scrollIntoView({ block: "end", behavior: "auto" });
+        } catch (_) {
+          /* best effort: the scrollTop assignment above has already run */
+        }
+      }
+    };
+    requestAnimationFrame(() => requestAnimationFrame(run)); // deferred by two frames; presumably so layout settles after re-render — confirm
+  }
+
   function renderAiChatRow(role, content, extraClass, attachments) {
     const isUser = role === "user";
     const label = isUser ? "主人" : "AI教练";
     const rowCls = isUser ? "ai-msg-row-user" : "ai-msg-row-coach";
     const bubbleCls = isUser ? "ai-bubble-user" : "ai-bubble-assistant";
     const isThinking = extraClass && String(extraClass).includes("ai-bubble-thinking");
+    const isError =
+      !isUser &&
+      !isThinking &&
+      /^(AI 调用失败|AI 生成失败)/.test(String(content || "").trim());
     const bubbleInner = isUser || isThinking ? esc(content || "") : renderHubMarkdown(content || "");
     const mdCls = !isUser && !isThinking ? " ai-result-md" : "";
     const attList = Array.isArray(attachments) ? attachments : [];
@@ -3289,7 +3308,7 @@
       `<div class="ai-msg-row ${rowCls}">` +
       `<span class="ai-msg-role">${label}</span>` +
       `${attHtml}` +
-      `<div class="ai-bubble ${bubbleCls}${mdCls}${extraClass ? " " + extraClass : ""}">${bubbleInner}</div>` +
+      `<div class="ai-bubble ${bubbleCls}${mdCls}${isError ? " ai-bubble-error" : ""}${extraClass ? " " + extraClass : ""}">${bubbleInner}</div>` +
       `</div>`
     );
   }
@@ -3327,7 +3346,7 @@
       html += renderAiChatRow("assistant", "正在思考…", "ai-bubble-thinking");
     }
     box.innerHTML = html;
-    box.scrollTop = box.scrollHeight;
+    scrollAiChatToEnd();
   }
 
   function setAiChatBusy(busy) {
@@ -3452,7 +3471,11 @@
       }
     } catch (e) {
       showToast(String(e), true);
-      renderAiChatMessages(aiChatSessionCache);
+      try {
+        await loadAiChatSession();
+      } catch (_) {
+        renderAiChatMessages(aiChatSessionCache);
+      }
     } finally {
       setAiChatBusy(false);
     }