"""Prompt templates and LLM helpers for question solving, review and essays.

The module builds Chinese-language prompts from the templates below and sends
them to either an Ollama or an OpenAI-compatible HTTP endpoint.

NOTE(review): the prompt literals are reproduced exactly as found, including
their single-space separators and trailing spaces. The Markdown headings in
them suggest they may have been newline-separated at some point -- confirm
before changing any whitespace inside them.
"""

import re

import httpx
from sqlalchemy.orm import Session

from app.core.config import settings as app_settings
from app.models.user import SchoolLevel, SystemSettings
from app.services.school_level import school_level_label
from app.services.url_sanitize import sanitize_http_url, sanitize_model_name

# Timeout (seconds) applied to every outbound generation request.
_REQUEST_TIMEOUT_SECONDS = 180.0

# Line-anchored "## 范文" heading; used to separate the plan from the essay.
_ESSAY_HEADING_RE = re.compile(r"^##\s*范文\s*$", re.MULTILINE)

CURRICULUM_JUNIOR = (
    "初中课程标准:代数、几何(全等/相似/勾股)、一次函数与简单二次函数、基础概率统计。 "
    "严禁使用:高中导数、向量、解析几何、排列组合进阶、复数、微积分、大学线性代数等。"
)

CURRICULUM_SENIOR = (
    "高中课程标准:课内函数、三角、向量、解析几何、概率统计、导数(课内范围)等。 "
    "严禁使用:大学数学分析、抽象代数、高等几何、超出课内的竞赛高阶技巧。"
)

CURRICULUM_JUNIOR_OLYMPIAD = (
    "初中奥数培优范围:整数/整除、因数分解、简单数论、代数恒等变形、几何辅助线与全等相似、简单组合计数。 "
    "严禁使用:高中及以上方法(导数、向量、解析几何、微积分、复数运算等)。"
)

CURRICULUM_SENIOR_OLYMPIAD = (
    "高中奥数/竞赛入门范围:课内知识+常规竞赛技巧(不等式、构造、归纳、简单数论等)。 "
    "严禁使用:大学数学、超出高中奥数培优体系的 IMO 高阶理论。"
)


def _is_senior_high(level: SchoolLevel | str | None) -> bool:
    """Return True when *level* equals the senior-high enum member or its string form."""
    return level in (SchoolLevel.senior_high, "senior_high")


def _curriculum_block(level: SchoolLevel | str | None, olympiad: bool) -> str:
    """Pick the curriculum-limit text for a school level.

    Any level other than senior high (including ``None``) gets the
    junior-high text. *olympiad* selects the olympiad variant.
    """
    if _is_senior_high(level):
        return CURRICULUM_SENIOR_OLYMPIAD if olympiad else CURRICULUM_SENIOR
    return CURRICULUM_JUNIOR_OLYMPIAD if olympiad else CURRICULUM_JUNIOR


QUESTION_PROMPT = (
    "你是一位{stage}老师。以下是从试卷 OCR 识别出的文字,可能含有噪声。 "
    "科目:{subject} "
    "请整理出清晰的题目内容(保留题号、选项、公式),只输出题目正文,不要解释。 "
    "OCR 原文: "
    "{ocr_text} "
)

SOLUTION_PROMPT = (
    "你是一位耐心的{stage}{subject}老师。请像「作业帮」一样,先讲清楚解题思路,再给出完整解答。 "
    "【学段要求 — 严禁超纲】 "
    "{curriculum} "
    "题目: "
    "{question_text} "
    "请严格按以下 Markdown 结构输出: "
    "## 解题思路 "
    "(2-5 句话:这题考什么、从哪里入手、关键一步是什么,让学生先懂「怎么想」) "
    "## 详细解答 "
    "(分步骤完整推导,每步说明依据) "
    "## 易错点 "
    "(指出常见错误及正确做法) "
    "严禁使用超纲方法;若原题超纲,请给出{stage}课内可理解的解法。 "
)

OLYMPIAD_SOLUTION_PROMPT = (
    "你是一位{stage}奥数教练。请像优秀辅导老师一样,先讲解题思路,再完整解答。 "
    "【奥数学段要求 — 严禁超纲】 "
    "{curriculum} "
    "题目: "
    "{question_text} "
    "请严格按以下 Markdown 结构输出: "
    "## 解题思路 "
    "(点明题型、突破口、{stage}奥数常用技巧) "
    "## 详细解答 "
    "(完整步骤) "
    "## 关键技巧 "
    "(总结,仅限{stage}奥数范围) "
    "严禁超纲;过难题给出{stage}可接受的培优思路。 "
)

# Doubled braces are str.format escapes for the literal JSON example.
ERROR_DETECT_PROMPT = (
    "你是{stage}{subject}老师。以下是试卷/作业 OCR 识别结果,每行前有编号。 "
    "请找出「学生答错的部分」:错误答案、被打叉的作答、明显不正确的计算结果等。 "
    "{numbered_lines} "
    "只输出 JSON,不要其他文字: "
    '{{"wrong_line_ids": [行编号整数列表]}} '
    "若整张图就是一道错题,请标注含有错误答案或作答的行;找不到则标注最后作答行。 "
)

REVIEW_INSIGHT_PROMPT = (
    "你是一位{stage}{subject}学习顾问。请仅根据下方「复盘数据」做分析,不得编造未出现的考试或状态。 "
    "【学段】{stage} "
    "【科目】{subject}(所有建议必须贴合本科目,禁止套用其他科目的说法) "
    "【复盘数据】 "
    "{review_records} "
    "【状态含义(结合本科目理解)】 "
    "- 粗心:{careless_hint} "
    "- 不会:该科知识点/题型尚未掌握 "
    "- 紧张:心态影响,发挥低于平时水平 "
    "- 正常发挥:状态稳定 "
    "【科目建议方向】 "
    "{subject_hints} "
    "【必须遵守】 "
    "1. 解读时必须写清具体考试日期(如 2026-06-21),按时间从早到晚分析,不得把「第1次」说成最近一场 "
    "2. 得分率 = 得分÷总分;95% 以上才可称「接近满分」,85% 左右应如实描述为「良好但仍有失分空间」,禁止夸大 "
    "3. 改进建议必须针对 {subject},禁止出现与本科目无关的表述(如英语科禁止写「计算验算」) "
    "4. 只分析数据中列出的复盘状态,不要臆测未勾选的原因 "
    "请用 Markdown 输出: "
    "## 情况解读 "
    "(2-4 句:按时间顺序说明每次考试得分率、失分与复盘状态的关系,以及是否有改善或反复) "
    "## 改进建议 "
    "(3-5 条,针对出现最多的问题状态,具体可操作,仅限 {stage}{subject} 范围) "
    "## 近期重点 "
    "(1-2 条本周可落实的小目标) "
    "语气务实,不要空泛鸡汤。 "
)

# Per-subject text injected into REVIEW_INSIGHT_PROMPT: "careless" fills
# {careless_hint}, "hints" fills {subject_hints}.
SUBJECT_REVIEW_HINTS: dict[str, dict[str, str]] = {
    "语文": {
        "careless": "看错题干、漏读要求、作文偏题或漏写要点",
        "hints": "阅读审题、文言文/语言运用、作文结构与素材积累",
    },
    "数学": {
        "careless": "审题不清、计算或抄错、步骤跳步",
        "hints": "错题归类、计算验算、典型题型归纳与限时练习",
    },
    "英语": {
        "careless": "看错词义/时态、漏读题干、拼写与语法笔误",
        "hints": "词汇语法、阅读完形、听力与写作模板,禁止建议计算类训练",
    },
    "物理": {
        "careless": "审题漏条件、公式代错、单位换算失误",
        "hints": "概念理解、建模分析、实验题与计算规范",
    },
    "化学": {
        "careless": "方程式配平/条件遗漏、计算失误",
        "hints": "方程式、物质性质、实验与推断题",
    },
    "生物": {
        "careless": "概念混淆、漏答得分点",
        "hints": "教材概念、图表分析、实验设计表述",
    },
    "历史": {
        "careless": "材料题漏读、时间/人物混淆",
        "hints": "时间线、材料分析、论述题答题模板",
    },
    "地理": {
        "careless": "读图漏信息、术语使用不当",
        "hints": "地图判读、区域分析、综合题答题条理",
    },
    "政治": {
        "careless": "漏答采分点、概念表述不准",
        "hints": "时政结合、材料分析、观点表述规范",
    },
}


def _subject_review_hints(subject: str) -> tuple[str, str]:
    """Return ``(careless_hint, subject_hints)`` for *subject*.

    Subjects missing from ``SUBJECT_REVIEW_HINTS`` get a generic pair whose
    second element embeds the subject name.
    """
    block = SUBJECT_REVIEW_HINTS.get(subject)
    if block:
        return block["careless"], block["hints"]
    return "审题或作答细节失误", f"针对{subject}常见失分点制定练习与错题巩固"


class AIConfig:
    """Connection settings for the text-generation backends.

    Kept as a plain class (no generated ``__repr__``) so the API key is not
    rendered when an instance is printed or logged.
    """

    def __init__(
        self,
        provider: str,
        ollama_base_url: str,
        ollama_model: str,
        openai_base_url: str,
        openai_model: str,
        openai_api_key: str | None,
    ):
        # "openai" routes to the OpenAI-style endpoint; any other value
        # falls through to Ollama (see generate_text).
        self.provider = provider
        self.ollama_base_url = ollama_base_url
        self.ollama_model = ollama_model
        self.openai_base_url = openai_base_url
        self.openai_model = openai_model
        self.openai_api_key = openai_api_key


def load_ai_config(db: Session) -> AIConfig:
    """Build an ``AIConfig`` from the ``SystemSettings`` row with identity 1.

    Empty or missing columns fall back to the application settings, and the
    provider falls back to ``"ollama"``. When the row does not exist, the
    application settings are used throughout and no API key is set. URLs and
    model names are passed through the sanitize helpers in both cases.
    """
    row = db.get(SystemSettings, 1)
    if row is None:
        return AIConfig(
            provider="ollama",
            ollama_base_url=sanitize_http_url(app_settings.OLLAMA_BASE_URL),
            ollama_model=sanitize_model_name(app_settings.OLLAMA_MODEL),
            openai_base_url=sanitize_http_url(app_settings.OPENAI_BASE_URL),
            openai_model=sanitize_model_name(app_settings.OPENAI_MODEL),
            openai_api_key=None,
        )
    return AIConfig(
        provider=row.ai_provider or "ollama",
        ollama_base_url=sanitize_http_url(
            row.ollama_base_url or app_settings.OLLAMA_BASE_URL
        ),
        ollama_model=sanitize_model_name(
            row.ollama_model or app_settings.OLLAMA_MODEL
        ),
        openai_base_url=sanitize_http_url(
            row.openai_base_url or app_settings.OPENAI_BASE_URL
        ),
        openai_model=sanitize_model_name(
            row.openai_model or app_settings.OPENAI_MODEL
        ),
        openai_api_key=row.openai_api_key,
    )


async def _ollama_generate(
    prompt: str, cfg: AIConfig, *, temperature: float = 0.3
) -> str:
    """POST *prompt* to ``<ollama_base_url>/api/generate`` and return the text.

    The request is non-streaming. Returns the stripped ``"response"`` field of
    the JSON reply, or ``""`` when that field is missing or empty.

    Raises:
        httpx.HTTPStatusError: via ``raise_for_status()`` on an error status.
    """
    url = f"{cfg.ollama_base_url.rstrip('/')}/api/generate"
    payload = {
        "model": cfg.ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature},
    }
    async with httpx.AsyncClient(timeout=_REQUEST_TIMEOUT_SECONDS) as client:
        response = await client.post(url, json=payload)
        response.raise_for_status()
        return (response.json().get("response") or "").strip()


async def _openai_generate(
    prompt: str, cfg: AIConfig, *, temperature: float = 0.3
) -> str:
    """POST *prompt* as a single user message to ``<openai_base_url>/chat/completions``.

    Returns the stripped content of the first choice, or ``""`` when that
    content is empty or ``None``.

    Raises:
        ValueError: when ``cfg.openai_api_key`` is not set.
        httpx.HTTPStatusError: via ``raise_for_status()`` on an error status.
    """
    if not cfg.openai_api_key:
        raise ValueError("未配置 OpenAI API Key")
    url = f"{cfg.openai_base_url.rstrip('/')}/chat/completions"
    headers = {"Authorization": f"Bearer {cfg.openai_api_key}"}
    payload = {
        "model": cfg.openai_model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
    }
    async with httpx.AsyncClient(timeout=_REQUEST_TIMEOUT_SECONDS) as client:
        response = await client.post(url, json=payload, headers=headers)
        response.raise_for_status()
        data = response.json()
        return (data["choices"][0]["message"]["content"] or "").strip()


async def generate_text(
    prompt: str, cfg: AIConfig, *, temperature: float = 0.3
) -> str:
    """Send *prompt* to the backend selected by ``cfg.provider``.

    ``"openai"`` uses the OpenAI-style endpoint; every other value uses Ollama.
    """
    if cfg.provider == "openai":
        return await _openai_generate(prompt, cfg, temperature=temperature)
    return await _ollama_generate(prompt, cfg, temperature=temperature)


async def format_question(
    cfg: AIConfig,
    subject: str,
    ocr_text: str,
    school_level: SchoolLevel | str | None = None,
) -> str:
    """Ask the model to tidy OCR text into a clean question statement."""
    stage = school_level_label(school_level)
    prompt = QUESTION_PROMPT.format(stage=stage, subject=subject, ocr_text=ocr_text)
    return await generate_text(prompt, cfg)


async def generate_solution(
    cfg: AIConfig,
    subject: str,
    question_text: str,
    school_level: SchoolLevel | str | None = None,
    *,
    olympiad: bool = False,
) -> str:
    """Generate a worked solution for *question_text*.

    *olympiad* switches both the prompt template and the curriculum-limit
    text to their olympiad variants.
    """
    stage = school_level_label(school_level)
    curriculum = _curriculum_block(school_level, olympiad)
    template = OLYMPIAD_SOLUTION_PROMPT if olympiad else SOLUTION_PROMPT
    prompt = template.format(
        stage=stage,
        subject=subject,
        curriculum=curriculum,
        question_text=question_text,
    )
    return await generate_text(prompt, cfg)


async def detect_wrong_line_ids(
    cfg: AIConfig,
    subject: str,
    ocr_lines: list[dict],
    school_level: SchoolLevel | str | None = None,
) -> str:
    """Ask the model which OCR lines contain wrong answers.

    Each line is shown to the model as ``[index] text`` using its zero-based
    position in *ocr_lines*; a missing ``"text"`` key renders as empty text.
    The model's raw reply is returned unparsed.
    """
    stage = school_level_label(school_level)
    numbered = "\n".join(
        f"[{i}] {line.get('text', '')}" for i, line in enumerate(ocr_lines)
    )
    prompt = ERROR_DETECT_PROMPT.format(
        stage=stage, subject=subject, numbered_lines=numbered
    )
    return await generate_text(prompt, cfg)


async def generate_review_insight(
    cfg: AIConfig,
    subject: str,
    review_records: str,
    school_level: SchoolLevel | str | None = None,
) -> str:
    """Generate a study review from pre-formatted *review_records* text.

    Uses temperature 0.2 instead of the 0.3 default of ``generate_text``.
    """
    stage = school_level_label(school_level)
    careless_hint, subject_hints = _subject_review_hints(subject)
    prompt = REVIEW_INSIGHT_PROMPT.format(
        stage=stage,
        subject=subject,
        review_records=review_records,
        careless_hint=careless_hint,
        subject_hints=subject_hints,
    )
    return await generate_text(prompt, cfg, temperature=0.2)


CURRICULUM_CHINESE_JUNIOR = (
    "初中作文:记叙文、写人记事、简单议论文为主,通常 600-800 字。 "
    "语言平实,素材来自课内与日常生活,禁止成人化腔调与超纲典故堆砌。"
)

CURRICULUM_CHINESE_SENIOR = (
    "高中作文:记叙、议论、材料作文为主,通常 800-1000 字。 "
    "可适度展开论证,仍须符合课内要求,禁止大学论文式写法与超纲理论。"
)

COMPOSITION_PROMPT = (
    "你是一位{stage}语文老师,正在辅导{grade_text}学生完成作文。 "
    "【学段年级 — 严禁超纲】 "
    "{curriculum} "
    "作文题目: "
    "{topic} "
    "请严格按以下 Markdown 结构输出(不要增加其他一级标题): "
    # The source literal contains one real line break after this heading;
    # it is kept as-is rather than normalized.
    "## 写作方案 \n"
    "(审题、立意、结构提纲、段落安排、可用素材方向,分条列出,贴合{grade_text}水平) "
    "## 范文 "
    "(完整作文一篇,字数与语言风格必须符合{grade_text}课内要求,禁止超纲) "
    "注意:范文必须是可直接参考的学生习作水准,不要写成评论或教案。 "
)


def _chinese_curriculum(level: SchoolLevel | str | None, grade: str | None) -> str:
    """Pick the essay curriculum text for a school level.

    *grade* is accepted for interface compatibility but is not consulted.
    """
    if _is_senior_high(level):
        return CURRICULUM_CHINESE_SENIOR
    return CURRICULUM_CHINESE_JUNIOR


def _grade_text(grade: str | None) -> str:
    """Return the trimmed grade label, or a generic phrase when it is blank."""
    trimmed = grade.strip() if grade else ""
    return trimmed or "该学段学生"


def split_composition_sections(text: str) -> tuple[str, str]:
    """Split model output into ``(writing_plan, sample_essay)``.

    The text is cut at the first line consisting only of a ``## 范文``
    heading. The heading is recognized wherever it sits on its own line --
    including the first or last line of the text, and with or without a
    space after ``##`` -- so an essay is never left inside the plan because
    of surrounding whitespace. Every ``## 写作方案`` occurrence is removed
    from the plan. Without an essay heading the whole text is the plan and
    the essay is ``""``.
    """
    text = text.strip()
    heading = _ESSAY_HEADING_RE.search(text)
    if heading is None:
        return text.replace("## 写作方案", "").strip(), ""
    plan = text[: heading.start()].replace("## 写作方案", "").strip()
    essay = text[heading.end():].strip()
    return plan, essay


async def generate_composition(
    cfg: AIConfig,
    topic: str,
    school_level: SchoolLevel | str | None = None,
    grade: str | None = None,
) -> tuple[str, str]:
    """Generate a writing plan and a sample essay for *topic*.

    Returns ``(writing_plan, sample_essay)`` as produced by
    ``split_composition_sections``. Uses temperature 0.35.
    """
    stage = school_level_label(school_level)
    grade_text = _grade_text(grade)
    curriculum = _chinese_curriculum(school_level, grade)
    prompt = COMPOSITION_PROMPT.format(
        stage=stage,
        grade_text=grade_text,
        curriculum=curriculum,
        topic=topic.strip(),
    )
    full = await generate_text(prompt, cfg, temperature=0.35)
    return split_composition_sections(full)


def composition_markdown(
    topic: str, writing_plan: str | None, sample_essay: str | None
) -> str:
    """Render topic, plan and essay as one Markdown document.

    Empty or ``None`` sections are omitted. The result ends with exactly one
    newline.
    """
    parts = [f"# 作文题目\n\n{topic.strip()}", ""]
    if writing_plan:
        parts.extend(["## 写作方案", "", writing_plan.strip(), ""])
    if sample_essay:
        parts.extend(["## 范文", "", sample_essay.strip(), ""])
    return "\n".join(parts).strip() + "\n"