Files
secondary-school-grade-archive/backend/app/services/llm.py
T
dekun 1cb3c7fad5 新增作文区与 AI 解读开关,修复 CSV 导出。
系统设置可关闭成绩复盘 AI;学生详情增加作文区(OCR/手动题目、方案与范文、历史与 MD 下载);导出改用 UTF-8 文件名响应。

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-28 17:42:17 +08:00

386 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import httpx
from sqlalchemy.orm import Session
from app.core.config import settings as app_settings
from app.models.user import SchoolLevel, SystemSettings
from app.services.school_level import school_level_label
from app.services.url_sanitize import sanitize_http_url, sanitize_model_name
# Curriculum-scope snippets selected by `_curriculum_block` and inserted into
# the solution prompts through their `{curriculum}` placeholder. Each one names
# the allowed topics for a stage and lists methods that are forbidden as
# out-of-syllabus. There are four variants: junior / senior, each in a regular
# and an olympiad flavour.
CURRICULUM_JUNIOR = """初中课程标准:代数、几何(全等/相似/勾股)、一次函数与简单二次函数、基础概率统计。
严禁使用:高中导数、向量、解析几何、排列组合进阶、复数、微积分、大学线性代数等。"""
CURRICULUM_SENIOR = """高中课程标准:课内函数、三角、向量、解析几何、概率统计、导数(课内范围)等。
严禁使用:大学数学分析、抽象代数、高等几何、超出课内的竞赛高阶技巧。"""
CURRICULUM_JUNIOR_OLYMPIAD = """初中奥数培优范围:整数/整除、因数分解、简单数论、代数恒等变形、几何辅助线与全等相似、简单组合计数。
严禁使用:高中及以上方法(导数、向量、解析几何、微积分、复数运算等)。"""
CURRICULUM_SENIOR_OLYMPIAD = """高中奥数/竞赛入门范围:课内知识+常规竞赛技巧(不等式、构造、归纳、简单数论等)。
严禁使用:大学数学、超出高中奥数培优体系的 IMO 高阶理论。"""
def _curriculum_block(level: SchoolLevel | str | None, olympiad: bool) -> str:
    """Pick the curriculum-scope text matching a school level and mode.

    Args:
        level: Either a ``SchoolLevel`` member or its string form. Only
            ``SchoolLevel.senior_high`` / ``"senior_high"`` selects the senior
            texts; every other value (including ``None``) falls back to junior.
        olympiad: When true, return the olympiad variant of the text.

    Returns:
        One of the four ``CURRICULUM_*`` constants.
    """
    # The stage label is not needed here: the text is chosen purely from the
    # level value, so the previously computed (and unused) label was dropped.
    is_senior = level == SchoolLevel.senior_high or level == "senior_high"
    if olympiad:
        return CURRICULUM_SENIOR_OLYMPIAD if is_senior else CURRICULUM_JUNIOR_OLYMPIAD
    return CURRICULUM_SENIOR if is_senior else CURRICULUM_JUNIOR
# Prompt template used by `format_question` to turn noisy OCR text into a clean
# question statement. Placeholders: {stage}, {subject}, {ocr_text}.
QUESTION_PROMPT = """你是一位{stage}老师。以下是从试卷 OCR 识别出的文字,可能含有噪声。
科目:{subject}
请整理出清晰的题目内容(保留题号、选项、公式),只输出题目正文,不要解释。
OCR 原文:
{ocr_text}
"""
# Prompt template for a regular (non-olympiad) worked solution, used by
# `generate_solution`. It asks for three Markdown sections (approach, detailed
# solution, common mistakes). Placeholders: {stage}, {subject}, {curriculum},
# {question_text}.
SOLUTION_PROMPT = """你是一位耐心的{stage}{subject}老师。请像「作业帮」一样,先讲清楚解题思路,再给出完整解答。
【学段要求 — 严禁超纲】
{curriculum}
题目:
{question_text}
请严格按以下 Markdown 结构输出:
## 解题思路
(2-5 句话:这题考什么、从哪里入手、关键一步是什么,让学生先懂「怎么想」)
## 详细解答
(分步骤完整推导,每步说明依据)
## 易错点
(指出常见错误及正确做法)
严禁使用超纲方法;若原题超纲,请给出{stage}课内可理解的解法。
"""
# Olympiad counterpart of SOLUTION_PROMPT, chosen by `generate_solution` when
# `olympiad=True`. Placeholders used in the text: {stage}, {curriculum},
# {question_text}. `generate_solution` also passes `subject`, which this
# template does not reference.
OLYMPIAD_SOLUTION_PROMPT = """你是一位{stage}奥数教练。请像优秀辅导老师一样,先讲解题思路,再完整解答。
【奥数学段要求 — 严禁超纲】
{curriculum}
题目:
{question_text}
请严格按以下 Markdown 结构输出:
## 解题思路
(点明题型、突破口、{stage}奥数常用技巧)
## 详细解答
(完整步骤)
## 关键技巧
(总结,仅限{stage}奥数范围)
严禁超纲;过难题给出{stage}可接受的培优思路。
"""
# Prompt template used by `detect_wrong_line_ids`: the model receives numbered
# OCR lines and is asked to answer with JSON of the form
# {"wrong_line_ids": [...]}. The doubled braces are `str.format` escapes for
# literal braces. Placeholders: {stage}, {subject}, {numbered_lines}.
ERROR_DETECT_PROMPT = """你是{stage}{subject}老师。以下是试卷/作业 OCR 识别结果,每行前有编号。
请找出「学生答错的部分」:错误答案、被打叉的作答、明显不正确的计算结果等。
{numbered_lines}
只输出 JSON,不要其他文字:
{{"wrong_line_ids": [行编号整数列表]}}
若整张图就是一道错题,请标注含有错误答案或作答的行;找不到则标注最后作答行。
"""
# Prompt template used by `generate_review_insight` to analyse a student's exam
# review records for one subject. Placeholders: {stage}, {subject},
# {review_records}, {careless_hint}, {subject_hints}.
# The hint under "## 近期重点" previously lacked its opening full-width
# parenthesis, unlike the hints of the other two sections; it is now balanced.
REVIEW_INSIGHT_PROMPT = """你是一位{stage}{subject}学习顾问。请仅根据下方「复盘数据」做分析,不得编造未出现的考试或状态。
【学段】{stage}
【科目】{subject}(所有建议必须贴合本科目,禁止套用其他科目的说法)
【复盘数据】
{review_records}
【状态含义(结合本科目理解)】
- 粗心:{careless_hint}
- 不会:该科知识点/题型尚未掌握
- 紧张:心态影响,发挥低于平时水平
- 正常发挥:状态稳定
【科目建议方向】
{subject_hints}
【必须遵守】
1. 解读时必须写清具体考试日期(如 2026-06-21),按时间从早到晚分析,不得把「第1次」说成最近一场
2. 得分率 = 得分÷总分;95% 以上才可称「接近满分」,85% 左右应如实描述为「良好但仍有失分空间」,禁止夸大
3. 改进建议必须针对 {subject},禁止出现与本科目无关的表述(如英语科禁止写「计算验算」)
4. 只分析数据中列出的复盘状态,不要臆测未勾选的原因
请用 Markdown 输出:
## 情况解读
(2-4 句:按时间顺序说明每次考试得分率、失分与复盘状态的关系,以及是否有改善或反复)
## 改进建议
(3-5 条,针对出现最多的问题状态,具体可操作,仅限 {stage}{subject} 范围)
## 近期重点
(1-2 条本周可落实的小目标)
语气务实,不要空泛鸡汤。
"""
# Per-subject wording for the review-insight prompt, keyed by subject name.
# Each entry has two strings:
#   "careless" - what a "careless" mistake typically looks like in that subject
#   "hints"    - the directions improvement advice should take
# `_subject_review_hints` reads this table and supplies generic wording for
# subjects that are not listed.
SUBJECT_REVIEW_HINTS: dict[str, dict[str, str]] = {
    "语文": {
        "careless": "看错题干、漏读要求、作文偏题或漏写要点",
        "hints": "阅读审题、文言文/语言运用、作文结构与素材积累",
    },
    "数学": {
        "careless": "审题不清、计算或抄错、步骤跳步",
        "hints": "错题归类、计算验算、典型题型归纳与限时练习",
    },
    "英语": {
        "careless": "看错词义/时态、漏读题干、拼写与语法笔误",
        "hints": "词汇语法、阅读完形、听力与写作模板,禁止建议计算类训练",
    },
    "物理": {
        "careless": "审题漏条件、公式代错、单位换算失误",
        "hints": "概念理解、建模分析、实验题与计算规范",
    },
    "化学": {
        "careless": "方程式配平/条件遗漏、计算失误",
        "hints": "方程式、物质性质、实验与推断题",
    },
    "生物": {
        "careless": "概念混淆、漏答得分点",
        "hints": "教材概念、图表分析、实验设计表述",
    },
    "历史": {
        "careless": "材料题漏读、时间/人物混淆",
        "hints": "时间线、材料分析、论述题答题模板",
    },
    "地理": {
        "careless": "读图漏信息、术语使用不当",
        "hints": "地图判读、区域分析、综合题答题条理",
    },
    "政治": {
        "careless": "漏答采分点、概念表述不准",
        "hints": "时政结合、材料分析、观点表述规范",
    },
}
def _subject_review_hints(subject: str) -> tuple[str, str]:
    """Return the ``(careless_hint, subject_hints)`` pair for a subject.

    Subjects present in ``SUBJECT_REVIEW_HINTS`` use their table entry; any
    other subject gets generic wording that mentions the subject name.
    """
    entry = SUBJECT_REVIEW_HINTS.get(subject)
    if not entry:
        # Unknown subject: fall back to subject-agnostic text.
        return "审题或作答细节失误", f"针对{subject}常见失分点制定练习与错题巩固"
    return entry["careless"], entry["hints"]
class AIConfig:
def __init__(
self,
provider: str,
ollama_base_url: str,
ollama_model: str,
openai_base_url: str,
openai_model: str,
openai_api_key: str | None,
):
self.provider = provider
self.ollama_base_url = ollama_base_url
self.ollama_model = ollama_model
self.openai_base_url = openai_base_url
self.openai_model = openai_model
self.openai_api_key = openai_api_key
def load_ai_config(db: Session) -> AIConfig:
    """Build an ``AIConfig`` from the stored system settings.

    Looks up ``SystemSettings`` with identity ``1``. When the row exists, each
    stored value is used if truthy and otherwise replaced by the matching
    application default; the stored API key is passed through as is. When the
    row is missing, the configuration is built purely from application
    defaults with provider ``"ollama"`` and no API key. URLs and model names
    always go through ``sanitize_http_url`` / ``sanitize_model_name``.
    """
    stored = db.get(SystemSettings, 1)

    if stored is not None:
        return AIConfig(
            provider=stored.ai_provider or "ollama",
            ollama_base_url=sanitize_http_url(
                stored.ollama_base_url or app_settings.OLLAMA_BASE_URL
            ),
            ollama_model=sanitize_model_name(
                stored.ollama_model or app_settings.OLLAMA_MODEL
            ),
            openai_base_url=sanitize_http_url(
                stored.openai_base_url or app_settings.OPENAI_BASE_URL
            ),
            openai_model=sanitize_model_name(
                stored.openai_model or app_settings.OPENAI_MODEL
            ),
            openai_api_key=stored.openai_api_key,
        )

    # No settings row yet: rely entirely on the application defaults.
    return AIConfig(
        provider="ollama",
        ollama_base_url=sanitize_http_url(app_settings.OLLAMA_BASE_URL),
        ollama_model=sanitize_model_name(app_settings.OLLAMA_MODEL),
        openai_base_url=sanitize_http_url(app_settings.OPENAI_BASE_URL),
        openai_model=sanitize_model_name(app_settings.OPENAI_MODEL),
        openai_api_key=None,
    )
async def _ollama_generate(prompt: str, cfg: AIConfig, *, temperature: float = 0.3) -> str:
    """Send a non-streaming generate request to the configured Ollama server.

    POSTs to ``<ollama_base_url>/api/generate`` with a 180 second timeout and
    returns the stripped ``"response"`` field of the JSON reply, or ``""``
    when that field is missing or empty. ``raise_for_status()`` is called on
    the response before it is parsed.
    """
    endpoint = cfg.ollama_base_url.rstrip("/") + "/api/generate"
    request_body = {
        "model": cfg.ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature},
    }
    async with httpx.AsyncClient(timeout=180.0) as client:
        reply = await client.post(endpoint, json=request_body)
        reply.raise_for_status()
        generated = reply.json().get("response")
        return generated.strip() if generated else ""
async def _openai_generate(prompt: str, cfg: AIConfig, *, temperature: float = 0.3) -> str:
    """Send a single-message chat completion request to the OpenAI-style endpoint.

    POSTs to ``<openai_base_url>/chat/completions`` with a bearer token and a
    180 second timeout, then returns the stripped content of the first choice
    (``""`` when that content is empty or null).

    Raises:
        ValueError: If no API key is configured; checked before any request.
    """
    api_key = cfg.openai_api_key
    if not api_key:
        raise ValueError("未配置 OpenAI API Key")

    endpoint = cfg.openai_base_url.rstrip("/") + "/chat/completions"
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    request_body = {
        "model": cfg.openai_model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
    }
    async with httpx.AsyncClient(timeout=180.0) as client:
        reply = await client.post(endpoint, json=request_body, headers=auth_headers)
        reply.raise_for_status()
        first_message = reply.json()["choices"][0]["message"]
        content = first_message["content"]
        return content.strip() if content else ""
async def generate_text(prompt: str, cfg: AIConfig, *, temperature: float = 0.3) -> str:
    """Run ``prompt`` through the backend selected by ``cfg.provider``.

    The provider value ``"openai"`` uses the OpenAI-style helper; every other
    value is routed to the Ollama helper.
    """
    backend = _openai_generate if cfg.provider == "openai" else _ollama_generate
    return await backend(prompt, cfg, temperature=temperature)
async def format_question(
    cfg: AIConfig,
    subject: str,
    ocr_text: str,
    school_level=None,
) -> str:
    """Ask the model to tidy raw OCR text into a clean question statement.

    Fills ``QUESTION_PROMPT`` with the stage label from ``school_level_label``,
    the subject and the OCR text, and returns the model's text output.
    """
    filled_prompt = QUESTION_PROMPT.format(
        stage=school_level_label(school_level),
        subject=subject,
        ocr_text=ocr_text,
    )
    return await generate_text(filled_prompt, cfg)
async def generate_solution(
    cfg: AIConfig,
    subject: str,
    question_text: str,
    school_level=None,
    *,
    olympiad: bool = False,
) -> str:
    """Generate a worked solution for a question, limited to the stage's syllabus.

    Uses ``OLYMPIAD_SOLUTION_PROMPT`` when ``olympiad`` is true and
    ``SOLUTION_PROMPT`` otherwise, with the matching curriculum text from
    ``_curriculum_block``. Returns the model's text output.
    """
    stage_label = school_level_label(school_level)
    scope_text = _curriculum_block(school_level, olympiad)

    if olympiad:
        chosen_template = OLYMPIAD_SOLUTION_PROMPT
    else:
        chosen_template = SOLUTION_PROMPT

    fields = {
        "stage": stage_label,
        "subject": subject,
        "curriculum": scope_text,
        "question_text": question_text,
    }
    return await generate_text(chosen_template.format(**fields), cfg)
async def detect_wrong_line_ids(
    cfg: AIConfig,
    subject: str,
    ocr_lines: list[dict],
    school_level=None,
) -> str:
    """Ask the model which OCR lines contain wrong student answers.

    Each entry of ``ocr_lines`` is rendered as ``[index] text`` (zero-based
    index, ``text`` key, empty string when the key is absent) and the listing
    is placed into ``ERROR_DETECT_PROMPT``. The raw model output is returned
    unparsed; the prompt asks for JSON, but this function does not decode it.
    """
    stage_label = school_level_label(school_level)

    rendered_rows = []
    for index, entry in enumerate(ocr_lines):
        rendered_rows.append(f"[{index}] {entry.get('text', '')}")
    listing = "\n".join(rendered_rows)

    filled_prompt = ERROR_DETECT_PROMPT.format(
        stage=stage_label,
        subject=subject,
        numbered_lines=listing,
    )
    return await generate_text(filled_prompt, cfg)
async def generate_review_insight(
    cfg: AIConfig,
    subject: str,
    review_records: str,
    school_level=None,
) -> str:
    """Produce a Markdown analysis of a student's exam review records.

    Fills ``REVIEW_INSIGHT_PROMPT`` with the stage label, the subject, the
    pre-formatted ``review_records`` text and the subject-specific hints from
    ``_subject_review_hints``. The model is called with temperature 0.2.
    """
    stage_label = school_level_label(school_level)
    careless_text, advice_text = _subject_review_hints(subject)

    fields = {
        "stage": stage_label,
        "subject": subject,
        "review_records": review_records,
        "careless_hint": careless_text,
        "subject_hints": advice_text,
    }
    filled_prompt = REVIEW_INSIGHT_PROMPT.format(**fields)
    return await generate_text(filled_prompt, cfg, temperature=0.2)
# Composition-writing scope texts (junior / senior) returned by
# `_chinese_curriculum` and inserted into COMPOSITION_PROMPT's `{curriculum}`
# placeholder. Each states the expected genres and a typical length.
CURRICULUM_CHINESE_JUNIOR = """初中作文:记叙文、写人记事、简单议论文为主,通常 600-800 字。
语言平实,素材来自课内与日常生活,禁止成人化腔调与超纲典故堆砌。"""
CURRICULUM_CHINESE_SENIOR = """高中作文:记叙、议论、材料作文为主,通常 800-1000 字。
可适度展开论证,仍须符合课内要求,禁止大学论文式写法与超纲理论。"""
# Prompt template used by `generate_composition`. It asks for exactly two
# Markdown sections, "## 写作方案" (writing plan) and "## 范文" (sample essay);
# `split_composition_sections` relies on those headings to split the reply.
# Placeholders: {stage}, {grade_text}, {curriculum}, {topic}.
COMPOSITION_PROMPT = """你是一位{stage}语文老师,正在辅导{grade_text}学生完成作文。
【学段年级 — 严禁超纲】
{curriculum}
作文题目:
{topic}
请严格按以下 Markdown 结构输出(不要增加其他一级标题):
## 写作方案
(审题、立意、结构提纲、段落安排、可用素材方向,分条列出,贴合{grade_text}水平)
## 范文
(完整作文一篇,字数与语言风格必须符合{grade_text}课内要求,禁止超纲)
注意:范文必须是可直接参考的学生习作水准,不要写成评论或教案。
"""
def _chinese_curriculum(level, grade: str | None) -> str:
    """Return the composition scope text for a school level.

    ``SchoolLevel.senior_high`` or the string ``"senior_high"`` selects the
    senior text; anything else selects the junior text. ``grade`` is accepted
    but not consulted.
    """
    if level == SchoolLevel.senior_high:
        return CURRICULUM_CHINESE_SENIOR
    if level == "senior_high":
        return CURRICULUM_CHINESE_SENIOR
    return CURRICULUM_CHINESE_JUNIOR
def _grade_text(grade: str | None) -> str:
if grade and grade.strip():
return grade.strip()
return "该学段学生"
def split_composition_sections(text: str) -> tuple[str, str]:
    """Split model output into ``(writing_plan, sample_essay)``.

    The text is expected to contain a ``## 写作方案`` section followed by a
    ``## 范文`` section. Everything before the first level-2 heading whose
    title starts with ``范文`` becomes the plan (with the ``## 写作方案``
    heading text removed); everything after it becomes the essay. When no such
    heading exists, the whole text is returned as the plan and the essay is
    empty.

    The essay heading is recognised anywhere it starts a line, including the
    very first line, the very last line without a trailing newline, with or
    without a space after ``##``, and with an optional trailing colon. Extra
    text on the heading line (e.g. a title) is kept as the first line of the
    essay rather than discarded.

    Args:
        text: Raw Markdown returned by the model.

    Returns:
        A ``(plan, essay)`` tuple of stripped strings.
    """
    import re

    text = text.strip()
    # A single line-anchored search replaces the former substring check plus
    # "\n...\n" split, which disagreed with each other and left the essay
    # inside the plan whenever the heading was not surrounded by newlines.
    heading = re.search(
        r"^##[ \t]*范文[ \t]*[::]?[ \t]*(.*)$",
        text,
        flags=re.MULTILINE,
    )
    if heading is None:
        return text.replace("## 写作方案", "").strip(), ""

    plan = text[: heading.start()].replace("## 写作方案", "").strip()
    # group(1) is whatever followed the heading title on the same line.
    essay = f"{heading.group(1)}\n{text[heading.end():]}".strip()
    return plan, essay
async def generate_composition(
    cfg: AIConfig,
    topic: str,
    school_level=None,
    grade: str | None = None,
) -> tuple[str, str]:
    """Generate a writing plan and a sample essay for a composition topic.

    Fills ``COMPOSITION_PROMPT`` with the stage label, the grade wording from
    ``_grade_text``, the scope text from ``_chinese_curriculum`` and the
    stripped topic, calls the model with temperature 0.35, and splits the
    reply with ``split_composition_sections``.

    Returns:
        The ``(writing_plan, sample_essay)`` tuple produced by the splitter.
    """
    fields = {
        "stage": school_level_label(school_level),
        "grade_text": _grade_text(grade),
        "curriculum": _chinese_curriculum(school_level, grade),
        "topic": topic.strip(),
    }
    model_reply = await generate_text(
        COMPOSITION_PROMPT.format(**fields),
        cfg,
        temperature=0.35,
    )
    return split_composition_sections(model_reply)
def composition_markdown(topic: str, writing_plan: str | None, sample_essay: str | None) -> str:
parts = [f"# 作文题目\n\n{topic.strip()}", ""]
if writing_plan:
parts.extend(["## 写作方案", "", writing_plan.strip(), ""])
if sample_essay:
parts.extend(["## 范文", "", sample_essay.strip(), ""])
return "\n".join(parts).strip() + "\n"