aaa08cdf38
Co-authored-by: Cursor <cursoragent@cursor.com>
309 lines
11 KiB
Python
309 lines
11 KiB
Python
import httpx
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.core.config import settings as app_settings
|
||
from app.models.user import SchoolLevel, SystemSettings
|
||
from app.services.school_level import school_level_label
|
||
from app.services.url_sanitize import sanitize_http_url, sanitize_model_name
|
||
|
||
# Curriculum-scope text blocks. _curriculum_block() selects one of these and
# generate_solution() substitutes it into the {curriculum} placeholder of the
# solution prompt templates. Each block has two lines: the permitted scope,
# then the methods the model must not use.

# Junior-high, regular coursework.
CURRICULUM_JUNIOR = """初中课程标准:代数、几何(全等/相似/勾股)、一次函数与简单二次函数、基础概率统计。
严禁使用:高中导数、向量、解析几何、排列组合进阶、复数、微积分、大学线性代数等。"""

# Senior-high, regular coursework.
CURRICULUM_SENIOR = """高中课程标准:课内函数、三角、向量、解析几何、概率统计、导数(课内范围)等。
严禁使用:大学数学分析、抽象代数、高等几何、超出课内的竞赛高阶技巧。"""

# Junior-high, olympiad training.
CURRICULUM_JUNIOR_OLYMPIAD = """初中奥数培优范围:整数/整除、因数分解、简单数论、代数恒等变形、几何辅助线与全等相似、简单组合计数。
严禁使用:高中及以上方法(导数、向量、解析几何、微积分、复数运算等)。"""

# Senior-high, olympiad training.
CURRICULUM_SENIOR_OLYMPIAD = """高中奥数/竞赛入门范围:课内知识+常规竞赛技巧(不等式、构造、归纳、简单数论等)。
严禁使用:大学数学、超出高中奥数培优体系的 IMO 高阶理论。"""
|
||
|
||
|
||
def _curriculum_block(level: SchoolLevel | str | None, olympiad: bool) -> str:
    """Return the curriculum-scope text matching a school level.

    Args:
        level: ``SchoolLevel.senior_high`` or the plain string
            ``"senior_high"`` selects the senior-high text. Any other value,
            including ``None``, selects the junior-high text.
        olympiad: When true, return the olympiad variant of the text.

    Returns:
        One of the module-level ``CURRICULUM_*`` constants.
    """
    # Accept both the enum member and its raw string form.
    is_senior = level in (SchoolLevel.senior_high, "senior_high")
    if olympiad:
        return CURRICULUM_SENIOR_OLYMPIAD if is_senior else CURRICULUM_JUNIOR_OLYMPIAD
    return CURRICULUM_SENIOR if is_senior else CURRICULUM_JUNIOR
|
||
|
||
|
||
QUESTION_PROMPT = """你是一位{stage}老师。以下是从试卷 OCR 识别出的文字,可能含有噪声。
|
||
科目:{subject}
|
||
请整理出清晰的题目内容(保留题号、选项、公式),只输出题目正文,不要解释。
|
||
|
||
OCR 原文:
|
||
{ocr_text}
|
||
"""
|
||
|
||
SOLUTION_PROMPT = """你是一位耐心的{stage}{subject}老师。请像「作业帮」一样,先讲清楚解题思路,再给出完整解答。
|
||
|
||
【学段要求 — 严禁超纲】
|
||
{curriculum}
|
||
|
||
题目:
|
||
{question_text}
|
||
|
||
请严格按以下 Markdown 结构输出:
|
||
|
||
## 解题思路
|
||
(2-5 句话:这题考什么、从哪里入手、关键一步是什么,让学生先懂「怎么想」)
|
||
|
||
## 详细解答
|
||
(分步骤完整推导,每步说明依据)
|
||
|
||
## 易错点
|
||
(指出常见错误及正确做法)
|
||
|
||
严禁使用超纲方法;若原题超纲,请给出{stage}课内可理解的解法。
|
||
"""
|
||
|
||
OLYMPIAD_SOLUTION_PROMPT = """你是一位{stage}奥数教练。请像优秀辅导老师一样,先讲解题思路,再完整解答。
|
||
|
||
【奥数学段要求 — 严禁超纲】
|
||
{curriculum}
|
||
|
||
题目:
|
||
{question_text}
|
||
|
||
请严格按以下 Markdown 结构输出:
|
||
|
||
## 解题思路
|
||
(点明题型、突破口、{stage}奥数常用技巧)
|
||
|
||
## 详细解答
|
||
(完整步骤)
|
||
|
||
## 关键技巧
|
||
(总结,仅限{stage}奥数范围)
|
||
|
||
严禁超纲;过难题给出{stage}可接受的培优思路。
|
||
"""
|
||
|
||
ERROR_DETECT_PROMPT = """你是{stage}{subject}老师。以下是试卷/作业 OCR 识别结果,每行前有编号。
|
||
请找出「学生答错的部分」:错误答案、被打叉的作答、明显不正确的计算结果等。
|
||
|
||
{numbered_lines}
|
||
|
||
只输出 JSON,不要其他文字:
|
||
{{"wrong_line_ids": [行编号整数列表]}}
|
||
若整张图就是一道错题,请标注含有错误答案或作答的行;找不到则标注最后作答行。
|
||
"""
|
||
|
||
REVIEW_INSIGHT_PROMPT = """你是一位{stage}{subject}学习顾问。请仅根据下方「复盘数据」做分析,不得编造未出现的考试或状态。
|
||
|
||
【学段】{stage}
|
||
【科目】{subject}(所有建议必须贴合本科目,禁止套用其他科目的说法)
|
||
|
||
【复盘数据】
|
||
{review_records}
|
||
|
||
【状态含义(结合本科目理解)】
|
||
- 粗心:{careless_hint}
|
||
- 不会:该科知识点/题型尚未掌握
|
||
- 紧张:心态影响,发挥低于平时水平
|
||
- 正常发挥:状态稳定
|
||
|
||
【科目建议方向】
|
||
{subject_hints}
|
||
|
||
【必须遵守】
|
||
1. 解读时必须写清具体考试日期(如 2026-06-21),按时间从早到晚分析,不得把「第1次」说成最近一场
|
||
2. 得分率 = 得分÷总分;95% 以上才可称「接近满分」,85% 左右应如实描述为「良好但仍有失分空间」,禁止夸大
|
||
3. 改进建议必须针对 {subject},禁止出现与本科目无关的表述(如英语科禁止写「计算验算」)
|
||
4. 只分析数据中列出的复盘状态,不要臆测未勾选的原因
|
||
|
||
请用 Markdown 输出:
|
||
|
||
## 情况解读
|
||
(2-4 句:按时间顺序说明每次考试得分率、失分与复盘状态的关系,以及是否有改善或反复)
|
||
|
||
## 改进建议
|
||
(3-5 条,针对出现最多的问题状态,具体可操作,仅限 {stage}{subject} 范围)
|
||
|
||
## 近期重点
|
||
(1-2 条本周可落实的小目标)
|
||
|
||
语气务实,不要空泛鸡汤。
|
||
"""
|
||
|
||
# Per-subject text snippets for the review-insight prompt, keyed by the
# subject name. Each entry has two keys:
#   "careless": what a "careless" mistake typically means in that subject
#   "hints":    the direction improvement suggestions should take
# Looked up by _subject_review_hints(), which supplies a generic fallback for
# subjects that are not listed here.
SUBJECT_REVIEW_HINTS: dict[str, dict[str, str]] = {
    "语文": {
        "careless": "看错题干、漏读要求、作文偏题或漏写要点",
        "hints": "阅读审题、文言文/语言运用、作文结构与素材积累",
    },
    "数学": {
        "careless": "审题不清、计算或抄错、步骤跳步",
        "hints": "错题归类、计算验算、典型题型归纳与限时练习",
    },
    "英语": {
        "careless": "看错词义/时态、漏读题干、拼写与语法笔误",
        "hints": "词汇语法、阅读完形、听力与写作模板,禁止建议计算类训练",
    },
    "物理": {
        "careless": "审题漏条件、公式代错、单位换算失误",
        "hints": "概念理解、建模分析、实验题与计算规范",
    },
    "化学": {
        "careless": "方程式配平/条件遗漏、计算失误",
        "hints": "方程式、物质性质、实验与推断题",
    },
    "生物": {
        "careless": "概念混淆、漏答得分点",
        "hints": "教材概念、图表分析、实验设计表述",
    },
    "历史": {
        "careless": "材料题漏读、时间/人物混淆",
        "hints": "时间线、材料分析、论述题答题模板",
    },
    "地理": {
        "careless": "读图漏信息、术语使用不当",
        "hints": "地图判读、区域分析、综合题答题条理",
    },
    "政治": {
        "careless": "漏答采分点、概念表述不准",
        "hints": "时政结合、材料分析、观点表述规范",
    },
}
|
||
|
||
|
||
def _subject_review_hints(subject: str) -> tuple[str, str]:
    """Return the ``(careless_hint, subject_hints)`` pair for a subject.

    Args:
        subject: Subject name used as the key into ``SUBJECT_REVIEW_HINTS``.

    Returns:
        The table entry's ``"careless"`` and ``"hints"`` texts, or a generic
        pair (the second element mentions ``subject``) when the subject has
        no entry.
    """
    entry = SUBJECT_REVIEW_HINTS.get(subject)
    if not entry:
        # Unknown subject: fall back to subject-agnostic wording.
        generic_careless = "审题或作答细节失误"
        generic_hints = f"针对{subject}常见失分点制定练习与错题巩固"
        return generic_careless, generic_hints
    return entry["careless"], entry["hints"]
|
||
|
||
class AIConfig:
|
||
def __init__(
|
||
self,
|
||
provider: str,
|
||
ollama_base_url: str,
|
||
ollama_model: str,
|
||
openai_base_url: str,
|
||
openai_model: str,
|
||
openai_api_key: str | None,
|
||
):
|
||
self.provider = provider
|
||
self.ollama_base_url = ollama_base_url
|
||
self.ollama_model = ollama_model
|
||
self.openai_base_url = openai_base_url
|
||
self.openai_model = openai_model
|
||
self.openai_api_key = openai_api_key
|
||
|
||
|
||
def load_ai_config(db: Session) -> AIConfig:
    """Build an ``AIConfig`` from the stored system settings.

    Reads the ``SystemSettings`` row with primary key 1. Each empty (falsy)
    field on that row falls back to the matching value from the application
    settings, and the provider falls back to ``"ollama"``. When the row does
    not exist, every value comes from the application settings and no API
    key is set. URLs and model names are passed through ``sanitize_http_url``
    and ``sanitize_model_name`` in both cases.

    Args:
        db: SQLAlchemy session used to fetch the settings row.

    Returns:
        The assembled configuration.
    """
    stored = db.get(SystemSettings, 1)

    if stored is not None:
        # Stored values win; blank ones fall back to the application defaults.
        ollama_url = sanitize_http_url(stored.ollama_base_url or app_settings.OLLAMA_BASE_URL)
        ollama_model = sanitize_model_name(stored.ollama_model or app_settings.OLLAMA_MODEL)
        openai_url = sanitize_http_url(stored.openai_base_url or app_settings.OPENAI_BASE_URL)
        openai_model = sanitize_model_name(stored.openai_model or app_settings.OPENAI_MODEL)
        return AIConfig(
            provider=stored.ai_provider or "ollama",
            ollama_base_url=ollama_url,
            ollama_model=ollama_model,
            openai_base_url=openai_url,
            openai_model=openai_model,
            openai_api_key=stored.openai_api_key,
        )

    # No settings row: use the application defaults only.
    ollama_url = sanitize_http_url(app_settings.OLLAMA_BASE_URL)
    ollama_model = sanitize_model_name(app_settings.OLLAMA_MODEL)
    openai_url = sanitize_http_url(app_settings.OPENAI_BASE_URL)
    openai_model = sanitize_model_name(app_settings.OPENAI_MODEL)
    return AIConfig(
        provider="ollama",
        ollama_base_url=ollama_url,
        ollama_model=ollama_model,
        openai_base_url=openai_url,
        openai_model=openai_model,
        openai_api_key=None,
    )
|
||
|
||
|
||
async def _ollama_generate(prompt: str, cfg: AIConfig, *, temperature: float = 0.3) -> str:
    """Send a non-streaming generate request to Ollama and return its text.

    Args:
        prompt: Full prompt text.
        cfg: Supplies ``ollama_base_url`` and ``ollama_model``.
        temperature: Sampling temperature placed under ``options``.

    Returns:
        The ``"response"`` field of the JSON reply with surrounding
        whitespace stripped; an empty string if that field is missing or
        empty.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    endpoint = cfg.ollama_base_url.rstrip("/") + "/api/generate"
    body = {
        "model": cfg.ollama_model,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature},
    }
    # Generation can be slow, hence the 180-second timeout.
    async with httpx.AsyncClient(timeout=180.0) as http:
        reply = await http.post(endpoint, json=body)
        reply.raise_for_status()
        generated = reply.json().get("response")
    return (generated or "").strip()
|
||
|
||
|
||
async def _openai_generate(prompt: str, cfg: AIConfig, *, temperature: float = 0.3) -> str:
    """Send a single-message chat-completions request and return the reply text.

    Args:
        prompt: Sent as the content of one ``user`` message.
        cfg: Supplies ``openai_base_url``, ``openai_model`` and
            ``openai_api_key``.
        temperature: Sampling temperature.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped; an empty string if that content is empty or ``None``.

    Raises:
        ValueError: If no API key is configured.
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    api_key = cfg.openai_api_key
    if not api_key:
        raise ValueError("未配置 OpenAI API Key")

    endpoint = cfg.openai_base_url.rstrip("/") + "/chat/completions"
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    body = {
        "model": cfg.openai_model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
    }
    async with httpx.AsyncClient(timeout=180.0) as http:
        reply = await http.post(endpoint, json=body, headers=auth_headers)
        reply.raise_for_status()
        parsed = reply.json()
        first_message = parsed["choices"][0]["message"]
        return (first_message["content"] or "").strip()
|
||
|
||
|
||
async def generate_text(prompt: str, cfg: AIConfig, *, temperature: float = 0.3) -> str:
    """Generate text with the backend selected by ``cfg.provider``.

    A provider of exactly ``"openai"`` uses the OpenAI-style backend; any
    other value uses Ollama.

    Args:
        prompt: Full prompt text.
        cfg: Backend configuration.
        temperature: Sampling temperature forwarded to the backend.

    Returns:
        The generated text as returned by the chosen backend.
    """
    backend = _openai_generate if cfg.provider == "openai" else _ollama_generate
    return await backend(prompt, cfg, temperature=temperature)
|
||
|
||
|
||
async def format_question(
    cfg: AIConfig,
    subject: str,
    ocr_text: str,
    school_level=None,
) -> str:
    """Ask the model to turn raw OCR text into a clean question statement.

    Args:
        cfg: Backend configuration.
        subject: Subject name inserted into the prompt.
        ocr_text: Raw OCR output to tidy up.
        school_level: Passed to ``school_level_label`` to obtain the stage
            text used in the prompt.

    Returns:
        The model's reply.
    """
    filled_prompt = QUESTION_PROMPT.format(
        stage=school_level_label(school_level),
        subject=subject,
        ocr_text=ocr_text,
    )
    return await generate_text(filled_prompt, cfg)
|
||
|
||
|
||
async def generate_solution(
    cfg: AIConfig,
    subject: str,
    question_text: str,
    school_level=None,
    *,
    olympiad: bool = False,
) -> str:
    """Ask the model for a structured worked solution to a question.

    Args:
        cfg: Backend configuration.
        subject: Subject name inserted into the prompt.
        question_text: The question to solve.
        school_level: Passed to ``school_level_label`` for the stage text and
            to ``_curriculum_block`` for the curriculum limits.
        olympiad: Use the olympiad prompt and curriculum text instead of the
            regular ones.

    Returns:
        The model's reply.
    """
    stage_label = school_level_label(school_level)
    scope_text = _curriculum_block(school_level, olympiad)

    if olympiad:
        chosen_template = OLYMPIAD_SOLUTION_PROMPT
    else:
        chosen_template = SOLUTION_PROMPT

    # Both templates receive the same keyword set; str.format ignores any
    # keyword a template does not reference.
    fields = {
        "stage": stage_label,
        "subject": subject,
        "curriculum": scope_text,
        "question_text": question_text,
    }
    return await generate_text(chosen_template.format(**fields), cfg)
|
||
|
||
|
||
async def detect_wrong_line_ids(
    cfg: AIConfig,
    subject: str,
    ocr_lines: list[dict],
    school_level=None,
) -> str:
    """Ask the model which OCR lines contain wrong student answers.

    Every OCR line is rendered as ``[index] text`` using its 0-based position
    in ``ocr_lines``; a line without a ``"text"`` key contributes an empty
    text.

    Args:
        cfg: Backend configuration.
        subject: Subject name inserted into the prompt.
        ocr_lines: OCR line dicts; only the ``"text"`` key is read here.
        school_level: Passed to ``school_level_label`` to obtain the stage
            text used in the prompt.

    Returns:
        The model's raw reply. The prompt requests JSON, but this function
        does not parse or validate it.
    """
    stage_label = school_level_label(school_level)
    rendered_lines = [
        f"[{index}] {entry.get('text', '')}" for index, entry in enumerate(ocr_lines)
    ]
    filled_prompt = ERROR_DETECT_PROMPT.format(
        stage=stage_label,
        subject=subject,
        numbered_lines="\n".join(rendered_lines),
    )
    return await generate_text(filled_prompt, cfg)
|
||
|
||
|
||
async def generate_review_insight(
    cfg: AIConfig,
    subject: str,
    review_records: str,
    school_level=None,
) -> str:
    """Ask the model for a study-review analysis of the given records.

    Args:
        cfg: Backend configuration.
        subject: Subject name; also selects the subject-specific hint texts.
        review_records: Pre-formatted review data inserted verbatim into the
            prompt.
        school_level: Passed to ``school_level_label`` to obtain the stage
            text used in the prompt.

    Returns:
        The model's reply, generated at temperature 0.2.
    """
    stage_label = school_level_label(school_level)
    hint_pair = _subject_review_hints(subject)
    filled_prompt = REVIEW_INSIGHT_PROMPT.format(
        stage=stage_label,
        subject=subject,
        review_records=review_records,
        careless_hint=hint_pair[0],
        subject_hints=hint_pair[1],
    )
    # Lower temperature than the generate_text default of 0.3.
    return await generate_text(filled_prompt, cfg, temperature=0.2)