作业帮式错题标注:OCR 定位错误红框 + 解题思路。
- PaddleOCR 行级坐标 + AI 识别错答区域,生成标注图
- 解法拆分为「解题思路」与「详细解答」
- 详情页标注图/原图切换,列表显示标注缩略图
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
@@ -10,6 +11,7 @@ from app.core.database import SessionLocal, get_db
|
||||
from app.core.deps import get_current_user
|
||||
from app.models.user import Subject, User, WrongQuestion, WrongQuestionCategory, WrongQuestionStatus
|
||||
from app.schemas import WrongQuestionCategoryEnum, WrongQuestionOut, WrongQuestionUpdate
|
||||
from app.services import annotation as annotation_service
|
||||
from app.services import llm as llm_service
|
||||
from app.services import ocr as ocr_service
|
||||
from app.services.student_access import get_student_for_user
|
||||
@@ -17,6 +19,16 @@ from app.services.student_access import get_student_for_user
|
||||
router = APIRouter(tags=["wrong_questions"])
|
||||
|
||||
|
||||
def _parse_mark_regions(raw: str | None) -> list[dict] | None:
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
return data if isinstance(data, list) else None
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
|
||||
|
||||
def _wq_to_out(wq: WrongQuestion) -> WrongQuestionOut:
|
||||
return WrongQuestionOut(
|
||||
id=wq.id,
|
||||
@@ -27,12 +39,43 @@ def _wq_to_out(wq: WrongQuestion) -> WrongQuestionOut:
|
||||
image_path=wq.image_path,
|
||||
ocr_raw_text=wq.ocr_raw_text,
|
||||
question_text=wq.question_text,
|
||||
solution_approach=wq.solution_approach,
|
||||
solution_text=wq.solution_text,
|
||||
mark_regions=_parse_mark_regions(wq.mark_regions_json),
|
||||
has_annotated_image=bool(wq.annotated_image_path),
|
||||
status=wq.status,
|
||||
created_at=wq.created_at,
|
||||
)
|
||||
|
||||
|
||||
async def _run_ai_pipeline(wq: WrongQuestion, db: Session, ocr_lines: list[dict], ocr_text: str):
    """Run annotation and solution generation for one wrong question.

    The function mutates ``wq`` in place and does not commit; the caller owns
    the transaction.

    Args:
        wq: The wrong-question row to fill in.
        db: Session used to load the AI configuration.
        ocr_lines: Line-level OCR results passed to the detection and
            annotation helpers.
        ocr_text: Full OCR text passed to the question formatter.

    Side effects on ``wq``: sets ``mark_regions_json``, ``annotated_image_path``,
    ``question_text``, ``solution_approach``, ``solution_text`` and finally
    ``status`` (to ``WrongQuestionStatus.solved``).
    """
    subject = wq.subject
    subject_name = subject.name if subject else "综合"
    student = wq.student
    school_level = student.school_level if student else None
    is_olympiad = wq.category == WrongQuestionCategory.olympiad
    ai_cfg = llm_service.load_ai_config(db)
    source_image = str(Path(settings.UPLOAD_DIR) / wq.image_path)

    # Step 1: ask the LLM service which OCR lines are wrong, then persist the
    # matching regions as JSON on the row.
    detection = await llm_service.detect_wrong_line_ids(
        ai_cfg, subject_name, ocr_lines, school_level
    )
    wrong_ids = annotation_service.parse_wrong_line_ids(detection, ocr_lines)
    wq.mark_regions_json = json.dumps(
        annotation_service.regions_from_lines(ocr_lines, wrong_ids),
        ensure_ascii=False,
    )

    # Step 2: render the annotated image; its return value is stored as the
    # annotated image path.
    wq.annotated_image_path = annotation_service.draw_annotated_image(
        source_image,
        ocr_lines,
        wrong_ids,
        ocr_service.annotated_rel_path(wq.image_path),
    )

    # Step 3: format the question text and generate the full solution.
    question_text = await llm_service.format_question(
        ai_cfg, subject_name, ocr_text, school_level
    )
    solution_full = await llm_service.generate_solution(
        ai_cfg, subject_name, question_text, school_level, olympiad=is_olympiad
    )
    approach, solution_body = annotation_service.split_solution_sections(solution_full)

    wq.question_text = question_text
    wq.solution_approach = approach
    # Without an extracted approach section, keep the unsplit solution text.
    if approach:
        wq.solution_text = solution_body
    else:
        wq.solution_text = solution_full
    wq.status = WrongQuestionStatus.solved
|
||||
|
||||
|
||||
def _process_wrong_question(question_id: uuid.UUID):
|
||||
db = SessionLocal()
|
||||
try:
|
||||
@@ -47,7 +90,9 @@ def _process_wrong_question(question_id: uuid.UUID):
|
||||
|
||||
image_full = Path(settings.UPLOAD_DIR) / wq.image_path
|
||||
try:
|
||||
ocr_text = ocr_service.run_ocr(str(image_full))
|
||||
ocr_result = ocr_service.run_ocr_with_regions(str(image_full))
|
||||
ocr_text = ocr_result["text"]
|
||||
ocr_lines = ocr_result["lines"]
|
||||
wq.ocr_raw_text = ocr_text or None
|
||||
wq.status = WrongQuestionStatus.ocr_done if ocr_text else WrongQuestionStatus.failed
|
||||
db.commit()
|
||||
@@ -59,31 +104,12 @@ def _process_wrong_question(question_id: uuid.UUID):
|
||||
if not ocr_text:
|
||||
return
|
||||
|
||||
subject_name = wq.subject.name if wq.subject else "综合"
|
||||
school_level = wq.student.school_level if wq.student else None
|
||||
olympiad = wq.category == WrongQuestionCategory.olympiad
|
||||
ai_cfg = llm_service.load_ai_config(db)
|
||||
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
question_text = loop.run_until_complete(
|
||||
llm_service.format_question(ai_cfg, subject_name, ocr_text, school_level)
|
||||
)
|
||||
solution_text = loop.run_until_complete(
|
||||
llm_service.generate_solution(
|
||||
ai_cfg,
|
||||
subject_name,
|
||||
question_text,
|
||||
school_level,
|
||||
olympiad=olympiad,
|
||||
)
|
||||
)
|
||||
wq.question_text = question_text
|
||||
wq.solution_text = solution_text
|
||||
wq.status = WrongQuestionStatus.solved
|
||||
loop.run_until_complete(_run_ai_pipeline(wq, db, ocr_lines, ocr_text))
|
||||
db.commit()
|
||||
except Exception:
|
||||
wq.status = WrongQuestionStatus.ocr_done
|
||||
@@ -217,6 +243,8 @@ def update_wrong_question(
|
||||
wq.question_text = data.question_text
|
||||
if data.solution_text is not None:
|
||||
wq.solution_text = data.solution_text
|
||||
if data.solution_approach is not None:
|
||||
wq.solution_approach = data.solution_approach
|
||||
|
||||
db.commit()
|
||||
db.refresh(wq)
|
||||
@@ -239,10 +267,13 @@ def delete_wrong_question(
|
||||
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="错题不存在")
|
||||
|
||||
image_full = Path(settings.UPLOAD_DIR) / wq.image_path
|
||||
ann_full = Path(settings.UPLOAD_DIR) / wq.annotated_image_path if wq.annotated_image_path else None
|
||||
db.delete(wq)
|
||||
db.commit()
|
||||
if image_full.exists():
|
||||
image_full.unlink()
|
||||
if ann_full and ann_full.exists():
|
||||
ann_full.unlink()
|
||||
|
||||
|
||||
@router.post("/wrong-questions/{question_id}/retry-ocr", response_model=WrongQuestionOut)
|
||||
@@ -297,13 +328,16 @@ async def regenerate_solution(
|
||||
ai_cfg, subject_name, wq.ocr_raw_text, school_level
|
||||
)
|
||||
question_text = wq.question_text
|
||||
wq.solution_text = await llm_service.generate_solution(
|
||||
solution_full = await llm_service.generate_solution(
|
||||
ai_cfg,
|
||||
subject_name,
|
||||
question_text,
|
||||
school_level,
|
||||
olympiad=olympiad,
|
||||
)
|
||||
approach, solution_body = annotation_service.split_solution_sections(solution_full)
|
||||
wq.solution_approach = approach
|
||||
wq.solution_text = solution_body if approach else solution_full
|
||||
wq.status = WrongQuestionStatus.solved
|
||||
except Exception as exc:
|
||||
raise HTTPException(
|
||||
@@ -334,3 +368,26 @@ def get_wrong_question_image(
|
||||
if not image_full.exists():
|
||||
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="图片不存在")
|
||||
return FileResponse(image_full)
|
||||
|
||||
|
||||
@router.get("/wrong-questions/{question_id}/annotated-image")
def get_wrong_question_annotated_image(
    question_id: uuid.UUID,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Serve the annotated image of a wrong question owned by the current user.

    Args:
        question_id: Primary key of the wrong question.
        db: Request-scoped database session.
        current_user: The authenticated user.

    Returns:
        A ``FileResponse`` for the annotated image file.

    Raises:
        HTTPException: 404 when the question does not exist, has no student,
            or its student does not belong to ``current_user``; when no
            annotated image path is recorded; or when the file is missing
            under ``settings.UPLOAD_DIR``.
    """
    wq = (
        db.query(WrongQuestion)
        .options(joinedload(WrongQuestion.student))
        .filter(WrongQuestion.id == question_id)
        .first()
    )
    # wq.student is handled as optional elsewhere in this module, so guard it
    # here too: a row without a student must yield 404, not an AttributeError.
    if wq is None or wq.student is None or wq.student.user_id != current_user.id:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="错题不存在")
    if not wq.annotated_image_path:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="标注图尚未生成")

    image_full = Path(settings.UPLOAD_DIR) / wq.annotated_image_path
    if not image_full.exists():
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="标注图不存在")
    return FileResponse(image_full)
|
||||
|
||||
Reference in New Issue
Block a user