作业帮式错题标注:OCR 定位错误红框 + 解题思路。

- PaddleOCR 行级坐标 + AI 识别错答区域,生成标注图

- 解法拆分为「解题思路」与「详细解答」

- 详情页标注图/原图切换,列表显示标注缩略图
This commit is contained in:
dekun
2026-06-28 13:50:20 +08:00
parent c30e21b51e
commit a2a6d59f7c
16 changed files with 852 additions and 507 deletions
+109
View File
@@ -0,0 +1,109 @@
import json
import re
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
from app.core.config import settings
def _parse_llm_json(text: str) -> dict | None:
text = text.strip()
match = re.search(r"\{[\s\S]*\}", text)
if not match:
return None
try:
return json.loads(match.group())
except json.JSONDecodeError:
return None
def heuristic_wrong_line_ids(lines: list[dict]) -> list[int]:
    """Guess which OCR lines hold a wrong answer when no usable LLM result exists.

    A line is flagged when its text contains a cross mark, or ends with
    ``×``/``x``/``X``. If nothing is flagged, the last two lines (or the
    only line) are returned as the presumed answer area.

    Args:
        lines: OCR line dicts; only the ``"text"`` key is read here.

    Returns:
        Indices into ``lines``, in ascending order. Empty only when
        ``lines`` is empty.
    """
    # Escapes are used for the non-Latin-1 glyphs so the literals survive
    # re-encoding. An empty-string marker must never appear in this tuple:
    # ``"" in text`` is always true and would flag every line.
    # NOTE(review): the exact glyph set is an assumption - confirm which
    # marks graders actually use.
    cross_marks = ("×", "\u2717", "\u2718", "\u274c")  # × ✗ ✘ ❌

    flagged: list[int] = []
    for idx, line in enumerate(lines):
        content = line.get("text") or ""  # tolerate a missing or None text
        if any(mark in content for mark in cross_marks):
            flagged.append(idx)
            continue
        # NOTE(review): a trailing x/X also matches algebra such as "y = 2x".
        if re.search(r"[×xX]\s*$", content.strip()):
            flagged.append(idx)
    if flagged:
        return flagged

    # Single-question photo: mark the last few lines as the answer area.
    total = len(lines)
    return list(range(max(0, total - 2), total))
def parse_wrong_line_ids(llm_response: str, lines: list[dict]) -> list[int]:
    """Read wrong-line indices from an LLM response, falling back to heuristics.

    The response is expected to contain a JSON object with a
    ``"wrong_line_ids"`` list. Entries that cannot be converted to an
    integer, or that fall outside ``lines``, are skipped rather than raising.
    Duplicates are removed while preserving first-seen order.

    Args:
        llm_response: Raw LLM output.
        lines: OCR line dicts the indices refer to.

    Returns:
        Valid indices into ``lines``. When the response yields none, the
        result of ``heuristic_wrong_line_ids(lines)``.
    """
    data = _parse_llm_json(llm_response)
    raw_ids = data.get("wrong_line_ids") if isinstance(data, dict) else None
    if isinstance(raw_ids, list):
        ids: list[int] = []
        for raw in raw_ids:
            # bool is a subclass of int, but JSON true/false is not a line index.
            if isinstance(raw, bool) or not isinstance(raw, (int, float, str)):
                continue
            try:
                idx = int(raw)
            except (ValueError, OverflowError):
                # Non-numeric strings such as "line 3", or NaN/Infinity floats.
                continue
            if 0 <= idx < len(lines):
                ids.append(idx)
        ids = list(dict.fromkeys(ids))  # de-duplicate, keep order
        if ids:
            return ids
    return heuristic_wrong_line_ids(lines)
def regions_from_lines(lines: list[dict], wrong_ids: list[int]) -> list[dict]:
    """Build region descriptors for the OCR lines marked as wrong.

    Args:
        lines: OCR line dicts; the ``"text"`` and ``"bbox"`` keys are read.
        wrong_ids: Indices into ``lines``. Out-of-range indices, including
            negative ones, are skipped.

    Returns:
        One dict per valid index with keys ``line_id``, ``text``, ``bbox``,
        ``type`` (always ``"wrong"``) and ``label`` (always empty). A missing
        or empty bbox is reported as ``[0, 0, 0, 0]``.
    """
    regions = []
    for line_id in wrong_ids:
        # Reject negatives too: lines[-1] would silently pick the wrong line.
        if not 0 <= line_id < len(lines):
            continue
        line = lines[line_id]
        regions.append(
            {
                "line_id": line_id,
                "text": line.get("text", ""),
                "bbox": line.get("bbox") or [0, 0, 0, 0],
                "type": "wrong",
                "label": "",
            }
        )
    return regions
def draw_annotated_image(
    src_path: str,
    lines: list[dict],
    wrong_ids: list[int],
    dest_rel_path: str,
) -> str:
    """Render translucent red boxes over the wrong lines and save as JPEG.

    Args:
        src_path: Path of the source image.
        lines: OCR line dicts; ``"bbox"`` is unpacked as ``[x1, y1, x2, y2]``.
        wrong_ids: Indices into ``lines`` to highlight. Out-of-range indices,
            including negative ones, are skipped.
        dest_rel_path: Output path relative to ``settings.UPLOAD_DIR``.
            Parent directories are created as needed.

    Returns:
        ``dest_rel_path`` unchanged.
    """
    # Image.open is lazy and may keep the file handle open; convert inside
    # the context manager so the handle is always released.
    with Image.open(src_path) as source:
        img = source.convert("RGBA")

    # Draw on a transparent layer so the box fill blends with the photo.
    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)
    try:
        # Scale the mark with image width, with a 14 px floor.
        font = ImageFont.truetype("DejaVuSans-Bold.ttf", max(14, img.size[0] // 40))
    except OSError:
        # Font file not available; fall back to Pillow's built-in font.
        font = ImageFont.load_default()

    pad = 6
    for i in wrong_ids:
        # Reject negatives too: lines[-1] would highlight the wrong line.
        if not 0 <= i < len(lines):
            continue
        bbox = lines[i].get("bbox") or [0, 0, 0, 0]
        x1, y1, x2, y2 = bbox
        box = [x1 - pad, y1 - pad, x2 + pad, y2 + pad]
        draw.rounded_rectangle(
            box,
            radius=4,
            fill=(255, 59, 48, 55),
            outline=(255, 59, 48, 220),
            width=3,
        )
        # Place the cross just above the box, clamped to the image top.
        draw.text((x1, max(0, y1 - 18)), "×", fill=(255, 59, 48, 255), font=font)

    # JPEG has no alpha channel, so flatten to RGB before saving.
    combined = Image.alpha_composite(img, overlay).convert("RGB")
    full_path = Path(settings.UPLOAD_DIR) / dest_rel_path
    full_path.parent.mkdir(parents=True, exist_ok=True)
    combined.save(full_path, format="JPEG", quality=92)
    return dest_rel_path
def split_solution_sections(text: str) -> tuple[str | None, str]:
if "## 解题思路" not in text:
return None, text
parts = re.split(r"\n##\s*", text, maxsplit=1)
if len(parts) < 2:
return None, text
approach = parts[0].replace("## 解题思路", "").strip()
rest = "## " + parts[1]
return approach or None, rest.strip()