# secondary-school-grade-archive/backend/app/services/annotation.py

import json
import re
from pathlib import Path

from PIL import Image, ImageDraw, ImageFont

from app.core.config import settings


def _parse_llm_json(text: str) -> dict | None:
    text = text.strip()
    match = re.search(r"\{[\s\S]*\}", text)
    if not match:
        return None
    try:
        return json.loads(match.group())
    except json.JSONDecodeError:
        return None


def heuristic_wrong_line_ids(lines: list[dict]) -> list[int]:
    """Guess which lines are marked wrong, without help from the LLM.

    A line is flagged when its ``"text"`` contains one of the marks
    ``×``, ``✗``, ``❌`` or ``错``, or when the stripped text ends with
    ``x``/``X``.  If no line is flagged, the last two lines (or the only
    line) are returned instead.

    Args:
        lines: Line dicts; only the ``"text"`` key is read here.  A missing
            or ``None`` text is treated as empty.

    Returns:
        Indices into *lines*, in ascending order.  Empty only when *lines*
        is empty.
    """
    marks = ("×", "✗", "❌", "错")
    trailing_marks = ("×", "x", "X")

    wrong: list[int] = []
    for idx, line in enumerate(lines):
        raw = line.get("text")
        # The key may be present with a null value; `in` on None would raise.
        text = "" if raw is None else str(raw)
        if any(mark in text for mark in marks) or text.strip().endswith(trailing_marks):
            wrong.append(idx)
    if wrong:
        return wrong

    # Single-question photo: mark the last few lines of the answer area.
    count = len(lines)
    return list(range(max(0, count - 2), count))


def parse_wrong_line_ids(llm_response: str, lines: list[dict]) -> list[int]:
    """Read ``wrong_line_ids`` from the LLM reply, falling back to heuristics.

    Entries of the ``wrong_line_ids`` list are converted with ``int()``.
    Entries that cannot be converted (non-numeric strings, NaN, Infinity),
    booleans, out-of-range indices and repeats are dropped rather than
    raising, so a malformed reply degrades to the heuristic instead of
    crashing.

    Args:
        llm_response: Raw model output expected to contain a JSON object.
        lines: Line dicts; used for bounds checking and for the fallback.

    Returns:
        Valid indices into *lines* in the order the model listed them, or
        the result of ``heuristic_wrong_line_ids(lines)`` when the reply
        yields none.
    """
    data = _parse_llm_json(llm_response)
    raw_ids = data.get("wrong_line_ids") if isinstance(data, dict) else None

    if isinstance(raw_ids, list):
        ids: list[int] = []
        for raw in raw_ids:
            # bool is a subclass of int; JSON true/false are not line ids.
            if isinstance(raw, bool) or not isinstance(raw, (int, float, str)):
                continue
            try:
                idx = int(raw)
            except (ValueError, OverflowError):
                # e.g. "abc", "1.5", NaN or Infinity emitted by the model.
                continue
            if 0 <= idx < len(lines) and idx not in ids:
                ids.append(idx)
        if ids:
            return ids

    return heuristic_wrong_line_ids(lines)


def regions_from_lines(lines: list[dict], wrong_ids: list[int]) -> list[dict]:
    """Build one region record per valid wrong-line index.

    Args:
        lines: Line dicts; the ``"text"`` and ``"bbox"`` keys are read.
        wrong_ids: Indices into *lines*.  Indices outside
            ``0 <= i < len(lines)`` are skipped; negative values are not
            allowed to wrap around to the end of the list.

    Returns:
        A list of dicts with keys ``line_id``, ``text``, ``bbox``, ``type``
        and ``label``.  A missing or empty bbox is reported as
        ``[0, 0, 0, 0]``.
    """
    total = len(lines)
    return [
        {
            "line_id": i,
            "text": lines[i].get("text", ""),
            "bbox": lines[i].get("bbox") or [0, 0, 0, 0],
            "type": "wrong",
            "label": "错",
        }
        for i in wrong_ids
        if 0 <= i < total
    ]


def draw_annotated_image(
    src_path: str,
    lines: list[dict],
    wrong_ids: list[int],
    dest_rel_path: str,
) -> str:
    """Render red highlight boxes over the wrong lines and save the result.

    Each valid index in *wrong_ids* gets a translucent red rounded rectangle
    around its bbox (padded by 6 px) and a ``×`` glyph just above the top-left
    corner.  The composite is written as a JPEG (quality 92) to
    ``settings.UPLOAD_DIR / dest_rel_path``; parent directories are created
    as needed.

    Args:
        src_path: Path of the source image to annotate.
        lines: Line dicts; ``"bbox"`` is unpacked as ``x1, y1, x2, y2``.
            A missing or empty bbox is drawn as ``[0, 0, 0, 0]``.
        wrong_ids: Indices into *lines*.  Indices outside
            ``0 <= i < len(lines)`` are skipped.
        dest_rel_path: Output path relative to ``settings.UPLOAD_DIR``.

    Returns:
        *dest_rel_path*, unchanged.
    """
    # Image.open is lazy; convert() forces the pixel data to load, after
    # which the source file handle can be released deterministically.
    with Image.open(src_path) as src:
        img = src.convert("RGBA")

    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)

    try:
        font = ImageFont.truetype("DejaVuSans-Bold.ttf", max(14, img.size[0] // 40))
    except OSError:
        # Font file not available; fall back to Pillow's built-in font.
        font = ImageFont.load_default()

    pad = 6
    total = len(lines)
    for i in wrong_ids:
        # Reject negative ids too, so they cannot wrap to the list's end.
        if not 0 <= i < total:
            continue
        x1, y1, x2, y2 = lines[i].get("bbox") or [0, 0, 0, 0]
        box = [x1 - pad, y1 - pad, x2 + pad, y2 + pad]
        draw.rounded_rectangle(box, radius=4, fill=(255, 59, 48, 55), outline=(255, 59, 48, 220), width=3)
        # NOTE(review): the 18 px offset is fixed while the font size scales
        # with image width — confirm the label placement on large images.
        draw.text((x1, max(0, y1 - 18)), "×", fill=(255, 59, 48, 255), font=font)

    # JPEG has no alpha channel, so flatten to RGB before saving.
    combined = Image.alpha_composite(img, overlay).convert("RGB")
    full_path = Path(settings.UPLOAD_DIR) / dest_rel_path
    full_path.parent.mkdir(parents=True, exist_ok=True)
    combined.save(full_path, format="JPEG", quality=92)
    return dest_rel_path


def split_solution_sections(text: str) -> tuple[str | None, str]:
    if "## 解题思路" not in text:
        return None, text
    parts = re.split(r"\n##\s*", text, maxsplit=1)
    if len(parts) < 2:
        return None, text
    approach = parts[0].replace("## 解题思路", "").strip()
    rest = "## " + parts[1]
    return approach or None, rest.strip()