secondary-school-grade-archive/deploy/ocr-worker/app.py

"""局域网 OCR 服务：在带 NVIDIA 显卡的机器上运行，供成绩档案系统调用。"""

import logging
import os
import tempfile
from io import BytesIO
from pathlib import Path

from fastapi import FastAPI, File, Header, HTTPException, UploadFile
from PIL import Image

os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("ocr-worker")

OCR_MAX_SIDE = int(os.getenv("OCR_MAX_SIDE", "1280"))
OCR_API_KEY = os.getenv("OCR_API_KEY", "").strip()
OCR_USE_GPU = os.getenv("OCR_USE_GPU", "true").lower() in {"1", "true", "yes"}

app = FastAPI(title="Grade Archive OCR Worker", version="1.0.0")
_engine = None
_engine_mode = "none"


def _check_key(key: str | None) -> None:
    if OCR_API_KEY and key != OCR_API_KEY:
        raise HTTPException(status_code=401, detail="Invalid OCR API key")


def _create_engine(use_gpu: bool):
    from paddleocr import PaddleOCR

    return PaddleOCR(
        use_angle_cls=False,
        lang="ch",
        show_log=False,
        use_gpu=use_gpu,
        enable_mkldnn=not use_gpu,
        det_limit_side_len=min(OCR_MAX_SIDE, 1280),
        rec_batch_num=8,
    )


def get_engine(force_cpu: bool = False):
    global _engine, _engine_mode
    if _engine is not None and not force_cpu:
        return _engine

    modes: list[bool] = [False] if force_cpu or not OCR_USE_GPU else [True, False]
    last_err: Exception | None = None
    for use_gpu in modes:
        try:
            logger.info("Loading PaddleOCR use_gpu=%s", use_gpu)
            _engine = _create_engine(use_gpu)
            _engine_mode = "gpu" if use_gpu else "cpu"
            logger.info("PaddleOCR ready mode=%s", _engine_mode)
            return _engine
        except Exception as exc:
            last_err = exc
            logger.warning("PaddleOCR init failed use_gpu=%s: %s", use_gpu, exc)
            _engine = None
            _engine_mode = "none"

    hint = ""
    err_text = str(last_err or "")
    if "libGL" in err_text:
        hint = " 请执行: sudo bash deploy/install-ocr-deps.sh 后重启 OCR"
    elif any(x in err_text.lower() for x in ("cuda", "cudnn", "gpu", "out of memory")):
        hint = " 显存不足或 CUDA 异常，可设置 OCR_USE_GPU=false 用 CPU"
    raise RuntimeError(f"PaddleOCR 初始化失败: {last_err}{hint}") from last_err


def _reset_engine():
    global _engine, _engine_mode
    _engine = None
    _engine_mode = "none"


def _bbox_from_box(box: list) -> list[float]:
    xs = [float(p[0]) for p in box]
    ys = [float(p[1]) for p in box]
    return [min(xs), min(ys), max(xs), max(ys)]


def _scale_box(box: list, scale_x: float, scale_y: float) -> list:
    return [[float(p[0]) * scale_x, float(p[1]) * scale_y] for p in box]


def _prepare_image_bytes(content: bytes) -> tuple[bytes, float, float, int, int]:
    with Image.open(BytesIO(content)) as img:
        img = img.convert("RGB")
        orig_w, orig_h = img.size
        longest = max(orig_w, orig_h)
        if longest <= OCR_MAX_SIDE:
            buf = BytesIO()
            img.save(buf, format="JPEG", quality=88)
            return buf.getvalue(), 1.0, 1.0, orig_w, orig_h

        ratio = OCR_MAX_SIDE / longest
        new_w = max(1, int(orig_w * ratio))
        new_h = max(1, int(orig_h * ratio))
        resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
        buf = BytesIO()
        resized.save(buf, format="JPEG", quality=85)
        scale_x = orig_w / new_w
        scale_y = orig_h / new_h
        return buf.getvalue(), scale_x, scale_y, orig_w, orig_h


def _run_ocr_impl(content: bytes) -> dict:
    engine = get_engine()
    image_bytes, scale_x, scale_y, orig_w, orig_h = _prepare_image_bytes(content)
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        tmp.write(image_bytes)
        tmp_path = tmp.name
    try:
        result = engine.ocr(tmp_path, cls=False)
    finally:
        Path(tmp_path).unlink(missing_ok=True)

    lines: list[dict] = []
    if result and result[0]:
        for item in result[0]:
            if not item or len(item) < 2:
                continue
            box, rec = item[0], item[1]
            text = rec[0] if rec else ""
            conf = float(rec[1]) if rec and len(rec) > 1 else 0.0
            if not text:
                continue
            if scale_x != 1.0 or scale_y != 1.0:
                box = _scale_box(box, scale_x, scale_y)
            lines.append(
                {
                    "text": text,
                    "confidence": conf,
                    "box": box,
                    "bbox": _bbox_from_box(box),
                }
            )

    return {
        "text": "\n".join(line["text"] for line in lines),
        "lines": lines,
        "width": orig_w,
        "height": orig_h,
        "engine_mode": _engine_mode,
    }


def run_ocr_on_bytes(content: bytes) -> dict:
    try:
        return _run_ocr_impl(content)
    except Exception as exc:
        err = str(exc).lower()
        gpu_fail = _engine_mode == "gpu" and any(
            x in err for x in ("cuda", "cudnn", "gpu", "out of memory", "resource exhausted", "precondition")
        )
        if gpu_fail and OCR_USE_GPU:
            logger.warning("GPU OCR runtime failed, retry CPU: %s", exc)
            _reset_engine()
            get_engine(force_cpu=True)
            return _run_ocr_impl(content)
        raise


@app.get("/health")
def health():
    return {"status": "ok", "gpu_requested": OCR_USE_GPU, "engine_mode": _engine_mode}


@app.post("/api/ocr/regions")
async def ocr_regions(
    file: UploadFile = File(...),
    x_ocr_key: str | None = Header(default=None, alias="X-OCR-Key"),
):
    _check_key(x_ocr_key)
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Empty image")
    try:
        return run_ocr_on_bytes(content)
    except Exception as exc:
        logger.exception("OCR failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc