支持局域网 GPU OCR 服务，配置方式类似 Ollama。

2026-06-28 14:16:06 +08:00
parent 14bf314544
commit ff0c103dc5
19 changed files with 305 additions and 9 deletions
@@ -0,0 +1,140 @@
+"""局域网 OCR 服务：在带 NVIDIA 显卡的机器上运行，供成绩档案系统调用。"""
+
+import hmac
+import io
+import logging
+import os
+import tempfile
+from pathlib import Path
+
+from fastapi import FastAPI, File, Header, HTTPException, UploadFile
+from PIL import Image
+
+# Default the OpenCV OpenEXR switch to "0" without overriding an explicit
+# setting. This runs at import time, before get_engine() lazily imports
+# paddleocr.
+os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")
+
+# Longest image side, in pixels, accepted before an upload is downscaled.
+OCR_MAX_SIDE = int(os.getenv("OCR_MAX_SIDE", "1280"))
+# Shared secret compared against the X-OCR-Key header; empty disables the check.
+OCR_API_KEY = os.getenv("OCR_API_KEY", "").strip()
+# True only for "1", "true" or "yes" (case-insensitive); defaults to "true".
+OCR_USE_GPU = os.getenv("OCR_USE_GPU", "true").lower() in {"1", "true", "yes"}
+
+app = FastAPI(title="Grade Archive OCR Worker", version="1.0.0")
+# Cached PaddleOCR instance; stays None until get_engine() builds it.
+_engine = None
+
+
+def _check_key(key: str | None) -> None:
+    if OCR_API_KEY and key != OCR_API_KEY:
+        raise HTTPException(status_code=401, detail="Invalid OCR API key")
+
+
+def get_engine():
+    global _engine
+    if _engine is None:
+        from paddleocr import PaddleOCR
+
+        _engine = PaddleOCR(
+            use_angle_cls=False,
+            lang="ch",
+            show_log=False,
+            use_gpu=OCR_USE_GPU,
+            enable_mkldnn=not OCR_USE_GPU,
+            det_limit_side_len=min(OCR_MAX_SIDE, 1280),
+            rec_batch_num=8,
+        )
+    return _engine
+
+
+def _bbox_from_box(box: list) -> list[float]:
+    xs = [float(p[0]) for p in box]
+    ys = [float(p[1]) for p in box]
+    return [min(xs), min(ys), max(xs), max(ys)]
+
+
+def _scale_box(box: list, scale_x: float, scale_y: float) -> list:
+    return [[float(p[0]) * scale_x, float(p[1]) * scale_y] for p in box]
+
+
+def _prepare_image_bytes(content: bytes) -> tuple[bytes, float, float, int, int]:
+    with Image.open(__import__("io").BytesIO(content)) as img:
+        img = img.convert("RGB")
+        orig_w, orig_h = img.size
+        longest = max(orig_w, orig_h)
+        if longest <= OCR_MAX_SIDE:
+            buf = __import__("io").BytesIO()
+            img.save(buf, format="JPEG", quality=88)
+            return buf.getvalue(), 1.0, 1.0, orig_w, orig_h
+
+        ratio = OCR_MAX_SIDE / longest
+        new_w = max(1, int(orig_w * ratio))
+        new_h = max(1, int(orig_h * ratio))
+        resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        buf = __import__("io").BytesIO()
+        resized.save(buf, format="JPEG", quality=85)
+        scale_x = orig_w / new_w
+        scale_y = orig_h / new_h
+        return buf.getvalue(), scale_x, scale_y, orig_w, orig_h
+
+
+def run_ocr_on_bytes(content: bytes) -> dict:
+    engine = get_engine()
+    image_bytes, scale_x, scale_y, orig_w, orig_h = _prepare_image_bytes(content)
+    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
+        tmp.write(image_bytes)
+        tmp_path = tmp.name
+    try:
+        result = engine.ocr(tmp_path, cls=False)
+    finally:
+        Path(tmp_path).unlink(missing_ok=True)
+
+    lines: list[dict] = []
+    if result and result[0]:
+        for item in result[0]:
+            if not item or len(item) < 2:
+                continue
+            box, rec = item[0], item[1]
+            text = rec[0] if rec else ""
+            conf = float(rec[1]) if rec and len(rec) > 1 else 0.0
+            if not text:
+                continue
+            if scale_x != 1.0 or scale_y != 1.0:
+                box = _scale_box(box, scale_x, scale_y)
+            lines.append(
+                {
+                    "text": text,
+                    "confidence": conf,
+                    "box": box,
+                    "bbox": _bbox_from_box(box),
+                }
+            )
+
+    return {
+        "text": "\n".join(line["text"] for line in lines),
+        "lines": lines,
+        "width": orig_w,
+        "height": orig_h,
+    }
+
+
+@app.on_event("startup")
+def warmup():
+    buf = __import__("io").BytesIO()
+    Image.new("RGB", (120, 40), color=(255, 255, 255)).save(buf, format="JPEG")
+    try:
+        run_ocr_on_bytes(buf.getvalue())
+    except Exception:
+        pass
+
+
+@app.get("/health")
+def health():
+    return {"status": "ok", "gpu": OCR_USE_GPU}
+
+
+@app.post("/api/ocr/regions")
+async def ocr_regions(
+    file: UploadFile = File(...),
+    x_ocr_key: str | None = Header(default=None, alias="X-OCR-Key"),
+):
+    _check_key(x_ocr_key)
+    content = await file.read()
+    if not content:
+        raise HTTPException(status_code=400, detail="Empty image")
+    try:
+        return run_ocr_on_bytes(content)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# 在带 NVIDIA 显卡（如 RTX 3060 Ti）的 Linux 机器上安装 OCR Worker
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")" && pwd)"
+VENV="${ROOT}/.venv"
+PORT="${OCR_PORT:-23567}"
+
+echo "==> OCR Worker 安装目录: ${ROOT}"
+
+if ! command -v python3 >/dev/null; then
+  echo "请先安装 python3"
+  exit 1
+fi
+
+python3 -m venv "${VENV}"
+# shellcheck disable=SC1091
+source "${VENV}/bin/activate"
+pip install -U pip wheel
+
+# Paddle GPU（CUDA 11.8，适配多数 3060 Ti 驱动）
+pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
+pip install -r "${ROOT}/requirements.txt"
+
+cat <<EOF
+
+安装完成。
+
+启动（前台测试）:
+  cd ${ROOT}
+  OCR_USE_GPU=true OCR_PORT=${PORT} bash start.sh
+
+局域网地址示例:
+  http://192.168.x.x:${PORT}
+
+在成绩档案「系统设置 → AI 模型」里填写 OCR 服务地址:
+  http://192.168.x.x:${PORT}
+
+可选环境变量:
+  OCR_USE_GPU=true          # 使用显卡（默认 true）
+  OCR_API_KEY=随机字符串     # 局域网鉴权（可选）
+  OCR_MAX_SIDE=1280           # 识别长边上限
+  OCR_PORT=${PORT}            # 监听端口
+
+EOF
@@ -0,0 +1,17 @@
+# systemd unit for the OCR worker. Assumes the worker is installed in
+# /opt/ocr-worker with its virtualenv at /opt/ocr-worker/.venv.
+[Unit]
+Description=Grade Archive OCR Worker (PaddleOCR GPU)
+After=network.target
+
+[Service]
+Type=simple
+WorkingDirectory=/opt/ocr-worker
+Environment=OCR_USE_GPU=true
+Environment=OCR_PORT=23567
+Environment=OCR_HOST=0.0.0.0
+# Uncomment and set a random secret to require the X-OCR-Key header:
+# Environment=OCR_API_KEY=<random-secret>
+# Host and port come from the Environment= lines above, so editing them (or
+# overriding them in a drop-in) actually changes the listening address.
+ExecStart=/opt/ocr-worker/.venv/bin/uvicorn app:app --host ${OCR_HOST} --port ${OCR_PORT}
+Restart=on-failure
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
@@ -0,0 +1,6 @@
+fastapi==0.115.6
+uvicorn[standard]==0.34.0
+python-multipart==0.0.20
+paddleocr==2.9.1
+Pillow==11.0.0
+# Install the GPU build of Paddle via install.sh; do not `pip install paddlepaddle` directly.
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Start the OCR worker with uvicorn from the virtualenv created by install.sh.
+# OCR_HOST and OCR_PORT override the default bind address 0.0.0.0:23567.
+set -euo pipefail
+
+# Directory containing this script; the virtualenv is expected inside it.
+ROOT="$(cd "$(dirname "$0")" && pwd)"
+VENV="${ROOT}/.venv"
+HOST="${OCR_HOST:-0.0.0.0}"
+PORT="${OCR_PORT:-23567}"
+
+if [[ ! -d "${VENV}" ]]; then
+  # Diagnostics go to stderr so they are not lost when stdout is redirected.
+  echo "未找到虚拟环境，请先运行: bash install.sh" >&2
+  exit 1
+fi
+
+# shellcheck disable=SC1091
+source "${VENV}/bin/activate"
+# Run from the script directory so the "app:app" module path resolves.
+cd "${ROOT}"
+# exec replaces the shell, so uvicorn receives signals directly.
+exec uvicorn app:app --host "${HOST}" --port "${PORT}"