支持局域网 GPU OCR 服务，配置方式类似 Ollama。

2026-06-28 14:16:06 +08:00
parent 14bf314544
commit ff0c103dc5
19 changed files with 305 additions and 9 deletions
@@ -61,6 +61,8 @@ def run_migrations() -> None:
            alters.append("ADD COLUMN openai_model VARCHAR(128)")
        if "openai_api_key" not in ss_columns:
            alters.append("ADD COLUMN openai_api_key VARCHAR(512)")
+        if "ocr_service_url" not in ss_columns:
+            alters.append("ADD COLUMN ocr_service_url VARCHAR(256)")
        if alters:
            with engine.begin() as conn:
                for clause in alters:
@@ -5,6 +5,7 @@ import threading
 from io import BytesIO
 from pathlib import Path

+import httpx
 from PIL import Image

 from app.core.config import settings
@@ -23,22 +24,32 @@ def get_ocr_engine():
    if _ocr_engine is None:
        from paddleocr import PaddleOCR

+        use_gpu = settings.OCR_USE_GPU
        _ocr_engine = PaddleOCR(
            use_angle_cls=False,
            lang="ch",
            show_log=False,
-            use_gpu=False,
-            enable_mkldnn=True,
+            use_gpu=use_gpu,
+            enable_mkldnn=not use_gpu,
            det_limit_side_len=min(settings.OCR_MAX_SIDE, 1280),
            rec_batch_num=8,
        )
    return _ocr_engine


+def resolve_ocr_service_url(service_url: str | None = None) -> str | None:
+    """Pick the OCR service URL to use, preferring the caller's value.
+
+    A truthy ``service_url`` argument wins; otherwise
+    ``settings.OCR_SERVICE_URL`` is used. The chosen value is stripped of
+    surrounding whitespace, and ``None`` is returned when nothing remains.
+    """
+    candidate = service_url or settings.OCR_SERVICE_URL or ""
+    cleaned = candidate.strip()
+    if not cleaned:
+        return None
+    return cleaned
+
+
+def uses_remote_ocr(service_url: str | None = None) -> bool:
+    """Tell whether a remote OCR service URL resolves for ``service_url``."""
+    resolved = resolve_ocr_service_url(service_url)
+    return resolved is not None
+
+
 def warmup_ocr_engine() -> None:
    """后台预加载 OCR 模型，避免首张图片等待数分钟。"""
    global _ocr_warmup_started
-    if _ocr_warmup_started or not settings.OCR_WARMUP:
+    if _ocr_warmup_started or not settings.OCR_WARMUP or uses_remote_ocr():
        return
    _ocr_warmup_started = True

@@ -110,8 +121,20 @@ def _prepare_ocr_image(image_path: str) -> tuple[str, float, float, int, int, Pa
    return str(tmp), scale_x, scale_y, orig_w, orig_h, tmp


-def run_ocr_with_regions(image_path: str) -> dict:
-    """Return OCR text plus line-level bounding boxes for annotation."""
+def _run_remote_ocr(service_url: str, image_path: str) -> dict:
+    """Upload an image to a remote OCR service and return its JSON reply.
+
+    The image is posted as the multipart field ``file`` to
+    ``<service_url>/api/ocr/regions``. When ``settings.OCR_API_KEY`` is set
+    it is sent in the ``X-OCR-Key`` header.
+
+    Args:
+        service_url: Base URL of the OCR service; trailing slashes are ignored.
+        image_path: Path of the image file to upload.
+
+    Returns:
+        The decoded JSON body of the response.
+
+    Raises:
+        OSError: If the image file cannot be opened.
+        httpx.HTTPError: On transport failures, timeouts, or a 4xx/5xx reply.
+        ValueError: If the response body is not valid JSON.
+    """
+    import mimetypes
+
+    url = f"{service_url.rstrip('/')}/api/ocr/regions"
+    headers: dict[str, str] = {}
+    if settings.OCR_API_KEY:
+        headers["X-OCR-Key"] = settings.OCR_API_KEY
+
+    filename = Path(image_path).name
+    # Label the upload with the file's actual type instead of always claiming
+    # JPEG; unknown extensions keep the previous "image/jpeg" label.
+    content_type = mimetypes.guess_type(filename)[0] or "image/jpeg"
+
+    with open(image_path, "rb") as handle:
+        files = {"file": (filename, handle, content_type)}
+        with httpx.Client(timeout=settings.OCR_TIMEOUT_SECONDS) as client:
+            resp = client.post(url, files=files, headers=headers)
+            resp.raise_for_status()
+            return resp.json()
+
+
+def _run_local_ocr(image_path: str) -> dict:
    engine = get_ocr_engine()
    ocr_path, scale_x, scale_y, orig_w, orig_h, tmp_path = _prepare_ocr_image(image_path)
    try:
@@ -150,6 +173,14 @@ def run_ocr_with_regions(image_path: str) -> dict:
    }


+def run_ocr_with_regions(image_path: str, service_url: str | None = None) -> dict:
+    """Return OCR text plus line-level bounding boxes for annotation.
+
+    The request goes to the remote OCR service when a service URL resolves
+    (from ``service_url`` or the configured default); otherwise the local
+    OCR path is used.
+    """
+    endpoint = resolve_ocr_service_url(service_url)
+    if not endpoint:
+        return _run_local_ocr(image_path)
+    return _run_remote_ocr(endpoint, image_path)
+
+
 def run_ocr(image_path: str) -> str:
    """Return only the ``"text"`` entry of the OCR result for ``image_path``."""
    ocr_result = run_ocr_with_regions(image_path)
    return ocr_result["text"]