From 4de460c23579c6cd72f148c09f62a4c60d48328d Mon Sep 17 00:00:00 2001
From: dekun <dekun@local>
Date: Sun, 28 Jun 2026 15:27:59 +0800
Subject: [PATCH] =?UTF-8?q?OCR=20Worker=20=E6=94=B9=E7=94=A8=20RapidOCR/ON?=
 =?UTF-8?q?NX=EF=BC=8C=E4=BF=AE=E5=A4=8D=20Paddle=20SIGILL=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

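Replace the PaddleOCR engine in the OCR worker with RapidOCR running on
ONNX Runtime (CPU only) to avoid the Paddle SIGILL crash.

- app.py: load RapidOCR lazily via get_engine(); drop the GPU/CPU fallback
  logic and OCR_USE_GPU. /health now reports the engine name and no longer
  includes gpu_requested.
- install.sh: uninstall any existing paddlepaddle/paddleocr packages and
  install rapidocr-onnxruntime + onnxruntime from requirements.txt.
- ocr-common.sh: stop detecting and passing OCR_USE_GPU.
- run.sh: default OCR_USE_GPU to false.
---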
 deploy/ocr-common.sh               | 16 +++---
 deploy/ocr-worker/app.py           | 87 +++++++-----------------------
 deploy/ocr-worker/install.sh       | 33 +++---------
 deploy/ocr-worker/requirements.txt |  4 +-
 deploy/ocr-worker/run.sh           |  2 +-
 5 files changed, 34 insertions(+), 108 deletions(-)

diff --git a/deploy/ocr-common.sh b/deploy/ocr-common.sh
index 45216cc..e36ba42 100644
--- a/deploy/ocr-common.sh
+++ b/deploy/ocr-common.sh
@@ -36,11 +36,9 @@ install_ocr_worker() {
     log_error "未找到 ${worker_dir}"
     return 1
   fi
-  log_info "安装/更新 OCR Worker (PaddleOCR)…"
+  log_info "安装/更新 OCR Worker (RapidOCR/ONNX)…"
   chmod +x "${worker_dir}"/*.sh 2>/dev/null || true
-  local use_gpu
-  use_gpu="$(detect_ocr_use_gpu)"
-  OCR_PORT="${OCR_PORT}" OCR_USE_GPU="${use_gpu}" bash "${worker_dir}/install.sh"
+  OCR_PORT="${OCR_PORT}" bash "${worker_dir}/install.sh"
 }
 
 ocr_screen_running() {
@@ -48,7 +46,7 @@ ocr_screen_running() {
 }
 
 start_ocr_screen() {
-  local worker_dir
+  local worker_dir log_file
   worker_dir="$(ocr_worker_dir)"
   if [[ ! -x "${worker_dir}/.venv/bin/uvicorn" ]]; then
     log_warn "OCR Worker 未安装，跳过 screen 启动"
@@ -59,20 +57,18 @@ start_ocr_screen() {
     return 1
   fi
 
-  local use_gpu log_file
-  use_gpu="$(detect_ocr_use_gpu)"
-  log_info "启动 OCR Worker → screen「${OCR_SCREEN_NAME}」(GPU=${use_gpu}, 端口 ${OCR_PORT})"
+  log_info "启动 OCR Worker → screen「${OCR_SCREEN_NAME}」(RapidOCR, 端口 ${OCR_PORT})"
   if ocr_screen_running; then
     screen -S "${OCR_SCREEN_NAME}" -X quit 2>/dev/null || true
     sleep 1
   fi
 
   mkdir -p "${INSTALL_DIR}/logs" 2>/dev/null || true
-  local log_file="${INSTALL_DIR}/logs/ocr-worker.log"
+  log_file="${INSTALL_DIR}/logs/ocr-worker.log"
 
   screen -dmS "${OCR_SCREEN_NAME}" bash -c "
     cd '${worker_dir}' &&
-    export OCR_USE_GPU='${use_gpu}' OCR_PORT='${OCR_PORT}' OCR_HOST=0.0.0.0 &&
+    export OCR_PORT='${OCR_PORT}' OCR_HOST=0.0.0.0 &&
     exec bash run.sh >> '${log_file}' 2>&1
   "
   sleep 2
diff --git a/deploy/ocr-worker/app.py b/deploy/ocr-worker/app.py
index 0604659..ddaf6e3 100644
--- a/deploy/ocr-worker/app.py
+++ b/deploy/ocr-worker/app.py
@@ -1,4 +1,4 @@
-"""局域网 OCR 服务：在带 NVIDIA 显卡的机器上运行，供成绩档案系统调用。"""
+"""局域网 OCR 服务：RapidOCR(ONNX)，不依赖 Paddle，避免 SIGILL/cuDNN 问题。"""
 
 import logging
 import os
@@ -9,19 +9,14 @@ from pathlib import Path
 from fastapi import FastAPI, File, Header, HTTPException, UploadFile
 from PIL import Image
 
-os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")
-os.environ.setdefault("FLAGS_use_mkldnn", "0")
-
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 logger = logging.getLogger("ocr-worker")
 
 OCR_MAX_SIDE = int(os.getenv("OCR_MAX_SIDE", "1280"))
 OCR_API_KEY = os.getenv("OCR_API_KEY", "").strip()
-OCR_USE_GPU = os.getenv("OCR_USE_GPU", "true").lower() in {"1", "true", "yes"}
 
-app = FastAPI(title="Grade Archive OCR Worker", version="1.0.0")
+app = FastAPI(title="Grade Archive OCR Worker", version="2.0.0")
 _engine = None
-_engine_mode = "none"
 
 
 def _check_key(key: str | None) -> None:
@@ -29,54 +24,15 @@ def _check_key(key: str | None) -> None:
         raise HTTPException(status_code=401, detail="Invalid OCR API key")
 
 
-def _create_engine(use_gpu: bool):
-    from paddleocr import PaddleOCR
+def get_engine():
+    global _engine
+    if _engine is None:
+        from rapidocr_onnxruntime import RapidOCR
 
-    return PaddleOCR(
-        use_angle_cls=False,
-        lang="ch",
-        show_log=False,
-        use_gpu=use_gpu,
-        enable_mkldnn=False,
-        ir_optim=False,
-        det_limit_side_len=min(OCR_MAX_SIDE, 1280),
-        rec_batch_num=8,
-    )
-
-
-def get_engine(force_cpu: bool = False):
-    global _engine, _engine_mode
-    if _engine is not None and not force_cpu:
-        return _engine
-
-    modes: list[bool] = [False] if force_cpu or not OCR_USE_GPU else [True, False]
-    last_err: Exception | None = None
-    for use_gpu in modes:
-        try:
-            logger.info("Loading PaddleOCR use_gpu=%s", use_gpu)
-            _engine = _create_engine(use_gpu)
-            _engine_mode = "gpu" if use_gpu else "cpu"
-            logger.info("PaddleOCR ready mode=%s", _engine_mode)
-            return _engine
-        except Exception as exc:
-            last_err = exc
-            logger.warning("PaddleOCR init failed use_gpu=%s: %s", use_gpu, exc)
-            _engine = None
-            _engine_mode = "none"
-
-    hint = ""
-    err_text = str(last_err or "")
-    if "libGL" in err_text:
-        hint = " 请执行: sudo bash deploy/install-ocr-deps.sh 后重启 OCR"
-    elif any(x in err_text.lower() for x in ("cuda", "cudnn", "gpu", "out of memory")):
-        hint = " 显存不足或 CUDA 异常，可设置 OCR_USE_GPU=false 用 CPU"
-    raise RuntimeError(f"PaddleOCR 初始化失败: {last_err}{hint}") from last_err
-
-
-def _reset_engine():
-    global _engine, _engine_mode
-    _engine = None
-    _engine_mode = "none"
+        logger.info("Loading RapidOCR (ONNX CPU)…")
+        _engine = RapidOCR()
+        logger.info("RapidOCR ready")
+    return _engine
 
 
 def _bbox_from_box(box: list) -> list[float]:
@@ -110,32 +66,31 @@ def _prepare_image_bytes(content: bytes) -> tuple[bytes, float, float, int, int]
         return buf.getvalue(), scale_x, scale_y, orig_w, orig_h
 
 
-def _run_ocr_impl(content: bytes) -> dict:
+def run_ocr_on_bytes(content: bytes) -> dict:
     engine = get_engine()
     image_bytes, scale_x, scale_y, orig_w, orig_h = _prepare_image_bytes(content)
     with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
         tmp.write(image_bytes)
         tmp_path = tmp.name
     try:
-        result = engine.ocr(tmp_path, cls=False)
+        result, _elapsed = engine(tmp_path)
     finally:
         Path(tmp_path).unlink(missing_ok=True)
 
     lines: list[dict] = []
-    if result and result[0]:
-        for item in result[0]:
+    if result:
+        for item in result:
             if not item or len(item) < 2:
                 continue
-            box, rec = item[0], item[1]
-            text = rec[0] if rec else ""
-            conf = float(rec[1]) if rec and len(rec) > 1 else 0.0
+            box, text = item[0], item[1]
+            conf = float(item[2]) if len(item) > 2 else 0.0
             if not text:
                 continue
             if scale_x != 1.0 or scale_y != 1.0:
                 box = _scale_box(box, scale_x, scale_y)
             lines.append(
                 {
-                    "text": text,
+                    "text": str(text),
                     "confidence": conf,
                     "box": box,
                     "bbox": _bbox_from_box(box),
@@ -147,17 +102,13 @@ def _run_ocr_impl(content: bytes) -> dict:
         "lines": lines,
         "width": orig_w,
         "height": orig_h,
-        "engine_mode": _engine_mode,
+        "engine_mode": "rapidocr-onnx",
     }
 
 
-def run_ocr_on_bytes(content: bytes) -> dict:
-    return _run_ocr_impl(content)
-
-
 @app.get("/health")
 def health():
-    return {"status": "ok", "gpu_requested": OCR_USE_GPU, "engine_mode": _engine_mode}
+    return {"status": "ok", "engine": "rapidocr-onnxruntime", "engine_mode": "rapidocr-onnx"}
 
 
 @app.post("/api/ocr/regions")
diff --git a/deploy/ocr-worker/install.sh b/deploy/ocr-worker/install.sh
index 904641d..ec1c77b 100644
--- a/deploy/ocr-worker/install.sh
+++ b/deploy/ocr-worker/install.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# 在带 NVIDIA 显卡的 Linux 机器上安装 OCR Worker（由 deploy/install.sh 自动调用）
+# OCR Worker 安装（RapidOCR / ONNX，无需 Paddle/GPU）
 set -euo pipefail
 
 ROOT="$(cd "$(dirname "$0")" && pwd)"
@@ -8,6 +8,7 @@ PORT="${OCR_PORT:-23567}"
 PIP_MIRROR="${PIP_MIRROR:-https://pypi.tuna.tsinghua.edu.cn/simple}"
 
 echo "==> OCR Worker 安装目录: ${ROOT}"
+echo "==> 引擎: RapidOCR (ONNX CPU，无需 cuDNN/GPU)"
 
 if ! command -v python3 >/dev/null; then
   echo "错误: 请先安装 python3"
@@ -15,7 +16,7 @@ if ! command -v python3 >/dev/null; then
 fi
 
 if [[ -d "${VENV}" ]]; then
-  echo "==> 已有虚拟环境，跳过 python3 -m venv"
+  echo "==> 已有虚拟环境"
 else
   python3 -m venv "${VENV}"
 fi
@@ -23,33 +24,11 @@ fi
 # shellcheck disable=SC1091
 source "${VENV}/bin/activate"
 pip install -U pip wheel -i "${PIP_MIRROR}"
-
-install_paddle() {
-  local use_gpu="${OCR_USE_GPU:-false}"
-  if [[ "${use_gpu}" == "true" ]] && command -v nvidia-smi >/dev/null 2>&1 && ldconfig -p 2>/dev/null | grep -q libcudnn; then
-    local cuda_major
-    cuda_major="$(nvidia-smi 2>/dev/null | sed -n 's/.*CUDA Version: \([0-9]*\)\.[0-9]*/\1/p' | head -1)"
-    cuda_major="${cuda_major:-11}"
-    echo "==> 安装 paddlepaddle-gpu (CUDA ${cuda_major}.x)…"
-    if [[ "${cuda_major}" -ge 12 ]]; then
-      pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/ \
-        || pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
-    else
-      pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
-    fi
-  else
-    echo "==> 安装 paddlepaddle CPU（无 cuDNN 或 OCR_USE_GPU=false）…"
-    pip install paddlepaddle==2.6.2 -i "${PIP_MIRROR}"
-  fi
-  pip install 'protobuf>=3.20,<4' -q
-}
-
-install_paddle
+pip uninstall -y paddlepaddle paddlepaddle-gpu paddleocr 2>/dev/null || true
 pip install -r "${ROOT}/requirements.txt" -i "${PIP_MIRROR}"
 chmod +x "${ROOT}/run.sh" "${ROOT}/start.sh" 2>/dev/null || true
 
 echo ""
-python3 -c "import fastapi, uvicorn, paddle; print('paddle', paddle.__version__, 'OK')"
+python3 -c "import fastapi, uvicorn; from rapidocr_onnxruntime import RapidOCR; print('RapidOCR OK')"
 echo ""
-echo "==> OCR Worker 安装完成。由 deploy/install.sh 通过 screen 自动启动。"
-echo "    手动管理: bash $(dirname "$ROOT")/ocr-screen.sh status"
+echo "==> 安装完成。管理: bash $(dirname "$ROOT")/ocr-screen.sh status"
diff --git a/deploy/ocr-worker/requirements.txt b/deploy/ocr-worker/requirements.txt
index fd7bfb7..21cf2da 100644
--- a/deploy/ocr-worker/requirements.txt
+++ b/deploy/ocr-worker/requirements.txt
@@ -1,6 +1,6 @@
 fastapi==0.115.6
 uvicorn[standard]==0.34.0
 python-multipart==0.0.20
-paddleocr==2.9.1
 Pillow==11.0.0
-# GPU 版 Paddle 请用 install.sh 安装，勿直接 pip install paddlepaddle
+rapidocr-onnxruntime>=1.3.0
+onnxruntime>=1.16.0
diff --git a/deploy/ocr-worker/run.sh b/deploy/ocr-worker/run.sh
index 72d9a86..0d96be8 100644
--- a/deploy/ocr-worker/run.sh
+++ b/deploy/ocr-worker/run.sh
@@ -6,7 +6,7 @@ ROOT="$(cd "$(dirname "$0")" && pwd)"
 VENV="${ROOT}/.venv"
 PORT="${OCR_PORT:-23567}"
 
-export OCR_USE_GPU="${OCR_USE_GPU:-true}"
+export OCR_USE_GPU="${OCR_USE_GPU:-false}"
 export OCR_HOST="${OCR_HOST:-0.0.0.0}"
 
 if [[ ! -d "${VENV}" ]]; then