diff --git a/backend/app/routers/wrong_questions.py b/backend/app/routers/wrong_questions.py
index 11dea05..05aa2d2 100644
--- a/backend/app/routers/wrong_questions.py
+++ b/backend/app/routers/wrong_questions.py
@@ -177,7 +177,8 @@ def _process_wrong_question(question_id: uuid.UUID):
             if "libGL" in str(exc):
                 msg += " 请在服务器执行: sudo bash deploy/install-ocr-deps.sh && systemctl restart grade-archive"
             elif ocr_url:
-                msg += f" 请检查 OCR 服务是否可达: {ocr_url} （可浏览器访问 {ocr_url.rstrip('/')}/health）"
+                if "OCR 服务" not in msg:
+                    msg += " 诊断: bash deploy/ocr-screen.sh status && bash deploy/ocr-worker/test-ocr.sh"
             wq.error_message = msg
             db.commit()
             return
diff --git a/backend/app/services/ocr.py b/backend/app/services/ocr.py
index 37f8c1d..d57404b 100644
--- a/backend/app/services/ocr.py
+++ b/backend/app/services/ocr.py
@@ -130,7 +130,17 @@ def _run_remote_ocr(service_url: str, image_path: str) -> dict:
         files = {"file": (Path(image_path).name, handle, "image/jpeg")}
         with httpx.Client(timeout=settings.OCR_TIMEOUT_SECONDS) as client:
             resp = client.post(url, files=files, headers=headers)
-            resp.raise_for_status()
+            if resp.status_code >= 400:
+                detail = resp.text
+                try:
+                    body = resp.json()
+                    if isinstance(body.get("detail"), str):
+                        detail = body["detail"]
+                    elif isinstance(body.get("detail"), list):
+                        detail = str(body["detail"])
+                except Exception:
+                    pass
+                raise RuntimeError(f"OCR 服务 {resp.status_code}: {detail}")
             return resp.json()
 
 
diff --git a/deploy/install.sh b/deploy/install.sh
index c468ad4..9bd8194 100644
--- a/deploy/install.sh
+++ b/deploy/install.sh
@@ -210,6 +210,9 @@ setup_ocr_gpu() {
   else
     log_warn "未检测到 NVIDIA GPU，OCR 将使用 CPU（较慢）"
   fi
+  if [[ -x "${INSTALL_DIR}/deploy/install-ocr-deps.sh" ]]; then
+    bash "${INSTALL_DIR}/deploy/install-ocr-deps.sh" || log_warn "OCR 系统库安装跳过"
+  fi
   install_ocr_worker
   start_ocr_screen
   wait_ocr_healthy 30 || log_warn "OCR 后台加载中，继续安装主程序…"
diff --git a/deploy/ocr-worker/app.py b/deploy/ocr-worker/app.py
index 9762492..c3bdb40 100644
--- a/deploy/ocr-worker/app.py
+++ b/deploy/ocr-worker/app.py
@@ -1,7 +1,9 @@
 """局域网 OCR 服务：在带 NVIDIA 显卡的机器上运行，供成绩档案系统调用。"""
 
+import logging
 import os
 import tempfile
+from io import BytesIO
 from pathlib import Path
 
 from fastapi import FastAPI, File, Header, HTTPException, UploadFile
@@ -9,12 +11,16 @@ from PIL import Image
 
 os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")
 
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")  # configured at import time
+logger = logging.getLogger("ocr-worker")
+
 OCR_MAX_SIDE = int(os.getenv("OCR_MAX_SIDE", "1280"))
 OCR_API_KEY = os.getenv("OCR_API_KEY", "").strip()
 OCR_USE_GPU = os.getenv("OCR_USE_GPU", "true").lower() in {"1", "true", "yes"}
 
 app = FastAPI(title="Grade Archive OCR Worker", version="1.0.0")
 _engine = None
+_engine_mode = "none"  # "gpu" or "cpu" while a PaddleOCR engine is loaded, "none" otherwise
 
 
 def _check_key(key: str | None) -> None:
@@ -22,21 +28,53 @@ def _check_key(key: str | None) -> None:
         raise HTTPException(status_code=401, detail="Invalid OCR API key")
 
 
-def get_engine():
-    global _engine
-    if _engine is None:
-        from paddleocr import PaddleOCR
+def _create_engine(use_gpu: bool):  # build and return a new PaddleOCR instance; caching is done by get_engine
+    from paddleocr import PaddleOCR  # imported inside the function, i.e. only when an engine is actually built
 
-        _engine = PaddleOCR(
-            use_angle_cls=False,
-            lang="ch",
-            show_log=False,
-            use_gpu=OCR_USE_GPU,
-            enable_mkldnn=not OCR_USE_GPU,
-            det_limit_side_len=min(OCR_MAX_SIDE, 1280),
-            rec_batch_num=8,
-        )
-    return _engine
+    return PaddleOCR(
+        use_angle_cls=False,
+        lang="ch",
+        show_log=False,
+        use_gpu=use_gpu,
+        enable_mkldnn=not use_gpu,  # MKL-DNN flag is set only when use_gpu is False
+        det_limit_side_len=min(OCR_MAX_SIDE, 1280),  # never above 1280, even if OCR_MAX_SIDE is larger
+        rec_batch_num=8,
+    )
+
+
+def get_engine(force_cpu: bool = False):  # return the cached engine, or load one (GPU first, then CPU); raises RuntimeError if every mode fails
+    global _engine, _engine_mode
+    if _engine is not None and not force_cpu:  # NOTE(review): force_cpu=True always rebuilds, even if a CPU engine is cached
+        return _engine
+
+    modes: list[bool] = [False] if force_cpu or not OCR_USE_GPU else [True, False]  # use_gpu values to try, in order
+    last_err: Exception | None = None
+    for use_gpu in modes:
+        try:
+            logger.info("Loading PaddleOCR use_gpu=%s", use_gpu)
+            _engine = _create_engine(use_gpu)
+            _engine_mode = "gpu" if use_gpu else "cpu"
+            logger.info("PaddleOCR ready mode=%s", _engine_mode)
+            return _engine
+        except Exception as exc:  # any init failure falls through to the next mode in the list
+            last_err = exc
+            logger.warning("PaddleOCR init failed use_gpu=%s: %s", use_gpu, exc)
+            _engine = None
+            _engine_mode = "none"
+
+    hint = ""  # optional remediation text appended to the final error message
+    err_text = str(last_err or "")
+    if "libGL" in err_text:  # case-sensitive match, unlike the lower-cased keyword check below
+        hint = " 请执行: sudo bash deploy/install-ocr-deps.sh 后重启 OCR"
+    elif any(x in err_text.lower() for x in ("cuda", "cudnn", "gpu", "out of memory")):
+        hint = " 显存不足或 CUDA 异常，可设置 OCR_USE_GPU=false 用 CPU"
+    raise RuntimeError(f"PaddleOCR 初始化失败: {last_err}{hint}") from last_err  # chained to the last init error
+
+
+def _reset_engine():  # drop the cached engine so the next get_engine() call loads a new one
+    global _engine, _engine_mode
+    _engine = None
+    _engine_mode = "none"  # same value as the module-level initial state
 
 
 def _bbox_from_box(box: list) -> list[float]:
@@ -50,12 +88,12 @@ def _scale_box(box: list, scale_x: float, scale_y: float) -> list:
 
 
 def _prepare_image_bytes(content: bytes) -> tuple[bytes, float, float, int, int]:
-    with Image.open(__import__("io").BytesIO(content)) as img:
+    with Image.open(BytesIO(content)) as img:
         img = img.convert("RGB")
         orig_w, orig_h = img.size
         longest = max(orig_w, orig_h)
         if longest <= OCR_MAX_SIDE:
-            buf = __import__("io").BytesIO()
+            buf = BytesIO()
             img.save(buf, format="JPEG", quality=88)
             return buf.getvalue(), 1.0, 1.0, orig_w, orig_h
 
@@ -63,14 +101,14 @@ def _prepare_image_bytes(content: bytes) -> tuple[bytes, float, float, int, int]
         new_w = max(1, int(orig_w * ratio))
         new_h = max(1, int(orig_h * ratio))
         resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-        buf = __import__("io").BytesIO()
+        buf = BytesIO()
         resized.save(buf, format="JPEG", quality=85)
         scale_x = orig_w / new_w
         scale_y = orig_h / new_h
         return buf.getvalue(), scale_x, scale_y, orig_w, orig_h
 
 
-def run_ocr_on_bytes(content: bytes) -> dict:
+def _run_ocr_impl(content: bytes) -> dict:
     engine = get_engine()
     image_bytes, scale_x, scale_y, orig_w, orig_h = _prepare_image_bytes(content)
     with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
@@ -107,15 +145,29 @@ def run_ocr_on_bytes(content: bytes) -> dict:
         "lines": lines,
         "width": orig_w,
         "height": orig_h,
+        "engine_mode": _engine_mode,
     }
 
 
+def run_ocr_on_bytes(content: bytes) -> dict:  # run OCR; if a GPU engine fails with a GPU-looking error, retry once on CPU
+    try:
+        return _run_ocr_impl(content)
+    except Exception as exc:
+        err = str(exc).lower()
+        gpu_fail = _engine_mode == "gpu" and any(  # heuristic: keyword match on the lower-cased error text
+            x in err for x in ("cuda", "cudnn", "gpu", "out of memory", "resource exhausted", "precondition")
+        )
+        if gpu_fail and OCR_USE_GPU:
+            logger.warning("GPU OCR runtime failed, retry CPU: %s", exc)
+            _reset_engine()
+            get_engine(force_cpu=True)  # caches a CPU engine, which the retry below picks up via get_engine()
+            return _run_ocr_impl(content)  # an error raised by the retry propagates to the caller
+        raise  # not classified as a GPU failure: re-raise the original exception
+
+
 @app.get("/health")
 def health():
-    return {"status": "ok", "gpu": OCR_USE_GPU}
-
-
-# 首次 /api/ocr/regions 请求时再加载模型（/health 立即响应，避免安装脚本长时间等待）
+    return {"status": "ok", "gpu_requested": OCR_USE_GPU, "engine_mode": _engine_mode}  # reports cached state only; does not call get_engine()
 
 
 @app.post("/api/ocr/regions")
@@ -130,4 +182,5 @@ async def ocr_regions(
     try:
         return run_ocr_on_bytes(content)
     except Exception as exc:
+        logger.exception("OCR failed")
         raise HTTPException(status_code=500, detail=str(exc)) from exc
diff --git a/deploy/ocr-worker/test-ocr.sh b/deploy/ocr-worker/test-ocr.sh
new file mode 100644
index 0000000..d4a3eea
--- /dev/null
+++ b/deploy/ocr-worker/test-ocr.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Smoke test for the OCR worker on this host: GET /health, then POST a blank image to /api/ocr/regions.
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "$0")" && pwd)"
+PORT="${OCR_PORT:-23567}"
+TMP="/tmp/ocr-test-$$.jpg"
+# Remove the test image on every exit path; with `set -e` a failed step exits before the end of the script.
+trap 'rm -f "${TMP}"' EXIT
+
+if [[ ! -d "${ROOT}/.venv" ]]; then
+  echo "请先 bash install.sh"
+  exit 1
+fi
+
+# shellcheck disable=SC1091
+source "${ROOT}/.venv/bin/activate"
+# Generate a 200x80 white JPEG to use as the upload payload.
+python3 -c "from PIL import Image; Image.new('RGB',(200,80),(255,255,255)).save('${TMP}')"
+
+echo "==> GET /health"
+curl -sS "http://127.0.0.1:${PORT}/health" || { echo "FAIL: OCR 未启动"; exit 1; }
+echo ""
+
+echo "==> POST /api/ocr/regions"
+# No --fail here: curl prints the response body followed by the HTTP status code for diagnosis.
+curl -sS -w "\nHTTP %{http_code}\n" -F "file=@${TMP};type=image/jpeg" \
+  "http://127.0.0.1:${PORT}/api/ocr/regions"