OCR Worker 改用 RapidOCR/ONNX,修复 Paddle SIGILL。
This commit is contained in:
+6
-10
@@ -36,11 +36,9 @@ install_ocr_worker() {
|
|||||||
log_error "未找到 ${worker_dir}"
|
log_error "未找到 ${worker_dir}"
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
log_info "安装/更新 OCR Worker (PaddleOCR)…"
|
log_info "安装/更新 OCR Worker (RapidOCR/ONNX)…"
|
||||||
chmod +x "${worker_dir}"/*.sh 2>/dev/null || true
|
chmod +x "${worker_dir}"/*.sh 2>/dev/null || true
|
||||||
local use_gpu
|
OCR_PORT="${OCR_PORT}" bash "${worker_dir}/install.sh"
|
||||||
use_gpu="$(detect_ocr_use_gpu)"
|
|
||||||
OCR_PORT="${OCR_PORT}" OCR_USE_GPU="${use_gpu}" bash "${worker_dir}/install.sh"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ocr_screen_running() {
|
ocr_screen_running() {
|
||||||
@@ -48,7 +46,7 @@ ocr_screen_running() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
start_ocr_screen() {
|
start_ocr_screen() {
|
||||||
local worker_dir
|
local worker_dir log_file
|
||||||
worker_dir="$(ocr_worker_dir)"
|
worker_dir="$(ocr_worker_dir)"
|
||||||
if [[ ! -x "${worker_dir}/.venv/bin/uvicorn" ]]; then
|
if [[ ! -x "${worker_dir}/.venv/bin/uvicorn" ]]; then
|
||||||
log_warn "OCR Worker 未安装,跳过 screen 启动"
|
log_warn "OCR Worker 未安装,跳过 screen 启动"
|
||||||
@@ -59,20 +57,18 @@ start_ocr_screen() {
|
|||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
local use_gpu log_file
|
log_info "启动 OCR Worker → screen「${OCR_SCREEN_NAME}」(RapidOCR, 端口 ${OCR_PORT})"
|
||||||
use_gpu="$(detect_ocr_use_gpu)"
|
|
||||||
log_info "启动 OCR Worker → screen「${OCR_SCREEN_NAME}」(GPU=${use_gpu}, 端口 ${OCR_PORT})"
|
|
||||||
if ocr_screen_running; then
|
if ocr_screen_running; then
|
||||||
screen -S "${OCR_SCREEN_NAME}" -X quit 2>/dev/null || true
|
screen -S "${OCR_SCREEN_NAME}" -X quit 2>/dev/null || true
|
||||||
sleep 1
|
sleep 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
mkdir -p "${INSTALL_DIR}/logs" 2>/dev/null || true
|
mkdir -p "${INSTALL_DIR}/logs" 2>/dev/null || true
|
||||||
local log_file="${INSTALL_DIR}/logs/ocr-worker.log"
|
log_file="${INSTALL_DIR}/logs/ocr-worker.log"
|
||||||
|
|
||||||
screen -dmS "${OCR_SCREEN_NAME}" bash -c "
|
screen -dmS "${OCR_SCREEN_NAME}" bash -c "
|
||||||
cd '${worker_dir}' &&
|
cd '${worker_dir}' &&
|
||||||
export OCR_USE_GPU='${use_gpu}' OCR_PORT='${OCR_PORT}' OCR_HOST=0.0.0.0 &&
|
export OCR_PORT='${OCR_PORT}' OCR_HOST=0.0.0.0 &&
|
||||||
exec bash run.sh >> '${log_file}' 2>&1
|
exec bash run.sh >> '${log_file}' 2>&1
|
||||||
"
|
"
|
||||||
sleep 2
|
sleep 2
|
||||||
|
|||||||
+19
-68
@@ -1,4 +1,4 @@
|
|||||||
"""局域网 OCR 服务:在带 NVIDIA 显卡的机器上运行,供成绩档案系统调用。"""
|
"""局域网 OCR 服务:RapidOCR(ONNX),不依赖 Paddle,避免 SIGILL/cuDNN 问题。"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -9,19 +9,14 @@ from pathlib import Path
|
|||||||
from fastapi import FastAPI, File, Header, HTTPException, UploadFile
|
from fastapi import FastAPI, File, Header, HTTPException, UploadFile
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")
|
|
||||||
os.environ.setdefault("FLAGS_use_mkldnn", "0")
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||||
logger = logging.getLogger("ocr-worker")
|
logger = logging.getLogger("ocr-worker")
|
||||||
|
|
||||||
OCR_MAX_SIDE = int(os.getenv("OCR_MAX_SIDE", "1280"))
|
OCR_MAX_SIDE = int(os.getenv("OCR_MAX_SIDE", "1280"))
|
||||||
OCR_API_KEY = os.getenv("OCR_API_KEY", "").strip()
|
OCR_API_KEY = os.getenv("OCR_API_KEY", "").strip()
|
||||||
OCR_USE_GPU = os.getenv("OCR_USE_GPU", "true").lower() in {"1", "true", "yes"}
|
|
||||||
|
|
||||||
app = FastAPI(title="Grade Archive OCR Worker", version="1.0.0")
|
app = FastAPI(title="Grade Archive OCR Worker", version="2.0.0")
|
||||||
_engine = None
|
_engine = None
|
||||||
_engine_mode = "none"
|
|
||||||
|
|
||||||
|
|
||||||
def _check_key(key: str | None) -> None:
|
def _check_key(key: str | None) -> None:
|
||||||
@@ -29,54 +24,15 @@ def _check_key(key: str | None) -> None:
|
|||||||
raise HTTPException(status_code=401, detail="Invalid OCR API key")
|
raise HTTPException(status_code=401, detail="Invalid OCR API key")
|
||||||
|
|
||||||
|
|
||||||
def _create_engine(use_gpu: bool):
|
def get_engine():
|
||||||
from paddleocr import PaddleOCR
|
global _engine
|
||||||
|
if _engine is None:
|
||||||
|
from rapidocr_onnxruntime import RapidOCR
|
||||||
|
|
||||||
return PaddleOCR(
|
logger.info("Loading RapidOCR (ONNX CPU)…")
|
||||||
use_angle_cls=False,
|
_engine = RapidOCR()
|
||||||
lang="ch",
|
logger.info("RapidOCR ready")
|
||||||
show_log=False,
|
return _engine
|
||||||
use_gpu=use_gpu,
|
|
||||||
enable_mkldnn=False,
|
|
||||||
ir_optim=False,
|
|
||||||
det_limit_side_len=min(OCR_MAX_SIDE, 1280),
|
|
||||||
rec_batch_num=8,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_engine(force_cpu: bool = False):
|
|
||||||
global _engine, _engine_mode
|
|
||||||
if _engine is not None and not force_cpu:
|
|
||||||
return _engine
|
|
||||||
|
|
||||||
modes: list[bool] = [False] if force_cpu or not OCR_USE_GPU else [True, False]
|
|
||||||
last_err: Exception | None = None
|
|
||||||
for use_gpu in modes:
|
|
||||||
try:
|
|
||||||
logger.info("Loading PaddleOCR use_gpu=%s", use_gpu)
|
|
||||||
_engine = _create_engine(use_gpu)
|
|
||||||
_engine_mode = "gpu" if use_gpu else "cpu"
|
|
||||||
logger.info("PaddleOCR ready mode=%s", _engine_mode)
|
|
||||||
return _engine
|
|
||||||
except Exception as exc:
|
|
||||||
last_err = exc
|
|
||||||
logger.warning("PaddleOCR init failed use_gpu=%s: %s", use_gpu, exc)
|
|
||||||
_engine = None
|
|
||||||
_engine_mode = "none"
|
|
||||||
|
|
||||||
hint = ""
|
|
||||||
err_text = str(last_err or "")
|
|
||||||
if "libGL" in err_text:
|
|
||||||
hint = " 请执行: sudo bash deploy/install-ocr-deps.sh 后重启 OCR"
|
|
||||||
elif any(x in err_text.lower() for x in ("cuda", "cudnn", "gpu", "out of memory")):
|
|
||||||
hint = " 显存不足或 CUDA 异常,可设置 OCR_USE_GPU=false 用 CPU"
|
|
||||||
raise RuntimeError(f"PaddleOCR 初始化失败: {last_err}{hint}") from last_err
|
|
||||||
|
|
||||||
|
|
||||||
def _reset_engine():
|
|
||||||
global _engine, _engine_mode
|
|
||||||
_engine = None
|
|
||||||
_engine_mode = "none"
|
|
||||||
|
|
||||||
|
|
||||||
def _bbox_from_box(box: list) -> list[float]:
|
def _bbox_from_box(box: list) -> list[float]:
|
||||||
@@ -110,32 +66,31 @@ def _prepare_image_bytes(content: bytes) -> tuple[bytes, float, float, int, int]
|
|||||||
return buf.getvalue(), scale_x, scale_y, orig_w, orig_h
|
return buf.getvalue(), scale_x, scale_y, orig_w, orig_h
|
||||||
|
|
||||||
|
|
||||||
def _run_ocr_impl(content: bytes) -> dict:
|
def run_ocr_on_bytes(content: bytes) -> dict:
|
||||||
engine = get_engine()
|
engine = get_engine()
|
||||||
image_bytes, scale_x, scale_y, orig_w, orig_h = _prepare_image_bytes(content)
|
image_bytes, scale_x, scale_y, orig_w, orig_h = _prepare_image_bytes(content)
|
||||||
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
|
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
|
||||||
tmp.write(image_bytes)
|
tmp.write(image_bytes)
|
||||||
tmp_path = tmp.name
|
tmp_path = tmp.name
|
||||||
try:
|
try:
|
||||||
result = engine.ocr(tmp_path, cls=False)
|
result, _elapsed = engine(tmp_path)
|
||||||
finally:
|
finally:
|
||||||
Path(tmp_path).unlink(missing_ok=True)
|
Path(tmp_path).unlink(missing_ok=True)
|
||||||
|
|
||||||
lines: list[dict] = []
|
lines: list[dict] = []
|
||||||
if result and result[0]:
|
if result:
|
||||||
for item in result[0]:
|
for item in result:
|
||||||
if not item or len(item) < 2:
|
if not item or len(item) < 2:
|
||||||
continue
|
continue
|
||||||
box, rec = item[0], item[1]
|
box, text = item[0], item[1]
|
||||||
text = rec[0] if rec else ""
|
conf = float(item[2]) if len(item) > 2 else 0.0
|
||||||
conf = float(rec[1]) if rec and len(rec) > 1 else 0.0
|
|
||||||
if not text:
|
if not text:
|
||||||
continue
|
continue
|
||||||
if scale_x != 1.0 or scale_y != 1.0:
|
if scale_x != 1.0 or scale_y != 1.0:
|
||||||
box = _scale_box(box, scale_x, scale_y)
|
box = _scale_box(box, scale_x, scale_y)
|
||||||
lines.append(
|
lines.append(
|
||||||
{
|
{
|
||||||
"text": text,
|
"text": str(text),
|
||||||
"confidence": conf,
|
"confidence": conf,
|
||||||
"box": box,
|
"box": box,
|
||||||
"bbox": _bbox_from_box(box),
|
"bbox": _bbox_from_box(box),
|
||||||
@@ -147,17 +102,13 @@ def _run_ocr_impl(content: bytes) -> dict:
|
|||||||
"lines": lines,
|
"lines": lines,
|
||||||
"width": orig_w,
|
"width": orig_w,
|
||||||
"height": orig_h,
|
"height": orig_h,
|
||||||
"engine_mode": _engine_mode,
|
"engine_mode": "rapidocr-onnx",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def run_ocr_on_bytes(content: bytes) -> dict:
|
|
||||||
return _run_ocr_impl(content)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
def health():
|
def health():
|
||||||
return {"status": "ok", "gpu_requested": OCR_USE_GPU, "engine_mode": _engine_mode}
|
return {"status": "ok", "engine": "rapidocr-onnxruntime"}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/ocr/regions")
|
@app.post("/api/ocr/regions")
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# 在带 NVIDIA 显卡的 Linux 机器上安装 OCR Worker(由 deploy/install.sh 自动调用)
|
# OCR Worker 安装(RapidOCR / ONNX,无需 Paddle/GPU)
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
ROOT="$(cd "$(dirname "$0")" && pwd)"
|
ROOT="$(cd "$(dirname "$0")" && pwd)"
|
||||||
@@ -8,6 +8,7 @@ PORT="${OCR_PORT:-23567}"
|
|||||||
PIP_MIRROR="${PIP_MIRROR:-https://pypi.tuna.tsinghua.edu.cn/simple}"
|
PIP_MIRROR="${PIP_MIRROR:-https://pypi.tuna.tsinghua.edu.cn/simple}"
|
||||||
|
|
||||||
echo "==> OCR Worker 安装目录: ${ROOT}"
|
echo "==> OCR Worker 安装目录: ${ROOT}"
|
||||||
|
echo "==> 引擎: RapidOCR (ONNX CPU,无需 cuDNN/GPU)"
|
||||||
|
|
||||||
if ! command -v python3 >/dev/null; then
|
if ! command -v python3 >/dev/null; then
|
||||||
echo "错误: 请先安装 python3"
|
echo "错误: 请先安装 python3"
|
||||||
@@ -15,7 +16,7 @@ if ! command -v python3 >/dev/null; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ -d "${VENV}" ]]; then
|
if [[ -d "${VENV}" ]]; then
|
||||||
echo "==> 已有虚拟环境,跳过 python3 -m venv"
|
echo "==> 已有虚拟环境"
|
||||||
else
|
else
|
||||||
python3 -m venv "${VENV}"
|
python3 -m venv "${VENV}"
|
||||||
fi
|
fi
|
||||||
@@ -23,33 +24,11 @@ fi
|
|||||||
# shellcheck disable=SC1091
|
# shellcheck disable=SC1091
|
||||||
source "${VENV}/bin/activate"
|
source "${VENV}/bin/activate"
|
||||||
pip install -U pip wheel -i "${PIP_MIRROR}"
|
pip install -U pip wheel -i "${PIP_MIRROR}"
|
||||||
|
pip uninstall -y paddlepaddle paddlepaddle-gpu paddleocr 2>/dev/null || true
|
||||||
install_paddle() {
|
|
||||||
local use_gpu="${OCR_USE_GPU:-false}"
|
|
||||||
if [[ "${use_gpu}" == "true" ]] && command -v nvidia-smi >/dev/null 2>&1 && ldconfig -p 2>/dev/null | grep -q libcudnn; then
|
|
||||||
local cuda_major
|
|
||||||
cuda_major="$(nvidia-smi 2>/dev/null | sed -n 's/.*CUDA Version: \([0-9]*\)\.[0-9]*/\1/p' | head -1)"
|
|
||||||
cuda_major="${cuda_major:-11}"
|
|
||||||
echo "==> 安装 paddlepaddle-gpu (CUDA ${cuda_major}.x)…"
|
|
||||||
if [[ "${cuda_major}" -ge 12 ]]; then
|
|
||||||
pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/ \
|
|
||||||
|| pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
|
|
||||||
else
|
|
||||||
pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "==> 安装 paddlepaddle CPU(无 cuDNN 或 OCR_USE_GPU=false)…"
|
|
||||||
pip install paddlepaddle==2.6.2 -i "${PIP_MIRROR}"
|
|
||||||
fi
|
|
||||||
pip install 'protobuf>=3.20,<4' -q
|
|
||||||
}
|
|
||||||
|
|
||||||
install_paddle
|
|
||||||
pip install -r "${ROOT}/requirements.txt" -i "${PIP_MIRROR}"
|
pip install -r "${ROOT}/requirements.txt" -i "${PIP_MIRROR}"
|
||||||
chmod +x "${ROOT}/run.sh" "${ROOT}/start.sh" 2>/dev/null || true
|
chmod +x "${ROOT}/run.sh" "${ROOT}/start.sh" 2>/dev/null || true
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
python3 -c "import fastapi, uvicorn, paddle; print('paddle', paddle.__version__, 'OK')"
|
python3 -c "from rapidocr_onnxruntime import RapidOCR; print('RapidOCR OK')"
|
||||||
echo ""
|
echo ""
|
||||||
echo "==> OCR Worker 安装完成。由 deploy/install.sh 通过 screen 自动启动。"
|
echo "==> 安装完成。管理: bash $(dirname "$ROOT")/ocr-screen.sh status"
|
||||||
echo " 手动管理: bash $(dirname "$ROOT")/ocr-screen.sh status"
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
fastapi==0.115.6
|
fastapi==0.115.6
|
||||||
uvicorn[standard]==0.34.0
|
uvicorn[standard]==0.34.0
|
||||||
python-multipart==0.0.20
|
python-multipart==0.0.20
|
||||||
paddleocr==2.9.1
|
|
||||||
Pillow==11.0.0
|
Pillow==11.0.0
|
||||||
# GPU 版 Paddle 请用 install.sh 安装,勿直接 pip install paddlepaddle
|
rapidocr-onnxruntime>=1.3.0
|
||||||
|
onnxruntime>=1.16.0
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ ROOT="$(cd "$(dirname "$0")" && pwd)"
|
|||||||
VENV="${ROOT}/.venv"
|
VENV="${ROOT}/.venv"
|
||||||
PORT="${OCR_PORT:-23567}"
|
PORT="${OCR_PORT:-23567}"
|
||||||
|
|
||||||
export OCR_USE_GPU="${OCR_USE_GPU:-true}"
|
export OCR_USE_GPU="${OCR_USE_GPU:-false}"
|
||||||
export OCR_HOST="${OCR_HOST:-0.0.0.0}"
|
export OCR_HOST="${OCR_HOST:-0.0.0.0}"
|
||||||
|
|
||||||
if [[ ! -d "${VENV}" ]]; then
|
if [[ ! -d "${VENV}" ]]; then
|
||||||
|
|||||||
Reference in New Issue
Block a user