支持局域网 GPU OCR 服务,配置方式类似 Ollama。

This commit is contained in:
dekun
2026-06-28 14:16:06 +08:00
parent 14bf314544
commit ff0c103dc5
19 changed files with 305 additions and 9 deletions
+3
View File
@@ -16,6 +16,9 @@ CORS_ORIGINS=http://127.0.0.1:23566,http://localhost:23566
OLLAMA_BASE_URL=http://127.0.0.1:11434 OLLAMA_BASE_URL=http://127.0.0.1:11434
OLLAMA_MODEL=qwen2.5:7b OLLAMA_MODEL=qwen2.5:7b
# 局域网 GPU OCR 服务(deploy/ocr-worker),留空则本机 CPU 识别
OCR_SERVICE_URL=
OCR_API_KEY=
OPENAI_BASE_URL=https://api.openai.com/v1 OPENAI_BASE_URL=https://api.openai.com/v1
OPENAI_MODEL=gpt-4o-mini OPENAI_MODEL=gpt-4o-mini
FLUCTUATION_THRESHOLD=0.08 FLUCTUATION_THRESHOLD=0.08
+2
View File
@@ -5,4 +5,6 @@ UPLOAD_DIR=uploads
API_PORT=23568 API_PORT=23568
OLLAMA_BASE_URL=http://127.0.0.1:11434 OLLAMA_BASE_URL=http://127.0.0.1:11434
OLLAMA_MODEL=qwen2.5:7b OLLAMA_MODEL=qwen2.5:7b
OCR_SERVICE_URL=
OCR_API_KEY=
FLUCTUATION_THRESHOLD=0.08 FLUCTUATION_THRESHOLD=0.08
+3
View File
@@ -25,6 +25,9 @@ class Settings(BaseSettings):
OCR_MAX_SIDE: int = 1280 OCR_MAX_SIDE: int = 1280
UPLOAD_MAX_SIDE: int = 2048 UPLOAD_MAX_SIDE: int = 2048
OCR_WARMUP: bool = True OCR_WARMUP: bool = True
OCR_SERVICE_URL: str = ""
OCR_API_KEY: str = ""
OCR_USE_GPU: bool = False
class Config: class Config:
env_file = ".env" env_file = ".env"
+1
View File
@@ -159,6 +159,7 @@ class SystemSettings(Base):
openai_base_url: Mapped[str | None] = mapped_column(String(256), nullable=True) openai_base_url: Mapped[str | None] = mapped_column(String(256), nullable=True)
openai_model: Mapped[str | None] = mapped_column(String(128), nullable=True) openai_model: Mapped[str | None] = mapped_column(String(128), nullable=True)
openai_api_key: Mapped[str | None] = mapped_column(String(512), nullable=True) openai_api_key: Mapped[str | None] = mapped_column(String(512), nullable=True)
ocr_service_url: Mapped[str | None] = mapped_column(String(256), nullable=True)
updated_at: Mapped[datetime] = mapped_column( updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=lambda: datetime.now(timezone.utc) DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
) )
+3
View File
@@ -31,6 +31,7 @@ def settings_to_out(row: SystemSettings) -> SystemSettingsOut:
openai_base_url=row.openai_base_url, openai_base_url=row.openai_base_url,
openai_model=row.openai_model, openai_model=row.openai_model,
openai_api_key_set=bool(row.openai_api_key), openai_api_key_set=bool(row.openai_api_key),
ocr_service_url=row.ocr_service_url,
updated_at=row.updated_at, updated_at=row.updated_at,
) )
@@ -74,6 +75,8 @@ def update_settings(
row.openai_model = data.openai_model or None row.openai_model = data.openai_model or None
if data.openai_api_key is not None and data.openai_api_key.strip(): if data.openai_api_key is not None and data.openai_api_key.strip():
row.openai_api_key = data.openai_api_key.strip() row.openai_api_key = data.openai_api_key.strip()
if data.ocr_service_url is not None:
row.ocr_service_url = data.ocr_service_url.strip() or None
row.updated_at = datetime.now(timezone.utc) row.updated_at = datetime.now(timezone.utc)
db.commit() db.commit()
db.refresh(row) db.refresh(row)
+14 -2
View File
@@ -11,7 +11,7 @@ from sqlalchemy.orm import Session, joinedload
from app.core.config import settings from app.core.config import settings
from app.core.database import SessionLocal, get_db from app.core.database import SessionLocal, get_db
from app.core.deps import get_current_user from app.core.deps import get_current_user
from app.models.user import Subject, User, WrongQuestion, WrongQuestionCategory, WrongQuestionStatus from app.models.user import Subject, SystemSettings, User, WrongQuestion, WrongQuestionCategory, WrongQuestionStatus
from app.schemas import WrongQuestionCategoryEnum, WrongQuestionOut, WrongQuestionUpdate from app.schemas import WrongQuestionCategoryEnum, WrongQuestionOut, WrongQuestionUpdate
from app.services import annotation as annotation_service from app.services import annotation as annotation_service
from app.services import llm as llm_service from app.services import llm as llm_service
@@ -50,6 +50,13 @@ def _expire_stale_processing(wq: WrongQuestion, db: Session) -> None:
db.commit() db.commit()
def _ocr_service_url(db: Session) -> str | None:
    """Pick the OCR service URL for a request.

    Reads the ``ocr_service_url`` column of the ``SystemSettings`` row with
    primary key 1 and hands it to ``ocr_service.resolve_ocr_service_url``,
    which trims it and falls back to the environment setting when the stored
    value is missing or empty.

    Args:
        db: Open SQLAlchemy session used to load the settings row.

    Returns:
        The resolved URL, or ``None`` when no URL is configured.
    """
    settings_row = db.get(SystemSettings, 1)
    stored_url = settings_row.ocr_service_url if settings_row else None
    return ocr_service.resolve_ocr_service_url(stored_url)
def _parse_mark_regions(raw: str | None) -> list[dict] | None: def _parse_mark_regions(raw: str | None) -> list[dict] | None:
if not raw: if not raw:
return None return None
@@ -140,9 +147,12 @@ def _process_wrong_question(question_id: uuid.UUID):
wq.error_message = None wq.error_message = None
image_full = Path(settings.UPLOAD_DIR) / wq.image_path image_full = Path(settings.UPLOAD_DIR) / wq.image_path
ocr_url = _ocr_service_url(db)
try: try:
with ThreadPoolExecutor(max_workers=1) as pool: with ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(ocr_service.run_ocr_with_regions, str(image_full)) future = pool.submit(
ocr_service.run_ocr_with_regions, str(image_full), ocr_url
)
ocr_result = future.result(timeout=settings.OCR_TIMEOUT_SECONDS) ocr_result = future.result(timeout=settings.OCR_TIMEOUT_SECONDS)
ocr_text = ocr_result["text"] ocr_text = ocr_result["text"]
ocr_lines = ocr_result["lines"] ocr_lines = ocr_result["lines"]
@@ -166,6 +176,8 @@ def _process_wrong_question(question_id: uuid.UUID):
msg = _short_error(exc, "OCR 识别失败:") msg = _short_error(exc, "OCR 识别失败:")
if "libGL" in str(exc): if "libGL" in str(exc):
msg += " 请在服务器执行: sudo bash deploy/install-ocr-deps.sh && systemctl restart grade-archive" msg += " 请在服务器执行: sudo bash deploy/install-ocr-deps.sh && systemctl restart grade-archive"
elif ocr_url:
msg += f" 请检查 OCR 服务是否可达: {ocr_url} (可浏览器访问 {ocr_url.rstrip('/')}/health"
wq.error_message = msg wq.error_message = msg
db.commit() db.commit()
return return
+2
View File
@@ -74,6 +74,7 @@ class SystemSettingsOut(BaseModel):
openai_base_url: str | None = None openai_base_url: str | None = None
openai_model: str | None = None openai_model: str | None = None
openai_api_key_set: bool = False openai_api_key_set: bool = False
ocr_service_url: str | None = None
updated_at: datetime updated_at: datetime
model_config = {"from_attributes": True} model_config = {"from_attributes": True}
@@ -87,6 +88,7 @@ class SystemSettingsUpdate(BaseModel):
openai_base_url: str | None = None openai_base_url: str | None = None
openai_model: str | None = None openai_model: str | None = None
openai_api_key: str | None = None openai_api_key: str | None = None
ocr_service_url: str | None = None
class AdminProfileUpdate(BaseModel): class AdminProfileUpdate(BaseModel):
+2
View File
@@ -61,6 +61,8 @@ def run_migrations() -> None:
alters.append("ADD COLUMN openai_model VARCHAR(128)") alters.append("ADD COLUMN openai_model VARCHAR(128)")
if "openai_api_key" not in ss_columns: if "openai_api_key" not in ss_columns:
alters.append("ADD COLUMN openai_api_key VARCHAR(512)") alters.append("ADD COLUMN openai_api_key VARCHAR(512)")
if "ocr_service_url" not in ss_columns:
alters.append("ADD COLUMN ocr_service_url VARCHAR(256)")
if alters: if alters:
with engine.begin() as conn: with engine.begin() as conn:
for clause in alters: for clause in alters:
+36 -5
View File
@@ -5,6 +5,7 @@ import threading
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
import httpx
from PIL import Image from PIL import Image
from app.core.config import settings from app.core.config import settings
@@ -23,22 +24,32 @@ def get_ocr_engine():
if _ocr_engine is None: if _ocr_engine is None:
from paddleocr import PaddleOCR from paddleocr import PaddleOCR
use_gpu = settings.OCR_USE_GPU
_ocr_engine = PaddleOCR( _ocr_engine = PaddleOCR(
use_angle_cls=False, use_angle_cls=False,
lang="ch", lang="ch",
show_log=False, show_log=False,
use_gpu=False, use_gpu=use_gpu,
enable_mkldnn=True, enable_mkldnn=not use_gpu,
det_limit_side_len=min(settings.OCR_MAX_SIDE, 1280), det_limit_side_len=min(settings.OCR_MAX_SIDE, 1280),
rec_batch_num=8, rec_batch_num=8,
) )
return _ocr_engine return _ocr_engine
def resolve_ocr_service_url(service_url: str | None = None) -> str | None:
    """Resolve the remote OCR endpoint, preferring an explicit URL.

    Candidates are tried in order: the ``service_url`` argument, then
    ``settings.OCR_SERVICE_URL``. Each one is stripped *before* it is judged,
    so a whitespace-only argument no longer hides the configured default.

    Args:
        service_url: Optional caller-supplied URL (for example the value
            stored by an administrator).

    Returns:
        The first non-blank candidate with surrounding whitespace removed,
        or ``None`` when every candidate is missing or blank.
    """
    for candidate in (service_url, settings.OCR_SERVICE_URL):
        cleaned = (candidate or "").strip()
        if cleaned:
            return cleaned
    return None
def uses_remote_ocr(service_url: str | None = None) -> bool:
    """Tell whether a remote OCR endpoint resolves for ``service_url``.

    Returns:
        ``True`` when ``resolve_ocr_service_url`` yields a URL, ``False``
        when it yields ``None``.
    """
    resolved = resolve_ocr_service_url(service_url)
    return resolved is not None
def warmup_ocr_engine() -> None: def warmup_ocr_engine() -> None:
"""后台预加载 OCR 模型,避免首张图片等待数分钟。""" """后台预加载 OCR 模型,避免首张图片等待数分钟。"""
global _ocr_warmup_started global _ocr_warmup_started
if _ocr_warmup_started or not settings.OCR_WARMUP: if _ocr_warmup_started or not settings.OCR_WARMUP or uses_remote_ocr():
return return
_ocr_warmup_started = True _ocr_warmup_started = True
@@ -110,8 +121,20 @@ def _prepare_ocr_image(image_path: str) -> tuple[str, float, float, int, int, Pa
return str(tmp), scale_x, scale_y, orig_w, orig_h, tmp return str(tmp), scale_x, scale_y, orig_w, orig_h, tmp
def run_ocr_with_regions(image_path: str) -> dict: def _run_remote_ocr(service_url: str, image_path: str) -> dict:
"""Return OCR text plus line-level bounding boxes for annotation.""" url = f"{service_url.rstrip('/')}/api/ocr/regions"
headers: dict[str, str] = {}
if settings.OCR_API_KEY:
headers["X-OCR-Key"] = settings.OCR_API_KEY
with open(image_path, "rb") as handle:
files = {"file": (Path(image_path).name, handle, "image/jpeg")}
with httpx.Client(timeout=settings.OCR_TIMEOUT_SECONDS) as client:
resp = client.post(url, files=files, headers=headers)
resp.raise_for_status()
return resp.json()
def _run_local_ocr(image_path: str) -> dict:
engine = get_ocr_engine() engine = get_ocr_engine()
ocr_path, scale_x, scale_y, orig_w, orig_h, tmp_path = _prepare_ocr_image(image_path) ocr_path, scale_x, scale_y, orig_w, orig_h, tmp_path = _prepare_ocr_image(image_path)
try: try:
@@ -150,6 +173,14 @@ def run_ocr_with_regions(image_path: str) -> dict:
} }
def run_ocr_with_regions(image_path: str, service_url: str | None = None) -> dict:
    """Return OCR text plus line-level bounding boxes for annotation.

    Args:
        image_path: Path of the image file to recognise.
        service_url: Optional remote OCR endpoint; resolved through
            ``resolve_ocr_service_url``.

    Returns:
        The result dict produced by the remote service when an endpoint
        resolves, otherwise the one produced by the local engine.
    """
    endpoint = resolve_ocr_service_url(service_url)
    if not endpoint:
        # No endpoint configured anywhere: recognise on this machine.
        return _run_local_ocr(image_path)
    return _run_remote_ocr(endpoint, image_path)
def run_ocr(image_path: str) -> str: def run_ocr(image_path: str) -> str:
return run_ocr_with_regions(image_path)["text"] return run_ocr_with_regions(image_path)["text"]
+140
View File
@@ -0,0 +1,140 @@
"""局域网 OCR 服务:在带 NVIDIA 显卡的机器上运行,供成绩档案系统调用。"""
import hmac
import io
import logging
import os
import tempfile
from pathlib import Path

from fastapi import FastAPI, File, Header, HTTPException, UploadFile
from PIL import Image, UnidentifiedImageError
# Default OPENCV_IO_ENABLE_OPENEXR to "0"; a value already present in the
# environment is left untouched.
os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")
# Longest image side (pixels) allowed before an upload is downscaled.
OCR_MAX_SIDE = int(os.getenv("OCR_MAX_SIDE", "1280"))
# Shared secret expected in the X-OCR-Key header; empty disables the check.
OCR_API_KEY = os.getenv("OCR_API_KEY", "").strip()
# GPU inference is on by default; only "1", "true" or "yes" (any case) enable it.
OCR_USE_GPU = os.getenv("OCR_USE_GPU", "true").lower() in {"1", "true", "yes"}
app = FastAPI(title="Grade Archive OCR Worker", version="1.0.0")
# Process-wide PaddleOCR instance, created on first use by get_engine().
_engine = None
def _check_key(key: str | None) -> None:
    """Reject the request unless it carries the configured API key.

    Authentication is skipped entirely when ``OCR_API_KEY`` is empty.

    Args:
        key: Value of the ``X-OCR-Key`` request header, or ``None`` when the
            header is absent.

    Raises:
        HTTPException: 401 when a key is configured and ``key`` is missing
            or does not match it.
    """
    if not OCR_API_KEY:
        return
    # compare_digest takes time independent of where the values differ, so
    # response timing does not reveal how much of a guessed key is correct.
    # Both sides are encoded because the str form only accepts ASCII.
    supplied = (key or "").encode("utf-8")
    expected = OCR_API_KEY.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid OCR API key")
def get_engine():
    """Return the shared PaddleOCR engine, building it on the first call.

    The instance is cached in the module-level ``_engine`` so later calls
    reuse it instead of constructing a new one.
    """
    global _engine
    if _engine is not None:
        return _engine

    # Imported here rather than at module top, so paddleocr is only loaded
    # when an engine is actually built.
    from paddleocr import PaddleOCR

    engine_options = {
        "use_angle_cls": False,
        "lang": "ch",
        "show_log": False,
        "use_gpu": OCR_USE_GPU,
        # MKL-DNN is switched on exactly when the GPU is not used.
        "enable_mkldnn": not OCR_USE_GPU,
        "det_limit_side_len": min(OCR_MAX_SIDE, 1280),
        "rec_batch_num": 8,
    }
    _engine = PaddleOCR(**engine_options)
    return _engine
def _bbox_from_box(box: list) -> list[float]:
xs = [float(p[0]) for p in box]
ys = [float(p[1]) for p in box]
return [min(xs), min(ys), max(xs), max(ys)]
def _scale_box(box: list, scale_x: float, scale_y: float) -> list:
return [[float(p[0]) * scale_x, float(p[1]) * scale_y] for p in box]
def _encode_jpeg(image, quality: int) -> bytes:
    """Serialise a Pillow image to JPEG bytes at the given quality."""
    buf = io.BytesIO()
    image.save(buf, format="JPEG", quality=quality)
    return buf.getvalue()


def _prepare_image_bytes(content: bytes) -> tuple[bytes, float, float, int, int]:
    """Re-encode an upload as RGB JPEG, downscaling it when it is too large.

    Images whose longest side is at most ``OCR_MAX_SIDE`` keep their size;
    larger ones are resized with LANCZOS so the longest side fits the limit.

    Args:
        content: Raw bytes of the uploaded image.

    Returns:
        ``(jpeg_bytes, scale_x, scale_y, orig_w, orig_h)``. Multiplying a
        coordinate measured on the returned JPEG by ``scale_x`` / ``scale_y``
        maps it back onto the original image; both are ``1.0`` when no resize
        took place.
    """
    with Image.open(io.BytesIO(content)) as source:
        rgb = source.convert("RGB")
        orig_w, orig_h = rgb.size
        longest = max(orig_w, orig_h)
        if longest <= OCR_MAX_SIDE:
            return _encode_jpeg(rgb, 88), 1.0, 1.0, orig_w, orig_h

        ratio = OCR_MAX_SIDE / longest
        # Clamp to 1 px so an extreme aspect ratio cannot round a side to 0.
        new_w = max(1, int(orig_w * ratio))
        new_h = max(1, int(orig_h * ratio))
        resized = rgb.resize((new_w, new_h), Image.Resampling.LANCZOS)
        # The factors use the rounded size, so they are exact for the JPEG
        # that is actually handed to the OCR engine.
        return (
            _encode_jpeg(resized, 85),
            orig_w / new_w,
            orig_h / new_h,
            orig_w,
            orig_h,
        )
def _lines_from_result(result, scale_x: float, scale_y: float) -> list[dict]:
    """Turn a raw ``engine.ocr`` result into line dicts in original-image coordinates.

    Only ``result[0]`` is read. Each usable item is indexed as
    ``(box, rec)`` with ``rec[0]`` the text and ``rec[1]`` the confidence;
    items that are empty, too short, or have no text are skipped.
    """
    if not result or not result[0]:
        return []
    needs_rescale = scale_x != 1.0 or scale_y != 1.0
    lines: list[dict] = []
    for item in result[0]:
        if not item or len(item) < 2:
            continue
        box, rec = item[0], item[1]
        text = rec[0] if rec else ""
        conf = float(rec[1]) if rec and len(rec) > 1 else 0.0
        if not text:
            continue
        if needs_rescale:
            box = _scale_box(box, scale_x, scale_y)
        lines.append(
            {
                "text": text,
                "confidence": conf,
                "box": box,
                "bbox": _bbox_from_box(box),
            }
        )
    return lines


def run_ocr_on_bytes(content: bytes) -> dict:
    """Run OCR on raw image bytes and report text with per-line boxes.

    Args:
        content: Raw bytes of the image to recognise.

    Returns:
        A dict with ``text`` (recognised lines joined by newlines), ``lines``
        (dicts with ``text``, ``confidence``, ``box`` and ``bbox``, expressed
        in original-image coordinates) and the original ``width`` / ``height``.
    """
    engine = get_engine()
    image_bytes, scale_x, scale_y, orig_w, orig_h = _prepare_image_bytes(content)

    # delete=False because the engine is given a path, not the open handle.
    # The file is therefore removed by hand, and the write sits inside the
    # try so a failed write (e.g. disk full) cannot leave the file behind.
    tmp = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
    tmp_path = Path(tmp.name)
    try:
        with tmp:
            tmp.write(image_bytes)
        result = engine.ocr(str(tmp_path), cls=False)
    finally:
        tmp_path.unlink(missing_ok=True)

    lines = _lines_from_result(result, scale_x, scale_y)
    return {
        "text": "\n".join(line["text"] for line in lines),
        "lines": lines,
        "width": orig_w,
        "height": orig_h,
    }
@app.on_event("startup")
def warmup():
    """Run one OCR pass on a blank image when the application starts.

    The pass goes through ``run_ocr_on_bytes``, which builds the shared
    engine on first use, so that cost is paid at startup rather than on the
    first upload. A failure here never prevents the server from starting.
    """
    buf = io.BytesIO()
    Image.new("RGB", (120, 40), color=(255, 255, 255)).save(buf, format="JPEG")
    try:
        run_ocr_on_bytes(buf.getvalue())
    except Exception:
        # Best-effort: keep serving, but record why warmup failed instead of
        # discarding the error, so a broken install is visible in the logs.
        logging.getLogger(__name__).warning("OCR warmup failed", exc_info=True)
@app.get("/health")
def health():
    """Report that the worker is up and whether GPU inference is configured."""
    payload = {"status": "ok"}
    payload["gpu"] = OCR_USE_GPU
    return payload
@app.post("/api/ocr/regions")
async def ocr_regions(
    file: UploadFile = File(...),
    x_ocr_key: str | None = Header(default=None, alias="X-OCR-Key"),
):
    """Recognise text in an uploaded image and return it with line boxes.

    Args:
        file: The uploaded image (multipart field ``file``).
        x_ocr_key: Value of the ``X-OCR-Key`` header, checked by ``_check_key``.

    Returns:
        The dict produced by ``run_ocr_on_bytes``.

    Raises:
        HTTPException: 401 for a bad key, 400 for an empty or undecodable
            upload, 500 for any other failure during recognition.
    """
    _check_key(x_ocr_key)
    content = await file.read()
    if not content:
        raise HTTPException(status_code=400, detail="Empty image")
    try:
        # NOTE(review): run_ocr_on_bytes is a plain blocking call made from a
        # coroutine, so other requests (including /health) wait while it
        # runs. Moving it to a worker thread would need a decision on
        # whether the shared engine may be used concurrently.
        return run_ocr_on_bytes(content)
    except UnidentifiedImageError as exc:
        # Pillow could not decode the upload: a client error, not a server one.
        raise HTTPException(
            status_code=400, detail="Unsupported or corrupt image"
        ) from exc
    except Exception as exc:
        logging.getLogger(__name__).exception("OCR request failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
+45
View File
@@ -0,0 +1,45 @@
#!/usr/bin/env bash
# Install the OCR worker on a Linux machine with an NVIDIA GPU (e.g. RTX 3060 Ti).
# Creates a virtualenv next to this script, installs the GPU build of Paddle
# plus requirements.txt, then prints usage instructions.
set -euo pipefail

ROOT="$(cd "$(dirname "$0")" && pwd)"
VENV="${ROOT}/.venv"
PORT="${OCR_PORT:-23567}"

echo "==> OCR Worker 安装目录: ${ROOT}"

if ! command -v python3 >/dev/null; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "请先安装 python3" >&2
  exit 1
fi

python3 -m venv "${VENV}"
# shellcheck disable=SC1091
source "${VENV}/bin/activate"
pip install -U pip wheel

# Paddle GPU build (CUDA 11.8, which suits most 3060 Ti driver setups).
pip install paddlepaddle-gpu==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
pip install -r "${ROOT}/requirements.txt"

cat <<EOF
安装完成。
启动(前台测试):
cd ${ROOT}
OCR_USE_GPU=true OCR_PORT=${PORT} bash start.sh
局域网地址示例:
http://192.168.x.x:${PORT}
在成绩档案「系统设置 → AI 模型」里填写 OCR 服务地址:
http://192.168.x.x:${PORT}
可选环境变量:
OCR_USE_GPU=true # 使用显卡(默认 true)
OCR_API_KEY=随机字符串 # 局域网鉴权(可选)
OCR_MAX_SIDE=1280 # 识别长边上限
OCR_PORT=${PORT} # 监听端口
EOF
+17
View File
@@ -0,0 +1,17 @@
[Unit]
Description=Grade Archive OCR Worker (PaddleOCR GPU)
After=network.target

[Service]
Type=simple
WorkingDirectory=/opt/ocr-worker
Environment=OCR_USE_GPU=true
Environment=OCR_PORT=23567
Environment=OCR_HOST=0.0.0.0
# Uncomment and set a random secret to require the X-OCR-Key header.
# Environment=OCR_API_KEY=change-me-to-a-random-secret
# Host and port are taken from the Environment= lines above, so changing
# OCR_HOST / OCR_PORT there is enough to move the listener.
ExecStart=/opt/ocr-worker/.venv/bin/uvicorn app:app --host ${OCR_HOST} --port ${OCR_PORT}
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
+6
View File
@@ -0,0 +1,6 @@
fastapi==0.115.6
uvicorn[standard]==0.34.0
python-multipart==0.0.20
paddleocr==2.9.1
Pillow==11.0.0
# GPU 版 Paddle 请用 install.sh 安装,勿直接 pip install paddlepaddle
+17
View File
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Start the OCR worker with uvicorn from the virtualenv beside this script.
# OCR_HOST / OCR_PORT override the bind address (defaults 0.0.0.0 / 23567).
set -euo pipefail

script_dir="$(cd "$(dirname "$0")" && pwd)"
venv_dir="${script_dir}/.venv"
bind_host="${OCR_HOST:-0.0.0.0}"
bind_port="${OCR_PORT:-23567}"

# Stop early when the virtualenv has not been created yet.
[[ -d "${venv_dir}" ]] || {
  echo "未找到虚拟环境,请先运行: bash install.sh"
  exit 1
}

# shellcheck disable=SC1091
source "${venv_dir}/bin/activate"
cd "${script_dir}"
# exec replaces this shell, so uvicorn receives signals directly.
exec uvicorn app:app --host "${bind_host}" --port "${bind_port}"
File diff suppressed because one or more lines are too long
+1 -1
View File
@@ -9,7 +9,7 @@
<meta name="author" content="马建军" /> <meta name="author" content="马建军" />
<meta name="copyright" content="Copyright (c) 马建军. All rights reserved." /> <meta name="copyright" content="Copyright (c) 马建军. All rights reserved." />
<title>中学成绩档案</title> <title>中学成绩档案</title>
<script type="module" crossorigin src="/assets/index-DzzkB1zh.js"></script> <script type="module" crossorigin src="/assets/index-19dlnnB9.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-GY2etMYN.css"> <link rel="stylesheet" crossorigin href="/assets/index-GY2etMYN.css">
</head> </head>
<body> <body>
+1
View File
@@ -78,6 +78,7 @@ export const adminApi = {
openai_base_url?: string | null openai_base_url?: string | null
openai_model?: string | null openai_model?: string | null
openai_api_key?: string openai_api_key?: string
ocr_service_url?: string | null
}) => api.patch<SystemSettings>('/admin/settings', data), }) => api.patch<SystemSettings>('/admin/settings', data),
updateProfile: (data: { updateProfile: (data: {
username?: string username?: string
+10
View File
@@ -52,6 +52,7 @@ export default function SettingsPage() {
openai_base_url: settingsRes.data.openai_base_url || '', openai_base_url: settingsRes.data.openai_base_url || '',
openai_model: settingsRes.data.openai_model || '', openai_model: settingsRes.data.openai_model || '',
openai_api_key: '', openai_api_key: '',
ocr_service_url: settingsRes.data.ocr_service_url || '',
}) })
} finally { } finally {
setLoading(false) setLoading(false)
@@ -115,6 +116,7 @@ export default function SettingsPage() {
openai_base_url?: string openai_base_url?: string
openai_model?: string openai_model?: string
openai_api_key?: string openai_api_key?: string
ocr_service_url?: string
}) => { }) => {
const payload: Parameters<typeof adminApi.updateSettings>[0] = { const payload: Parameters<typeof adminApi.updateSettings>[0] = {
ai_provider: values.ai_provider, ai_provider: values.ai_provider,
@@ -122,6 +124,7 @@ export default function SettingsPage() {
ollama_model: values.ollama_model || null, ollama_model: values.ollama_model || null,
openai_base_url: values.openai_base_url || null, openai_base_url: values.openai_base_url || null,
openai_model: values.openai_model || null, openai_model: values.openai_model || null,
ocr_service_url: values.ocr_service_url?.trim() || null,
} }
if (values.openai_api_key?.trim()) { if (values.openai_api_key?.trim()) {
payload.openai_api_key = values.openai_api_key.trim() payload.openai_api_key = values.openai_api_key.trim()
@@ -234,6 +237,13 @@ export default function SettingsPage() {
</Form.Item> </Form.Item>
</> </>
)} )}
<Form.Item
name="ocr_service_url"
label="OCR 服务地址(局域网 GPU 机器)"
extra="留空则在应用服务器本机 CPU 识别。填写后类似 Ollama,例如 http://192.168.8.100:23567"
>
<Input placeholder="http://192.168.8.100:23567" />
</Form.Item>
<Typography.Paragraph type="secondary"> <Typography.Paragraph type="secondary">
// //
</Typography.Paragraph> </Typography.Paragraph>
+1
View File
@@ -23,6 +23,7 @@ export interface SystemSettings {
openai_base_url: string | null openai_base_url: string | null
openai_model: string | null openai_model: string | null
openai_api_key_set: boolean openai_api_key_set: boolean
ocr_service_url: string | null
updated_at: string updated_at: string
} }