1c50ebc0ec
- 新增 deploy/install-ocr-deps.sh,update.sh 自动检查 - install.sh 预装 OpenGL/Mesa 库
87 lines
2.4 KiB
Python
87 lines
2.4 KiB
Python
from pathlib import Path
|
|
|
|
import os
|
|
|
|
from PIL import Image
|
|
|
|
from app.core.config import settings
|
|
|
|
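# Headless (no GUI) server: keep OpenCV/Paddle from depending on X11.
# NOTE(review): the flag set below governs OpenCV's OpenEXR codec; confirm it is the setting that avoids the X11 dependency.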
|
|
os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")  # setdefault: a value already present in the environment is left untouched
|
|
|
|
_ocr_engine = None  # module-level cache for the OCR engine; stays None until get_ocr_engine() builds it
|
|
|
|
|
|
def get_ocr_engine():
    """Return the process-wide PaddleOCR instance, creating it on first use.

    The instance is cached in the module-level ``_ocr_engine`` variable, so
    every later call returns the same object without rebuilding it.
    """
    global _ocr_engine

    # Fast path: the engine has already been built by an earlier call.
    if _ocr_engine is not None:
        return _ocr_engine

    # Imported here rather than at module level so that paddleocr is only
    # loaded once OCR is actually requested.
    from paddleocr import PaddleOCR

    _ocr_engine = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
    return _ocr_engine
|
|
|
|
|
|
def _bbox_from_box(box: list) -> list[float]:
|
|
xs = [float(p[0]) for p in box]
|
|
ys = [float(p[1]) for p in box]
|
|
return [min(xs), min(ys), max(xs), max(ys)]
|
|
|
|
|
|
def run_ocr_with_regions(image_path: str) -> dict:
    """Run OCR on an image and return its text with per-line regions.

    Args:
        image_path: Path of the image, passed to the OCR engine and to
            ``Image.open``.

    Returns:
        A dict with the keys:
          * ``"text"``   - all recognized line texts joined with newlines.
          * ``"lines"``  - list of dicts, each holding ``"text"``,
            ``"confidence"``, ``"box"`` (the polygon as returned by the
            engine) and ``"bbox"`` (``[x_min, y_min, x_max, y_max]``).
          * ``"width"`` / ``"height"`` - image size, or ``0`` for both when
            opening the image raises ``OSError``.
    """
    raw = get_ocr_engine().ocr(image_path, cls=True)
    # Only the first element of the engine result is consumed; a missing or
    # empty first element means nothing was recognized.
    detections = raw[0] if raw and raw[0] else []

    lines: list[dict] = []
    for entry in detections:
        # Skip entries that do not carry both a polygon and a recognition.
        if not entry or len(entry) < 2:
            continue
        polygon, recognition = entry[0], entry[1]

        if recognition:
            text = recognition[0]
            conf = float(recognition[1]) if len(recognition) > 1 else 0.0
        else:
            text, conf = "", 0.0

        # Lines with empty text are dropped.
        if not text:
            continue

        lines.append(
            {
                "text": text,
                "confidence": conf,
                "box": polygon,
                "bbox": _bbox_from_box(polygon),
            }
        )

    # Best effort: the size stays 0x0 if the image cannot be opened.
    dimensions = (0, 0)
    try:
        with Image.open(image_path) as picture:
            dimensions = picture.size
    except OSError:
        pass
    width, height = dimensions

    texts = [entry["text"] for entry in lines]
    return {
        "text": "\n".join(texts),
        "lines": lines,
        "width": width,
        "height": height,
    }
|
|
|
|
|
|
def run_ocr(image_path: str) -> str:
    """Return only the recognized text for ``image_path``.

    Delegates to ``run_ocr_with_regions`` and keeps just its ``"text"`` entry.
    """
    ocr_result = run_ocr_with_regions(image_path)
    return ocr_result["text"]
|
|
|
|
|
|
def save_upload_file(user_id: str, question_id: str, filename: str, content: bytes) -> str:
    """Write uploaded image bytes under the user's upload directory.

    Args:
        user_id: Name of the sub-directory of ``settings.UPLOAD_DIR``.
        question_id: Base name (without extension) of the stored file.
        filename: Original upload name; only its suffix is used.
        content: Raw bytes to write.

    Returns:
        The stored path relative to ``settings.UPLOAD_DIR``, in the form
        ``"<user_id>/<question_id><ext>"``.
    """
    # NOTE(review): user_id and question_id are interpolated into the path
    # as-is; confirm callers only pass trusted identifiers.
    allowed_suffixes = {".jpg", ".jpeg", ".png", ".webp"}

    # A missing or unrecognized extension falls back to ".jpg".
    suffix = Path(filename).suffix.lower()
    if suffix not in allowed_suffixes:
        suffix = ".jpg"

    upload_root = Path(settings.UPLOAD_DIR)
    (upload_root / user_id).mkdir(parents=True, exist_ok=True)

    rel_path = f"{user_id}/{question_id}{suffix}"
    (upload_root / rel_path).write_bytes(content)
    return rel_path
|
|
|
|
|
|
def annotated_rel_path(original_rel: str) -> str:
    """Derive the path of the annotated copy of an image.

    The result sits in the same directory as ``original_rel`` and is named
    ``<stem>_marked.jpg``; the original extension is always replaced by
    ``.jpg``.
    """
    source = Path(original_rel)
    marked_name = source.stem + "_marked.jpg"
    return str(source.parent.joinpath(marked_name))
|