"""PaddleOCR wrappers and helpers for storing uploaded image files."""

import logging
import os
from pathlib import Path

from PIL import Image

from app.core.config import settings

logger = logging.getLogger(__name__)

# Headless server: keep OpenCV/Paddle from depending on X11.
os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "0")

# Extensions kept as-is for stored uploads; any other suffix is saved as ".jpg".
_ALLOWED_UPLOAD_EXTS = frozenset({".jpg", ".jpeg", ".png", ".webp"})

_ocr_engine = None


def get_ocr_engine():
    """Return the module-wide PaddleOCR engine, creating it on first call.

    ``paddleocr`` is imported inside the function, so importing this module
    does not import it; the engine is built once and cached in ``_ocr_engine``.
    """
    global _ocr_engine
    if _ocr_engine is None:
        from paddleocr import PaddleOCR

        _ocr_engine = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
    return _ocr_engine


def _bbox_from_box(box: list) -> list[float]:
    """Return the axis-aligned bounds ``[min_x, min_y, max_x, max_y]`` of *box*.

    *box* is a sequence of points; element 0 of each point is read as x and
    element 1 as y.

    Raises:
        ValueError: if *box* is empty (``min()`` of an empty sequence).
    """
    xs = [float(p[0]) for p in box]
    ys = [float(p[1]) for p in box]
    return [min(xs), min(ys), max(xs), max(ys)]


def run_ocr_with_regions(image_path: str) -> dict:
    """Return OCR text plus line-level bounding boxes for annotation.

    Args:
        image_path: Path of the image handed to the OCR engine and to Pillow.

    Returns:
        A dict with keys:
          * ``"text"``   - recognized lines joined with newlines,
          * ``"lines"``  - one dict per non-empty line with ``"text"``,
            ``"confidence"``, ``"box"`` (as returned by the engine) and
            ``"bbox"`` (``[min_x, min_y, max_x, max_y]``),
          * ``"width"`` / ``"height"`` - image size in pixels, or ``0`` when
            the image could not be opened.
    """
    engine = get_ocr_engine()
    result = engine.ocr(image_path, cls=True)

    lines: list[dict] = []
    # Only the first element of the result is read; it may be empty/None.
    if result and result[0]:
        for item in result[0]:
            if not item or len(item) < 2:
                continue
            box, rec = item[0], item[1]
            text = rec[0] if rec else ""
            conf = float(rec[1]) if rec and len(rec) > 1 else 0.0
            if not text:
                continue
            lines.append(
                {
                    "text": text,
                    "confidence": conf,
                    "box": box,
                    "bbox": _bbox_from_box(box),
                }
            )

    width, height = 0, 0
    try:
        with Image.open(image_path) as img:
            width, height = img.size
    except OSError as exc:
        # Best effort: the OCR result is still useful without dimensions, so
        # fall back to 0x0 but leave a trace instead of failing silently.
        logger.warning("Could not read image size for %s: %s", image_path, exc)

    return {
        "text": "\n".join(line["text"] for line in lines),
        "lines": lines,
        "width": width,
        "height": height,
    }


def run_ocr(image_path: str) -> str:
    """Return only the recognized text for *image_path*."""
    return run_ocr_with_regions(image_path)["text"]


def _is_within(base: Path, target: Path) -> bool:
    """Return True if *target* is lexically inside (or equal to) *base*.

    The comparison is purely lexical (``..`` segments are collapsed, symlinks
    are not followed), so symlinked upload directories keep working.
    """
    base_abs = os.path.abspath(base)
    target_abs = os.path.abspath(target)
    try:
        return os.path.commonpath([base_abs, target_abs]) == base_abs
    except ValueError:
        # commonpath refuses paths it cannot compare (e.g. different drives).
        return False


def save_upload_file(user_id: str, question_id: str, filename: str, content: bytes) -> str:
    """Write *content* under ``settings.UPLOAD_DIR`` and return its relative path.

    The file is stored as ``<user_id>/<question_id><ext>``, where ``<ext>`` is
    the lower-cased suffix of *filename* if it is one of ``.jpg``, ``.jpeg``,
    ``.png`` or ``.webp``, and ``.jpg`` otherwise.

    Args:
        user_id: Directory name (under the upload root) for this user.
        question_id: File stem for the stored upload.
        filename: Original file name; only its suffix is used.
        content: Raw bytes to write.

    Returns:
        The path relative to ``settings.UPLOAD_DIR``, using ``/`` separators.

    Raises:
        ValueError: if *user_id* / *question_id* would place the file outside
            ``settings.UPLOAD_DIR`` (``..`` segments, absolute paths, or an
            empty *user_id*, which would yield an absolute ``/<id>.ext``).
    """
    ext = Path(filename).suffix.lower() or ".jpg"
    if ext not in _ALLOWED_UPLOAD_EXTS:
        ext = ".jpg"

    upload_root = Path(settings.UPLOAD_DIR)
    user_dir = upload_root / user_id
    rel_path = f"{user_id}/{question_id}{ext}"
    full_path = upload_root / rel_path

    # Refuse to create directories or write files outside the upload root.
    if not (_is_within(upload_root, user_dir) and _is_within(upload_root, full_path)):
        raise ValueError(f"upload path escapes the upload directory: {rel_path!r}")

    user_dir.mkdir(parents=True, exist_ok=True)
    full_path.write_bytes(content)
    return rel_path


def annotated_rel_path(original_rel: str) -> str:
    """Return the sibling path ``<stem>_marked.jpg`` for *original_rel*.

    The result keeps the directory of *original_rel*, replaces the file name
    with ``<stem>_marked.jpg``, and always uses ``/`` separators so it matches
    the relative paths produced by ``save_upload_file`` on every platform.
    """
    p = Path(original_rel)
    return (p.parent / f"{p.stem}_marked.jpg").as_posix()