"""Attachment parsing for the hub (central-control) AI chat."""

from __future__ import annotations

import base64
from typing import Any

from hub_ai.config import (
    CHAT_MAX_ATTACHMENTS,
    CHAT_MAX_IMAGE_BYTES,
    CHAT_MAX_TEXT_FILE_BYTES,
)

IMAGE_MIMES = {
    "image/jpeg",
    "image/jpg",
    "image/png",
    "image/webp",
    "image/gif",
}

TEXT_MIMES = {
    "text/plain",
    "text/markdown",
    "application/json",
}

# Generic "unknown binary" type; it says nothing about the real content, so
# the filename extension is consulted instead when a client declares it.
_GENERIC_MIME = "application/octet-stream"

# Filename suffixes (lower-case) mapped to the MIME type they imply.
_MIME_BY_SUFFIX: tuple[tuple[tuple[str, ...], str], ...] = (
    ((".png",), "image/png"),
    ((".jpg", ".jpeg"), "image/jpeg"),
    ((".webp",), "image/webp"),
    ((".gif",), "image/gif"),
    ((".md", ".markdown"), "text/markdown"),
    ((".txt",), "text/plain"),
    ((".json",), "application/json"),
)

# Suffixes accepted as text documents regardless of the declared MIME type.
_TEXT_SUFFIXES = (".txt", ".md", ".markdown", ".json")


def _guess_mime(filename: str, content_type: str) -> str:
    """Return the lower-cased MIME type for an uploaded file.

    The declared ``content_type`` wins when it is specific; any parameters
    after ``;`` (e.g. ``charset``) are dropped. When it is empty or the
    generic ``application/octet-stream``, the type is derived from the
    filename extension instead.

    Args:
        filename: Original file name; may be empty.
        content_type: Declared Content-Type value; may be empty.

    Returns:
        The resolved MIME type, or ``"application/octet-stream"`` when
        neither the declared type nor the extension identifies the file.
    """
    declared = (content_type or "").split(";")[0].strip().lower()
    if declared and declared != _GENERIC_MIME:
        return declared

    lowered = (filename or "").lower()
    for suffixes, mime in _MIME_BY_SUFFIX:
        if lowered.endswith(suffixes):
            return mime
    return _GENERIC_MIME


def parse_chat_attachments(raw_files: list[dict[str, Any]]) -> dict[str, Any]:
    """Classify uploaded chat attachments into images and text documents.

    Args:
        raw_files: Items shaped like ``{filename, content_type, data}`` where
            ``data`` is ``bytes`` or ``bytearray``. ``None`` or an empty list
            is accepted.

    Returns:
        A dict with the keys:

        * ``images_b64``: base64 (ASCII) strings, one per accepted image.
        * ``attachment_note``: accepted-file notes followed by error
          messages, joined with ``";"``.
        * ``attachment_meta``: one ``{name, kind, mime, size}`` dict per
          accepted file.
        * ``text_append``: decoded text attachments, each under a header
          line, separated by blank lines.
        * ``errors``: one message per rejected file.
    """
    images_b64: list[str] = []
    meta: list[dict] = []
    notes: list[str] = []
    text_blocks: list[str] = []
    errors: list[str] = []

    # Only the first CHAT_MAX_ATTACHMENTS items are processed; anything past
    # the limit is ignored without producing an error entry.
    for item in (raw_files or [])[:CHAT_MAX_ATTACHMENTS]:
        name = str(item.get("filename") or "file")
        data = item.get("data") or b""
        if not isinstance(data, (bytes, bytearray)):
            errors.append(f"{name}: 无效数据")
            continue

        mime = _guess_mime(name, str(item.get("content_type") or ""))
        size = len(data)

        if mime in IMAGE_MIMES:
            if size > CHAT_MAX_IMAGE_BYTES:
                errors.append(f"{name}: 图片超过 {CHAT_MAX_IMAGE_BYTES // 1024 // 1024}MB")
                continue
            images_b64.append(base64.b64encode(bytes(data)).decode("ascii"))
            meta.append({"name": name, "kind": "image", "mime": mime, "size": size})
            notes.append(f"图片 {name}")
            continue

        if mime in TEXT_MIMES or name.lower().endswith(_TEXT_SUFFIXES):
            if size > CHAT_MAX_TEXT_FILE_BYTES:
                errors.append(f"{name}: 文本超过 {CHAT_MAX_TEXT_FILE_BYTES // 1024}KB")
                continue
            try:
                # "utf-8-sig" drops a leading BOM (common in files saved by
                # Windows editors); plain "utf-8" would keep it as U+FEFF,
                # which str.strip() does not remove.
                text = bytes(data).decode("utf-8-sig")
            except UnicodeDecodeError:
                errors.append(f"{name}: 非 UTF-8 文本")
                continue
            text_blocks.append(f"--- 附件 {name} ---\n{text.strip()}")
            meta.append({"name": name, "kind": "text", "mime": mime, "size": size})
            notes.append(f"文档 {name}")
            continue

        errors.append(f"{name}: 不支持的类型(仅图片或 txt/md/json)")

    # Notes for accepted files come first, then the error messages.
    attachment_note = ";".join([*notes, *errors])
    text_append = "\n\n".join(text_blocks)

    return {
        "images_b64": images_b64,
        "attachment_note": attachment_note,
        "attachment_meta": meta,
        "text_append": text_append,
        "errors": errors,
    }