# qihuo/doc_render.py

# Copyright (c) 2025-2026 马建军. All rights reserved.
# 专有软件 — 未经授权禁止复制、传播、转售。
# 详见 LICENSE.zh-CN.txt

"""将项目 docs 下的 Markdown 转为安全 HTML（无第三方依赖）。"""
from __future__ import annotations

import html
import re
from pathlib import Path

# Directory that holds the Markdown documents: the "docs" folder that sits
# next to this module, resolved to an absolute path at import time.
_DOCS_ROOT = Path(__file__).resolve().parent / "docs"

# Allow-list mapping a slug to a Markdown file name under _DOCS_ROOT.
# read_doc() raises FileNotFoundError for any slug that is not a key here.
ALLOWED_DOCS: dict[str, str] = {
    "risk-guide": "风控说明.md",
}


def docs_root() -> Path:
    """Return the directory that holds the Markdown documents."""
    return _DOCS_ROOT


def read_doc(slug: str) -> tuple[str, str]:
    """Load an allow-listed Markdown document.

    Args:
        slug: Key into ``ALLOWED_DOCS``.

    Returns:
        ``(title, raw_markdown)``. The title is the text of the first line
        that starts with ``"# "``; when no such line exists the file name is
        used instead.

    Raises:
        FileNotFoundError: The slug is not allow-listed, the file does not
            exist, or its resolved path is not located under the docs root.
    """
    name = ALLOWED_DOCS.get(slug)
    if not name:
        raise FileNotFoundError(slug)

    root = _DOCS_ROOT.resolve()
    path = (root / name).resolve()
    # Resolve first, then verify containment, so a symlink or "../" inside the
    # configured file name cannot point outside the docs directory.
    if not path.is_file() or root not in path.parents:
        raise FileNotFoundError(slug)

    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM. A BOM is
    # not whitespace for str.strip(), so with plain "utf-8" a first-line
    # "# Title" would not be recognised as a heading.
    text = path.read_text(encoding="utf-8-sig")

    stripped_lines = (line.strip() for line in text.splitlines())
    title = next(
        (s[2:].strip() for s in stripped_lines if s.startswith("# ")),
        name,
    )
    return title, text


def _inline(text: str) -> str:
    s = html.escape(text)
    s = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", s)
    s = re.sub(r"`([^`]+)`", r"<code>\1</code>", s)
    s = re.sub(
        r"\[([^\]]+)\]\(([^)]+)\)",
        lambda m: _link_html(m.group(1), m.group(2)),
        s,
    )
    return s


def _link_html(label: str, href: str) -> str:
    h = html.escape(href)
    lbl = _inline(label)
    if href.startswith(("http://", "https://", "mailto:")):
        return f'<a href="{h}" target="_blank" rel="noopener noreferrer">{lbl}</a>'
    if href.endswith(".md") or href.startswith("./"):
        return f'<span class="doc-xref">{lbl}</span>'
    return f'<a href="{h}">{lbl}</a>'


# Block-level patterns used by the line loop in render_markdown().
_HEADING_RE = re.compile(r"^(#{1,6}) (.*)$")
_BULLET_RE = re.compile(r"^[-*]\s+")
_ORDERED_RE = re.compile(r"^(\d+)\.\s+")
# An ordered item numbered 1 (leading zeros allowed).
_ORDERED_FIRST_RE = re.compile(r"^0*1\.\s+")


def render_markdown(text: str) -> str:
    """Convert a small Markdown subset to HTML.

    Supported block constructs, recognised line by line on the stripped line:

    * blank line: closes any open list
    * ``---``: ``<hr>``
    * consecutive lines starting with ``|``: a table via ``_render_table``
      (a lone pipe row is kept as a paragraph instead of being discarded)
    * ``#`` to ``######`` followed by a space: ``<h1>`` to ``<h6>``
    * ``- item`` / ``* item``: ``<ul>``
    * ``N. item``: ``<ol>``; a ``start`` attribute is emitted when the first
      number is not 1
    * anything else: a paragraph; following lines are joined with a space
      until a blank line or the start of another block

    Inline markup is delegated to ``_inline``.

    Args:
        text: Raw Markdown.

    Returns:
        HTML fragments joined with newlines.
    """
    lines = text.splitlines()
    out: list[str] = []
    i = 0
    open_list = ""  # "", "ul" or "ol": tag name of the list currently open

    def close_lists() -> None:
        nonlocal open_list
        if open_list:
            out.append(f"</{open_list}>")
            open_list = ""

    while i < len(lines):
        stripped = lines[i].strip()

        if not stripped:
            close_lists()
            i += 1
            continue

        if stripped == "---":
            close_lists()
            out.append("<hr>")
            i += 1
            continue

        if stripped.startswith("|") and stripped.endswith("|"):
            close_lists()
            table_lines: list[str] = []
            while i < len(lines) and lines[i].strip().startswith("|"):
                table_lines.append(lines[i].strip())
                i += 1
            if len(table_lines) >= 2:
                out.append(_render_table(table_lines))
            else:
                # A single pipe row is not a table; keep its text visible
                # rather than dropping it.
                out.append(f"<p>{_inline(table_lines[0])}</p>")
            continue

        heading = _HEADING_RE.match(stripped)
        if heading:
            close_lists()
            level = len(heading.group(1))
            out.append(f"<h{level}>{_inline(heading.group(2))}</h{level}>")
            i += 1
            continue

        bullet = _BULLET_RE.match(stripped)
        if bullet:
            if open_list != "ul":
                close_lists()
                out.append("<ul>")
                open_list = "ul"
            out.append(f"<li>{_inline(stripped[bullet.end():])}</li>")
            i += 1
            continue

        ordered = _ORDERED_RE.match(stripped)
        if ordered:
            if open_list != "ol":
                close_lists()
                digits = ordered.group(1)
                # Keep the author's numbering when a list does not begin at 1
                # (e.g. items separated by blank lines each open a new <ol>).
                # Absurdly long digit runs fall back to the default start.
                start = int(digits) if len(digits) <= 9 else 1
                out.append("<ol>" if start == 1 else f'<ol start="{start}">')
                open_list = "ol"
            out.append(f"<li>{_inline(stripped[ordered.end():])}</li>")
            i += 1
            continue

        close_lists()
        para = [stripped]
        i += 1
        while i < len(lines):
            nxt = lines[i].strip()
            # Stop at a blank line or at the start of another block. Only an
            # ordered item numbered 1 may interrupt a paragraph, so wrapped
            # prose such as "...in\n2024. Then..." stays in one paragraph.
            if (
                not nxt
                or nxt == "---"
                or nxt.startswith(("#", "|"))
                or _BULLET_RE.match(nxt)
                or _ORDERED_FIRST_RE.match(nxt)
            ):
                break
            para.append(nxt)
            i += 1
        out.append(f"<p>{_inline(' '.join(para))}</p>")

    close_lists()
    return "\n".join(out)


def _render_table(rows: list[str]) -> str:
    """Render pipe-delimited Markdown table rows as an HTML table.

    Args:
        rows: Stripped source lines, each a ``|``-separated row. The first row
            is the header. If the second row is a delimiter row (only pipes,
            whitespace, colons and at least one dash, e.g. ``|---|:--:|``) it
            is skipped; every other row becomes a body row.

    Returns:
        ``<table class="doc-table">`` markup with cell content rendered by
        ``_inline``, or ``""`` when ``rows`` is empty. A single row yields a
        header-only table.
    """
    if not rows:
        return ""

    def split_cells(row: str) -> list[str]:
        return [cell.strip() for cell in row.strip("|").split("|")]

    # The delimiter row is layout only. Skip it even when no body rows follow,
    # otherwise a header-only table would show a row of "---" cells. Requiring
    # a dash keeps a row of empty cells from being mistaken for a delimiter.
    has_delimiter = (
        len(rows) > 1
        and "-" in rows[1]
        and re.match(r"^[\|\s:-]+$", rows[1]) is not None
    )
    body_rows = rows[2:] if has_delimiter else rows[1:]

    parts = ["<table class=\"doc-table\">", "<thead><tr>"]
    parts.extend(f"<th>{_inline(cell)}</th>" for cell in split_cells(rows[0]))
    parts.append("</tr></thead><tbody>")
    for row in body_rows:
        parts.append("<tr>")
        parts.extend(f"<td>{_inline(cell)}</td>" for cell in split_cells(row))
        parts.append("</tr>")
    parts.append("</tbody></table>")
    return "".join(parts)