Refactor market K-line storage with tiered retention and chunked loading.

Store 1m/5m/1h/12h/1d/1w with per-timeframe policies, aggregate 15m and 2h/4h on read, and support left-pan history fetches via before_ms.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
dekun
2026-06-08 07:27:16 +08:00
parent 41bdee2416
commit 11cc482599
5 changed files with 762 additions and 148 deletions
+268 -72
View File
@@ -1,4 +1,4 @@
"""中控 K 线 SQLite 缓存:按需拉取、15 天滚动保留"""
"""中控 K 线 SQLite:分周期保留、本地聚合、分页读取"""
from __future__ import annotations
@@ -9,21 +9,31 @@ from pathlib import Path
from typing import Any, Callable, Optional
from hub_ohlcv_lib import (
HUB_KLINE_1M_MAX_BARS,
HUB_KLINE_5M_1H_RETENTION_DAYS,
TIMEFRAME_MS,
bar_limit_for_timeframe,
chart_fetch_start_ms,
format_price_by_tick,
last_closed_bar_open_ms,
aggregate_ohlcv_bars,
aggregate_ratio,
aggregation_source_for_display,
chart_chunk_limit,
chart_initial_limit,
chart_memory_cap,
history_cutoff_ms_for_storage,
normalize_chart_timeframe,
normalize_price_tick,
format_price_by_tick,
last_closed_bar_open_ms,
retention_policy_meta,
round_ohlcv_bars_to_tick,
window_start_ms,
seed_bar_target,
sync_timeframe_for_display,
)
_DEFAULT_RETENTION_DAYS = 15
def retention_days() -> int:
"""兼容旧配置;新策略见 retention_policy_meta。"""
try:
return max(1, int(os.getenv("HUB_KLINE_RETENTION_DAYS", str(_DEFAULT_RETENTION_DAYS))))
except ValueError:
@@ -145,18 +155,59 @@ def load_symbol_price_tick(
conn.close()
def purge_retention(db_path: Path | None = None, *, days: int | None = None) -> int:
"""删除早于 retention 的 K 线;返回删除行数。"""
keep = days if days is not None else retention_days()
cutoff = int(time.time() * 1000) - keep * 86400000
def purge_timeframe_by_days(
    timeframe: str,
    days: int,
    db_path: Path | None = None,
) -> int:
    """Delete stored bars of a single timeframe that are older than ``days`` days.

    Args:
        timeframe: Chart timeframe; normalized with ``normalize_chart_timeframe``
            before being matched against the ``timeframe`` column.
        days: Retention window in days; values below 1 are clamped to 1.
        db_path: Optional database path forwarded to ``_connect``.

    Returns:
        Number of rows deleted (0 when the cursor reports no row count).
    """
    cutoff = int(time.time() * 1000) - max(1, int(days)) * 86400000
    tf = normalize_chart_timeframe(timeframe)
    conn = _connect(db_path)
    try:
        # The delete must stay scoped to this timeframe: an unscoped
        # "open_time_ms < cutoff" delete would also wipe the timeframes
        # that are meant to be kept under a different policy.
        # NOTE(review): no explicit commit here; presumably _connect
        # configures autocommit - confirm.
        cur = conn.execute(
            "DELETE FROM ohlcv_bars WHERE timeframe=? AND open_time_ms < ?",
            (tf, cutoff),
        )
        return int(cur.rowcount or 0)
    finally:
        conn.close()
def purge_1m_bar_cap(db_path: Path | None = None, *, max_bars: int | None = None) -> int:
    """Trim stored 1m bars to the newest N per (exchange_key, symbol).

    A falsy ``max_bars`` falls back to ``HUB_KLINE_1M_MAX_BARS``; the effective
    cap is never lower than 100.

    Returns:
        Number of rows deleted (0 when the cursor reports no row count).
    """
    requested = max_bars or HUB_KLINE_1M_MAX_BARS
    keep_per_series = max(100, int(requested))
    conn = _connect(db_path)
    try:
        # Rank each series' 1m bars newest-first and drop every row whose
        # rank exceeds the cap.
        deleted = conn.execute(
            """
            DELETE FROM ohlcv_bars
            WHERE timeframe='1m' AND rowid IN (
            SELECT rowid FROM (
            SELECT rowid,
            ROW_NUMBER() OVER (
            PARTITION BY exchange_key, symbol
            ORDER BY open_time_ms DESC
            ) AS rn
            FROM ohlcv_bars
            WHERE timeframe='1m'
            ) WHERE rn > ?
            )
            """,
            (keep_per_series,),
        ).rowcount
        return int(deleted or 0)
    finally:
        conn.close()
def purge_retention(db_path: Path | None = None) -> int:
    """Apply the per-timeframe retention policy and return total rows deleted.

    5m and 1h bars older than ``HUB_KLINE_5M_1H_RETENTION_DAYS`` days are
    removed, 1m bars are capped via ``purge_1m_bar_cap``; no other timeframe
    (e.g. 12h/1d/1w) is touched here.
    """
    aged_out = sum(
        purge_timeframe_by_days(tf, HUB_KLINE_5M_1H_RETENTION_DAYS, db_path)
        for tf in ("5m", "1h")
    )
    return aged_out + purge_1m_bar_cap(db_path)
def upsert_bars(
exchange_key: str,
symbol: str,
@@ -233,21 +284,84 @@ def load_bars_range(
""",
(ex_k, sym, tf, int(start_ms), int(end_ms)),
).fetchall()
return [
{
"open_time_ms": int(r["open_time_ms"]),
"open": float(r["open"]),
"high": float(r["high"]),
"low": float(r["low"]),
"close": float(r["close"]),
"volume": float(r["volume"] or 0),
}
for r in rows
]
return _rows_to_bars(rows)
finally:
conn.close()
def load_bars_latest(
    exchange_key: str,
    symbol: str,
    timeframe: str,
    limit: int,
    db_path: Path | None = None,
) -> list[dict[str, Any]]:
    """Return the newest ``limit`` stored bars of one series, oldest first.

    The exchange key is lower-cased, the symbol upper-cased and the timeframe
    normalized before querying; ``limit`` is clamped to at least 1.
    """
    exchange = (exchange_key or "").strip().lower()
    ticker = (symbol or "").strip().upper()
    frame = normalize_chart_timeframe(timeframe)
    max_rows = max(1, int(limit))
    conn = _connect(db_path)
    try:
        newest_first = conn.execute(
            """
            SELECT open_time_ms, open, high, low, close, volume
            FROM ohlcv_bars
            WHERE exchange_key=? AND symbol=? AND timeframe=?
            ORDER BY open_time_ms DESC
            LIMIT ?
            """,
            (exchange, ticker, frame, max_rows),
        ).fetchall()
        # The query walks newest -> oldest; callers get chronological order.
        return _rows_to_bars(newest_first)[::-1]
    finally:
        conn.close()
def load_bars_before(
    exchange_key: str,
    symbol: str,
    timeframe: str,
    before_ms: int,
    limit: int,
    db_path: Path | None = None,
) -> list[dict[str, Any]]:
    """Return up to ``limit`` stored bars strictly older than ``before_ms``.

    Bars are the ones closest to ``before_ms`` and are returned oldest first.
    The exchange key is lower-cased, the symbol upper-cased and the timeframe
    normalized before querying; ``limit`` is clamped to at least 1.
    """
    exchange = (exchange_key or "").strip().lower()
    ticker = (symbol or "").strip().upper()
    frame = normalize_chart_timeframe(timeframe)
    max_rows = max(1, int(limit))
    upper_bound_ms = int(before_ms)
    conn = _connect(db_path)
    try:
        newest_first = conn.execute(
            """
            SELECT open_time_ms, open, high, low, close, volume
            FROM ohlcv_bars
            WHERE exchange_key=? AND symbol=? AND timeframe=?
            AND open_time_ms < ?
            ORDER BY open_time_ms DESC
            LIMIT ?
            """,
            (exchange, ticker, frame, upper_bound_ms, max_rows),
        ).fetchall()
        # The query walks newest -> oldest; callers get chronological order.
        return _rows_to_bars(newest_first)[::-1]
    finally:
        conn.close()
def _rows_to_bars(rows) -> list[dict[str, Any]]:
    """Convert key-addressable OHLCV rows into plain bar dicts.

    ``open_time_ms`` is coerced to int and the price fields to float; a falsy
    ``volume`` (e.g. NULL) becomes 0.0. Input order is preserved.
    """
    bars: list[dict[str, Any]] = []
    for row in rows:
        bar: dict[str, Any] = {"open_time_ms": int(row["open_time_ms"])}
        for field in ("open", "high", "low", "close"):
            bar[field] = float(row[field])
        bar["volume"] = float(row["volume"] or 0)
        bars.append(bar)
    return bars
def _to_chart_candles(bars: list[dict[str, Any]]) -> list[dict[str, Any]]:
out = []
for b in bars:
@@ -267,15 +381,36 @@ def _to_chart_candles(bars: list[dict[str, Any]]) -> list[dict[str, Any]]:
return out
def _merge_bars(*groups: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Merge bar lists into one list sorted by ``open_time_ms``.

    When several bars share an open time, the one from the latest group (or
    latest position) wins. Empty/None groups and bars whose ``open_time_ms``
    is missing or not int-convertible are skipped.
    """
    by_open_time: dict[int, dict[str, Any]] = {}
    for group in groups:
        if not group:
            continue
        for bar in group:
            try:
                open_ms = int(bar["open_time_ms"])
            except (KeyError, TypeError, ValueError):
                # Malformed bar: ignore it rather than fail the whole merge.
                continue
            by_open_time[open_ms] = bar
    return [by_open_time[open_ms] for open_ms in sorted(by_open_time)]
def _trim_display_bars(
    bars: list[dict[str, Any]],
    *,
    need: int,
    before_ms: int | None,
) -> list[dict[str, Any]]:
    """Keep at most the last ``need`` bars, optionally only those before a time.

    Args:
        bars: Bars in ascending ``open_time_ms`` order (the tail is kept).
        need: Maximum number of bars to return; a value <= 0 yields ``[]``.
        before_ms: When not None and > 0, bars whose ``open_time_ms`` is not
            strictly below it are dropped before trimming.

    Returns:
        The trimmed list; the input list itself when nothing had to be
        filtered or cut.
    """
    # Guard need <= 0 explicitly: bars[-0:] would return the whole list and a
    # negative need would slice from the front instead of trimming the tail.
    if not bars or need <= 0:
        return []
    if before_ms is not None and int(before_ms) > 0:
        upper_bound_ms = int(before_ms)
        bars = [b for b in bars if int(b["open_time_ms"]) < upper_bound_ms]
    return bars[-need:] if len(bars) > need else bars
def _aggregate_display_bars(
    src_bars: list[dict[str, Any]],
    display_tf: str,
    *,
    need: int,
    before_ms: int | None,
) -> list[dict[str, Any]]:
    """Aggregate source bars to ``display_tf`` and trim the result.

    The source bars go through ``aggregate_ohlcv_bars`` and the output is cut
    down with ``_trim_display_bars`` using the same ``need``/``before_ms``.
    An empty input short-circuits to ``[]``.
    """
    if src_bars:
        aggregated = aggregate_ohlcv_bars(src_bars, display_tf)
        return _trim_display_bars(aggregated, need=need, before_ms=before_ms)
    return []
def resolve_chart_bars(
@@ -287,39 +422,71 @@ def resolve_chart_bars(
db_path: Path | None = None,
force_refresh: bool = False,
tail_refresh: bool = False,
limit: int | None = None,
before_ms: int | None = None,
) -> dict[str, Any]:
"""
按需:先读库,不足则 remote_fetch(symbol, timeframe, since_ms, limit) 补齐并写库
tail_refresh=True 时即使库内「够新」也增量拉取尾部 K 线(未收盘 K 的 OHLC 更新)
分页读库:首屏 / 左拖 before_ms / 尾部 tail_refresh
15m←5m,2h/4h←1h 现场聚合;其余直读入库周期
"""
init_db(db_path)
purged = purge_retention(db_path)
sym = (symbol or "").strip().upper()
ex_k = (exchange_key or "").strip().lower()
tf = normalize_chart_timeframe(timeframe)
display_tf = normalize_chart_timeframe(timeframe)
if not sym or not ex_k:
return {"ok": False, "msg": "缺少 exchange 或 symbol"}
need = bar_limit_for_timeframe(tf)
agg_src = aggregation_source_for_display(display_tf)
storage_tf = agg_src or sync_timeframe_for_display(display_tf)
is_history = before_ms is not None and int(before_ms) > 0
need = int(
limit
or (chart_chunk_limit(display_tf) if is_history else chart_initial_limit(display_tf))
)
need = max(1, min(need, chart_memory_cap(display_tf)))
now_ms = int(time.time() * 1000)
fetch_start_ms = chart_fetch_start_ms(tf, need, now_ms)
db_read_start_ms = window_start_ms(tf, need, retention_days(), now_ms)
last_closed = last_closed_bar_open_ms(tf, now_ms)
period_display = TIMEFRAME_MS[display_tf]
period_storage = TIMEFRAME_MS[storage_tf]
ratio = aggregate_ratio(display_tf, storage_tf) if agg_src else 1
if tail_refresh and not is_history:
need = min(need, max(30, ratio * 6 if agg_src else 20))
src_need = need * ratio + ratio * 4
cutoff = history_cutoff_ms_for_storage(storage_tf, now_ms)
source_kind = "aggregate" if agg_src else "db"
def load_display_rows() -> list[dict[str, Any]]:
if agg_src:
if is_history:
src = load_bars_before(ex_k, sym, storage_tf, int(before_ms), src_need, db_path)
else:
src = load_bars_latest(ex_k, sym, storage_tf, src_need, db_path)
return _aggregate_display_bars(
src, display_tf, need=need, before_ms=before_ms if is_history else None
)
if is_history:
return load_bars_before(ex_k, sym, storage_tf, int(before_ms), need, db_path)
return load_bars_latest(ex_k, sym, storage_tf, need, db_path)
db_rows: list[dict[str, Any]] = []
if not force_refresh:
period_ms = TIMEFRAME_MS[tf]
db_rows = load_bars_range(
ex_k, sym, tf, max(0, db_read_start_ms - period_ms), now_ms + period_ms, db_path
)
db_rows = load_display_rows()
last_closed = last_closed_bar_open_ms(display_tf, now_ms)
newest_db = db_rows[-1]["open_time_ms"] if db_rows else None
period_ms = TIMEFRAME_MS[tf]
newest_ok = newest_db is not None and int(newest_db) >= int(last_closed) - period_ms
need_fetch = force_refresh or len(db_rows) < need or not newest_ok
if is_history:
newest_ok = True
else:
newest_ok = newest_db is not None and int(newest_db) >= int(last_closed) - period_display
need_fetch = force_refresh or (not is_history and (len(db_rows) < need or not newest_ok))
if is_history and len(db_rows) < need:
need_fetch = True
tail_only = False
if tail_refresh and db_rows and not force_refresh and not need_fetch:
if tail_refresh and not is_history and db_rows and not force_refresh and not need_fetch:
need_fetch = True
tail_only = True
@@ -328,44 +495,66 @@ def resolve_chart_bars(
remote_err: Optional[str] = None
if need_fetch:
since = fetch_start_ms
if tail_only and newest_db is not None:
since = max(0, int(newest_db) - period_ms * 3)
# 仅当库内根数已够且缺口在尾部时做增量拉取;否则全量回看,避免 Gate from>to
elif (
db_rows
and not force_refresh
and newest_ok
and len(db_rows) >= need
):
since = max(0, int(newest_db) - period_ms * 2)
if is_history:
bms = int(before_ms)
anchor = bms - period_display
since = max(cutoff, anchor - period_storage * src_need)
fetch_limit = min(src_need + 20, 1500)
elif tail_only:
if agg_src:
src_tail = load_bars_latest(ex_k, sym, storage_tf, 5, db_path)
anchor_ms = int(src_tail[-1]["open_time_ms"]) if src_tail else now_ms
else:
anchor_ms = int(newest_db) if newest_db is not None else now_ms
since = max(cutoff, anchor_ms - period_storage * max(5, ratio * 3))
fetch_limit = min(max(20, ratio * 8), 300)
else:
since = max(cutoff, now_ms - period_storage * min(src_need, seed_bar_target(storage_tf)))
fetch_limit = min(
seed_bar_target(storage_tf) if force_refresh else src_need + 20,
1500,
)
remote = remote_fetch(
symbol=sym,
timeframe=tf,
timeframe=storage_tf,
since_ms=since,
limit=need + 20,
limit=fetch_limit,
)
if remote.get("ok") and remote.get("bars"):
fetched = upsert_bars(ex_k, sym, tf, remote["bars"], db_path)
fetched = upsert_bars(ex_k, sym, storage_tf, remote["bars"], db_path)
price_tick = remote.get("price_tick")
if price_tick is not None:
save_symbol_price_tick(ex_k, sym, price_tick, db_path)
db_rows = load_bars_range(ex_k, sym, tf, fetch_start_ms, now_ms, db_path)
db_rows = load_display_rows()
if fetched:
source_kind = "remote" if source_kind == "db" else source_kind
else:
remote_err = remote.get("msg") or remote.get("error") or "实例拉取 K 线失败"
if not db_rows:
return {"ok": False, "msg": remote_err, "purged": purged}
if is_history:
exhausted = True
else:
return {"ok": False, "msg": remote_err, "purged": purged}
if len(db_rows) > need:
db_rows = db_rows[-need:]
exhausted = False
if is_history:
if not db_rows:
exhausted = True
elif len(db_rows) < need:
oldest = int(db_rows[0]["open_time_ms"])
if cutoff > 0 and oldest <= cutoff + period_storage:
exhausted = True
elif fetched == 0:
exhausted = True
if price_tick is None:
price_tick = load_symbol_price_tick(ex_k, sym, db_path)
if price_tick is None:
if price_tick is None and not is_history:
try:
tick_probe = remote_fetch(
symbol=sym,
timeframe=tf,
timeframe=storage_tf,
since_ms=None,
limit=3,
)
@@ -381,20 +570,27 @@ def resolve_chart_bars(
round_ohlcv_bars_to_tick(db_rows, price_tick)
candles = _to_chart_candles(db_rows)
if not candles:
if not is_history and not candles and not exhausted:
return {"ok": False, "msg": remote_err or "无 K 线数据", "purged": purged}
from_cache = max(0, len(candles) - (1 if fetched else 0))
if fetched:
from_cache = max(0, len(candles) - min(fetched, len(candles)))
oldest_ms = int(db_rows[0]["open_time_ms"]) if db_rows else None
newest_ms = int(db_rows[-1]["open_time_ms"]) if db_rows else None
from_cache = max(0, len(candles) - min(fetched, len(candles))) if fetched else len(candles)
return {
"ok": True,
"symbol": sym,
"exchange_key": ex_k,
"timeframe": tf,
"timeframe": display_tf,
"storage_timeframe": storage_tf,
"limit": need,
"retention_days": retention_days(),
"before_ms": int(before_ms) if is_history else None,
"oldest_ms": oldest_ms,
"newest_ms": newest_ms,
"exhausted": exhausted,
"source": "remote" if fetched else source_kind,
"retention_policy": retention_policy_meta(),
"candles": candles,
"from_cache": from_cache,
"fetched": fetched,