Refactor market K-line storage with tiered retention and chunked loading.
Store 1m/5m/1h/12h/1d/1w with per-timeframe policies, aggregate 15m and 2h/4h on read, and support left-pan history fetches via before_ms. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
+268
-72
@@ -1,4 +1,4 @@
|
||||
"""中控 K 线 SQLite 缓存:按需拉取、15 天滚动保留。"""
|
||||
"""中控 K 线 SQLite:分周期保留、本地聚合、分页读取。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -9,21 +9,31 @@ from pathlib import Path
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from hub_ohlcv_lib import (
|
||||
HUB_KLINE_1M_MAX_BARS,
|
||||
HUB_KLINE_5M_1H_RETENTION_DAYS,
|
||||
TIMEFRAME_MS,
|
||||
bar_limit_for_timeframe,
|
||||
chart_fetch_start_ms,
|
||||
format_price_by_tick,
|
||||
last_closed_bar_open_ms,
|
||||
aggregate_ohlcv_bars,
|
||||
aggregate_ratio,
|
||||
aggregation_source_for_display,
|
||||
chart_chunk_limit,
|
||||
chart_initial_limit,
|
||||
chart_memory_cap,
|
||||
history_cutoff_ms_for_storage,
|
||||
normalize_chart_timeframe,
|
||||
normalize_price_tick,
|
||||
format_price_by_tick,
|
||||
last_closed_bar_open_ms,
|
||||
retention_policy_meta,
|
||||
round_ohlcv_bars_to_tick,
|
||||
window_start_ms,
|
||||
seed_bar_target,
|
||||
sync_timeframe_for_display,
|
||||
)
|
||||
|
||||
_DEFAULT_RETENTION_DAYS = 15
|
||||
|
||||
|
||||
def retention_days() -> int:
|
||||
"""兼容旧配置;新策略见 retention_policy_meta。"""
|
||||
try:
|
||||
return max(1, int(os.getenv("HUB_KLINE_RETENTION_DAYS", str(_DEFAULT_RETENTION_DAYS))))
|
||||
except ValueError:
|
||||
@@ -145,18 +155,59 @@ def load_symbol_price_tick(
|
||||
conn.close()
|
||||
|
||||
|
||||
def purge_retention(db_path: Path | None = None, *, days: int | None = None) -> int:
|
||||
"""删除早于 retention 的 K 线;返回删除行数。"""
|
||||
keep = days if days is not None else retention_days()
|
||||
cutoff = int(time.time() * 1000) - keep * 86400000
|
||||
def purge_timeframe_by_days(
    timeframe: str,
    days: int,
    db_path: Path | None = None,
) -> int:
    """Delete bars of a single timeframe that are older than ``days`` days.

    Only rows whose ``timeframe`` column equals the normalized ``timeframe``
    are touched; bars stored for other timeframes are left alone.

    Args:
        timeframe: Timeframe label; passed through ``normalize_chart_timeframe``.
        days: Retention window in days. Values below 1 are clamped to 1.
        db_path: Optional database path forwarded to ``_connect``.

    Returns:
        The number of rows deleted (0 when the cursor reports no row count).
    """
    ms_per_day = 86400000
    cutoff = int(time.time() * 1000) - max(1, int(days)) * ms_per_day
    tf = normalize_chart_timeframe(timeframe)
    conn = _connect(db_path)
    try:
        # The delete must stay scoped to one timeframe: an unscoped
        # "open_time_ms < ?" delete would also wipe the timeframes that are
        # meant to be kept under a different policy.
        # NOTE(review): no explicit commit here — presumably _connect returns
        # an autocommit connection; confirm.
        cur = conn.execute(
            "DELETE FROM ohlcv_bars WHERE timeframe=? AND open_time_ms < ?",
            (tf, cutoff),
        )
        return int(cur.rowcount or 0)
    finally:
        conn.close()
|
||||
|
||||
|
||||
def purge_1m_bar_cap(db_path: Path | None = None, *, max_bars: int | None = None) -> int:
    """Trim stored 1m bars down to the newest N per (exchange_key, symbol).

    Args:
        db_path: Optional database path forwarded to ``_connect``.
        max_bars: Bars to keep per series. A falsy value (``None`` or 0) falls
            back to ``HUB_KLINE_1M_MAX_BARS``; the effective cap is never
            below 100.

    Returns:
        The number of rows deleted (0 when the cursor reports no row count).
    """
    requested = max_bars if max_bars else HUB_KLINE_1M_MAX_BARS
    keep_per_series = max(100, int(requested))

    # Rank each series' 1m bars newest-first and delete everything ranked
    # beyond the cap.
    trim_sql = """
        DELETE FROM ohlcv_bars
        WHERE timeframe='1m' AND rowid IN (
            SELECT rowid FROM (
                SELECT rowid,
                       ROW_NUMBER() OVER (
                           PARTITION BY exchange_key, symbol
                           ORDER BY open_time_ms DESC
                       ) AS rn
                FROM ohlcv_bars
                WHERE timeframe='1m'
            ) WHERE rn > ?
        )
        """

    conn = _connect(db_path)
    try:
        deleted = conn.execute(trim_sql, (keep_per_series,)).rowcount
        return int(deleted or 0)
    finally:
        conn.close()
|
||||
|
||||
|
||||
def purge_retention(db_path: Path | None = None) -> int:
    """Run the per-timeframe cleanup and return the total rows removed.

    5m and 1h bars are purged by age (``HUB_KLINE_5M_1H_RETENTION_DAYS``),
    1m bars are capped to the newest N per series, and no other timeframe
    is purged by this function.
    """
    removed_5m = purge_timeframe_by_days("5m", HUB_KLINE_5M_1H_RETENTION_DAYS, db_path)
    removed_1h = purge_timeframe_by_days("1h", HUB_KLINE_5M_1H_RETENTION_DAYS, db_path)
    removed_1m = purge_1m_bar_cap(db_path)
    return removed_5m + removed_1h + removed_1m
|
||||
|
||||
|
||||
def upsert_bars(
|
||||
exchange_key: str,
|
||||
symbol: str,
|
||||
@@ -233,21 +284,84 @@ def load_bars_range(
|
||||
""",
|
||||
(ex_k, sym, tf, int(start_ms), int(end_ms)),
|
||||
).fetchall()
|
||||
return [
|
||||
{
|
||||
"open_time_ms": int(r["open_time_ms"]),
|
||||
"open": float(r["open"]),
|
||||
"high": float(r["high"]),
|
||||
"low": float(r["low"]),
|
||||
"close": float(r["close"]),
|
||||
"volume": float(r["volume"] or 0),
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
return _rows_to_bars(rows)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def load_bars_latest(
    exchange_key: str,
    symbol: str,
    timeframe: str,
    limit: int,
    db_path: Path | None = None,
) -> list[dict[str, Any]]:
    """Return the newest ``limit`` stored bars of one series, oldest first.

    The exchange key is lower-cased, the symbol upper-cased, and the timeframe
    normalized before querying. ``limit`` is clamped to at least 1.
    """
    params = (
        (exchange_key or "").strip().lower(),
        (symbol or "").strip().upper(),
        normalize_chart_timeframe(timeframe),
        max(1, int(limit)),
    )
    query = """
        SELECT open_time_ms, open, high, low, close, volume
        FROM ohlcv_bars
        WHERE exchange_key=? AND symbol=? AND timeframe=?
        ORDER BY open_time_ms DESC
        LIMIT ?
        """
    conn = _connect(db_path)
    try:
        newest_first = conn.execute(query, params).fetchall()
        # The query returns newest-first; callers get ascending time order.
        return _rows_to_bars(newest_first)[::-1]
    finally:
        conn.close()
|
||||
|
||||
|
||||
def load_bars_before(
    exchange_key: str,
    symbol: str,
    timeframe: str,
    before_ms: int,
    limit: int,
    db_path: Path | None = None,
) -> list[dict[str, Any]]:
    """Return up to ``limit`` stored bars opening strictly before ``before_ms``.

    The bars closest to ``before_ms`` are selected and returned oldest first.
    The exchange key is lower-cased, the symbol upper-cased, and the timeframe
    normalized before querying. ``limit`` is clamped to at least 1.
    """
    exchange_norm = (exchange_key or "").strip().lower()
    symbol_norm = (symbol or "").strip().upper()
    tf_norm = normalize_chart_timeframe(timeframe)
    row_cap = max(1, int(limit))
    upper_bound_ms = int(before_ms)

    query = """
        SELECT open_time_ms, open, high, low, close, volume
        FROM ohlcv_bars
        WHERE exchange_key=? AND symbol=? AND timeframe=?
          AND open_time_ms < ?
        ORDER BY open_time_ms DESC
        LIMIT ?
        """
    conn = _connect(db_path)
    try:
        newest_first = conn.execute(
            query,
            (exchange_norm, symbol_norm, tf_norm, upper_bound_ms, row_cap),
        ).fetchall()
        # The query returns newest-first; callers get ascending time order.
        return _rows_to_bars(newest_first)[::-1]
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _rows_to_bars(rows) -> list[dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"open_time_ms": int(r["open_time_ms"]),
|
||||
"open": float(r["open"]),
|
||||
"high": float(r["high"]),
|
||||
"low": float(r["low"]),
|
||||
"close": float(r["close"]),
|
||||
"volume": float(r["volume"] or 0),
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
|
||||
|
||||
def _to_chart_candles(bars: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
out = []
|
||||
for b in bars:
|
||||
@@ -267,15 +381,36 @@ def _to_chart_candles(bars: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
return out
|
||||
|
||||
|
||||
def _merge_bars(*groups: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
merged: dict[int, dict[str, Any]] = {}
|
||||
for g in groups:
|
||||
for b in g or []:
|
||||
try:
|
||||
merged[int(b["open_time_ms"])] = b
|
||||
except (KeyError, TypeError, ValueError):
|
||||
continue
|
||||
return [merged[k] for k in sorted(merged.keys())]
|
||||
def _trim_display_bars(
|
||||
bars: list[dict[str, Any]],
|
||||
*,
|
||||
need: int,
|
||||
before_ms: int | None,
|
||||
) -> list[dict[str, Any]]:
|
||||
if not bars:
|
||||
return []
|
||||
if before_ms is not None and int(before_ms) > 0:
|
||||
bms = int(before_ms)
|
||||
bars = [b for b in bars if int(b["open_time_ms"]) < bms]
|
||||
if len(bars) > need:
|
||||
bars = bars[-need:]
|
||||
return bars
|
||||
if len(bars) > need:
|
||||
bars = bars[-need:]
|
||||
return bars
|
||||
|
||||
|
||||
def _aggregate_display_bars(
|
||||
src_bars: list[dict[str, Any]],
|
||||
display_tf: str,
|
||||
*,
|
||||
need: int,
|
||||
before_ms: int | None,
|
||||
) -> list[dict[str, Any]]:
|
||||
if not src_bars:
|
||||
return []
|
||||
agg = aggregate_ohlcv_bars(src_bars, display_tf)
|
||||
return _trim_display_bars(agg, need=need, before_ms=before_ms)
|
||||
|
||||
|
||||
def resolve_chart_bars(
|
||||
@@ -287,39 +422,71 @@ def resolve_chart_bars(
|
||||
db_path: Path | None = None,
|
||||
force_refresh: bool = False,
|
||||
tail_refresh: bool = False,
|
||||
limit: int | None = None,
|
||||
before_ms: int | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
按需:先读库,不足则 remote_fetch(symbol, timeframe, since_ms, limit) 补齐并写库。
|
||||
tail_refresh=True 时即使库内「够新」也增量拉取尾部 K 线(未收盘 K 的 OHLC 更新)。
|
||||
分页读库:首屏 / 左拖 before_ms / 尾部 tail_refresh。
|
||||
15m←5m,2h/4h←1h 现场聚合;其余直读入库周期。
|
||||
"""
|
||||
init_db(db_path)
|
||||
purged = purge_retention(db_path)
|
||||
|
||||
sym = (symbol or "").strip().upper()
|
||||
ex_k = (exchange_key or "").strip().lower()
|
||||
tf = normalize_chart_timeframe(timeframe)
|
||||
display_tf = normalize_chart_timeframe(timeframe)
|
||||
if not sym or not ex_k:
|
||||
return {"ok": False, "msg": "缺少 exchange 或 symbol"}
|
||||
|
||||
need = bar_limit_for_timeframe(tf)
|
||||
agg_src = aggregation_source_for_display(display_tf)
|
||||
storage_tf = agg_src or sync_timeframe_for_display(display_tf)
|
||||
is_history = before_ms is not None and int(before_ms) > 0
|
||||
need = int(
|
||||
limit
|
||||
or (chart_chunk_limit(display_tf) if is_history else chart_initial_limit(display_tf))
|
||||
)
|
||||
need = max(1, min(need, chart_memory_cap(display_tf)))
|
||||
|
||||
now_ms = int(time.time() * 1000)
|
||||
fetch_start_ms = chart_fetch_start_ms(tf, need, now_ms)
|
||||
db_read_start_ms = window_start_ms(tf, need, retention_days(), now_ms)
|
||||
last_closed = last_closed_bar_open_ms(tf, now_ms)
|
||||
period_display = TIMEFRAME_MS[display_tf]
|
||||
period_storage = TIMEFRAME_MS[storage_tf]
|
||||
ratio = aggregate_ratio(display_tf, storage_tf) if agg_src else 1
|
||||
if tail_refresh and not is_history:
|
||||
need = min(need, max(30, ratio * 6 if agg_src else 20))
|
||||
src_need = need * ratio + ratio * 4
|
||||
cutoff = history_cutoff_ms_for_storage(storage_tf, now_ms)
|
||||
source_kind = "aggregate" if agg_src else "db"
|
||||
|
||||
def load_display_rows() -> list[dict[str, Any]]:
|
||||
if agg_src:
|
||||
if is_history:
|
||||
src = load_bars_before(ex_k, sym, storage_tf, int(before_ms), src_need, db_path)
|
||||
else:
|
||||
src = load_bars_latest(ex_k, sym, storage_tf, src_need, db_path)
|
||||
return _aggregate_display_bars(
|
||||
src, display_tf, need=need, before_ms=before_ms if is_history else None
|
||||
)
|
||||
if is_history:
|
||||
return load_bars_before(ex_k, sym, storage_tf, int(before_ms), need, db_path)
|
||||
return load_bars_latest(ex_k, sym, storage_tf, need, db_path)
|
||||
|
||||
db_rows: list[dict[str, Any]] = []
|
||||
if not force_refresh:
|
||||
period_ms = TIMEFRAME_MS[tf]
|
||||
db_rows = load_bars_range(
|
||||
ex_k, sym, tf, max(0, db_read_start_ms - period_ms), now_ms + period_ms, db_path
|
||||
)
|
||||
db_rows = load_display_rows()
|
||||
|
||||
last_closed = last_closed_bar_open_ms(display_tf, now_ms)
|
||||
newest_db = db_rows[-1]["open_time_ms"] if db_rows else None
|
||||
period_ms = TIMEFRAME_MS[tf]
|
||||
newest_ok = newest_db is not None and int(newest_db) >= int(last_closed) - period_ms
|
||||
need_fetch = force_refresh or len(db_rows) < need or not newest_ok
|
||||
if is_history:
|
||||
newest_ok = True
|
||||
else:
|
||||
newest_ok = newest_db is not None and int(newest_db) >= int(last_closed) - period_display
|
||||
|
||||
need_fetch = force_refresh or (not is_history and (len(db_rows) < need or not newest_ok))
|
||||
if is_history and len(db_rows) < need:
|
||||
need_fetch = True
|
||||
|
||||
tail_only = False
|
||||
if tail_refresh and db_rows and not force_refresh and not need_fetch:
|
||||
if tail_refresh and not is_history and db_rows and not force_refresh and not need_fetch:
|
||||
need_fetch = True
|
||||
tail_only = True
|
||||
|
||||
@@ -328,44 +495,66 @@ def resolve_chart_bars(
|
||||
remote_err: Optional[str] = None
|
||||
|
||||
if need_fetch:
|
||||
since = fetch_start_ms
|
||||
if tail_only and newest_db is not None:
|
||||
since = max(0, int(newest_db) - period_ms * 3)
|
||||
# 仅当库内根数已够且缺口在尾部时做增量拉取;否则全量回看,避免 Gate from>to
|
||||
elif (
|
||||
db_rows
|
||||
and not force_refresh
|
||||
and newest_ok
|
||||
and len(db_rows) >= need
|
||||
):
|
||||
since = max(0, int(newest_db) - period_ms * 2)
|
||||
if is_history:
|
||||
bms = int(before_ms)
|
||||
anchor = bms - period_display
|
||||
since = max(cutoff, anchor - period_storage * src_need)
|
||||
fetch_limit = min(src_need + 20, 1500)
|
||||
elif tail_only:
|
||||
if agg_src:
|
||||
src_tail = load_bars_latest(ex_k, sym, storage_tf, 5, db_path)
|
||||
anchor_ms = int(src_tail[-1]["open_time_ms"]) if src_tail else now_ms
|
||||
else:
|
||||
anchor_ms = int(newest_db) if newest_db is not None else now_ms
|
||||
since = max(cutoff, anchor_ms - period_storage * max(5, ratio * 3))
|
||||
fetch_limit = min(max(20, ratio * 8), 300)
|
||||
else:
|
||||
since = max(cutoff, now_ms - period_storage * min(src_need, seed_bar_target(storage_tf)))
|
||||
fetch_limit = min(
|
||||
seed_bar_target(storage_tf) if force_refresh else src_need + 20,
|
||||
1500,
|
||||
)
|
||||
|
||||
remote = remote_fetch(
|
||||
symbol=sym,
|
||||
timeframe=tf,
|
||||
timeframe=storage_tf,
|
||||
since_ms=since,
|
||||
limit=need + 20,
|
||||
limit=fetch_limit,
|
||||
)
|
||||
if remote.get("ok") and remote.get("bars"):
|
||||
fetched = upsert_bars(ex_k, sym, tf, remote["bars"], db_path)
|
||||
fetched = upsert_bars(ex_k, sym, storage_tf, remote["bars"], db_path)
|
||||
price_tick = remote.get("price_tick")
|
||||
if price_tick is not None:
|
||||
save_symbol_price_tick(ex_k, sym, price_tick, db_path)
|
||||
db_rows = load_bars_range(ex_k, sym, tf, fetch_start_ms, now_ms, db_path)
|
||||
db_rows = load_display_rows()
|
||||
if fetched:
|
||||
source_kind = "remote" if source_kind == "db" else source_kind
|
||||
else:
|
||||
remote_err = remote.get("msg") or remote.get("error") or "实例拉取 K 线失败"
|
||||
if not db_rows:
|
||||
return {"ok": False, "msg": remote_err, "purged": purged}
|
||||
if is_history:
|
||||
exhausted = True
|
||||
else:
|
||||
return {"ok": False, "msg": remote_err, "purged": purged}
|
||||
|
||||
if len(db_rows) > need:
|
||||
db_rows = db_rows[-need:]
|
||||
exhausted = False
|
||||
if is_history:
|
||||
if not db_rows:
|
||||
exhausted = True
|
||||
elif len(db_rows) < need:
|
||||
oldest = int(db_rows[0]["open_time_ms"])
|
||||
if cutoff > 0 and oldest <= cutoff + period_storage:
|
||||
exhausted = True
|
||||
elif fetched == 0:
|
||||
exhausted = True
|
||||
|
||||
if price_tick is None:
|
||||
price_tick = load_symbol_price_tick(ex_k, sym, db_path)
|
||||
if price_tick is None:
|
||||
if price_tick is None and not is_history:
|
||||
try:
|
||||
tick_probe = remote_fetch(
|
||||
symbol=sym,
|
||||
timeframe=tf,
|
||||
timeframe=storage_tf,
|
||||
since_ms=None,
|
||||
limit=3,
|
||||
)
|
||||
@@ -381,20 +570,27 @@ def resolve_chart_bars(
|
||||
round_ohlcv_bars_to_tick(db_rows, price_tick)
|
||||
|
||||
candles = _to_chart_candles(db_rows)
|
||||
if not candles:
|
||||
if not is_history and not candles and not exhausted:
|
||||
return {"ok": False, "msg": remote_err or "无 K 线数据", "purged": purged}
|
||||
|
||||
from_cache = max(0, len(candles) - (1 if fetched else 0))
|
||||
if fetched:
|
||||
from_cache = max(0, len(candles) - min(fetched, len(candles)))
|
||||
oldest_ms = int(db_rows[0]["open_time_ms"]) if db_rows else None
|
||||
newest_ms = int(db_rows[-1]["open_time_ms"]) if db_rows else None
|
||||
|
||||
from_cache = max(0, len(candles) - min(fetched, len(candles))) if fetched else len(candles)
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"symbol": sym,
|
||||
"exchange_key": ex_k,
|
||||
"timeframe": tf,
|
||||
"timeframe": display_tf,
|
||||
"storage_timeframe": storage_tf,
|
||||
"limit": need,
|
||||
"retention_days": retention_days(),
|
||||
"before_ms": int(before_ms) if is_history else None,
|
||||
"oldest_ms": oldest_ms,
|
||||
"newest_ms": newest_ms,
|
||||
"exhausted": exhausted,
|
||||
"source": "remote" if fetched else source_kind,
|
||||
"retention_policy": retention_policy_meta(),
|
||||
"candles": candles,
|
||||
"from_cache": from_cache,
|
||||
"fetched": fetched,
|
||||
|
||||
Reference in New Issue
Block a user