From 1ec51f2a46ad528130373acc22b8c10954a15234 Mon Sep 17 00:00:00 2001
From: cfdaily
Date: Tue, 5 May 2026 11:25:53 +0800
Subject: [PATCH] auto-sync: 2026-05-05 11:25:53

---
 data_platform/backfill_15min_baostock.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/data_platform/backfill_15min_baostock.py b/data_platform/backfill_15min_baostock.py
index 4317883b..db70b1a4 100644
--- a/data_platform/backfill_15min_baostock.py
+++ b/data_platform/backfill_15min_baostock.py
@@ -92,14 +92,10 @@ def code_to_baostock(code: str) -> Tuple[str, str]:
 
 
 def is_backfilled(parquet_path: Path) -> bool:
-    """检查文件是否已经被BaoStock回补过(通过元数据标记)"""
-    if not parquet_path.exists():
-        return False
-    try:
-        df = pd.read_parquet(parquet_path)
-        return df.attrs.get("source") == "baostock"
-    except Exception:
-        return False
+    """检查文件是否已经被BaoStock回补过"""
+    # 用标记文件判断
+    marker = parquet_path.parent / f".{parquet_path.stem}.baostock"
+    return marker.exists()
 
 
 def load_progress() -> set:
@@ -167,9 +163,6 @@ def fetch_bs_15min(bs_code: str, start_date: str, end_date: str) -> Optional[pd.
     if bad_ohlc.any():
         df = df[~bad_ohlc]
 
-    # 标记数据来源
-    df.attrs["source"] = "baostock"
-
     return df if not df.empty else None
 
 
@@ -209,8 +202,10 @@ def backfill_one(code: str, start_date: str, end_date: str, force: bool = False)
     # 写入新文件
     try:
         df_new = df_new.sort_values("day").reset_index(drop=True)
-        # 保存时把attrs写入parquet metadata
         df_new.to_parquet(parquet_path, index=False)
+        # 写标记文件
+        marker = parquet_path.parent / f".{parquet_path.stem}.baostock"
+        marker.write_text(datetime.now().isoformat())
         return "ok", len(df_new)
     except Exception as e:
         logger.error("写入 %s 失败: %s", code, e)