Files
sanguo_quant_live/zhaoyun-data/scripts/data_acquisition/fix_missing_510300.py
T
2026-04-02 08:55:07 +08:00

87 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""
补充下载缺失的 510300.SSE 沪深300ETF 日线数据
"""
import akshare as ak
import pandas as pd
import os
from pathlib import Path
# Configuration
BASE_DIR = Path("/Users/chufeng/nas/stock-data/sanguo_quant_live/zhaoyun-data/data/raw/daily")

# CSI 300 ETF (510300). The Sina interface is called with the
# exchange-prefixed symbol, the EastMoney interface with the bare code.
symbol = "sh510300"
code = "510300"

# Rows dated before this day are discarded.
START_DATE = "2010-01-01"

# Columns the downstream (vnpy-style) consumers expect to find.
REQUIRED_COLUMNS = ("trade_date", "open", "high", "low", "close", "volume", "amount")

# Chinese column names (EastMoney-style output) -> normalized English names.
# Columns that are already English need no entry.
COLUMN_MAPPING = {
    "开盘": "open",
    "最高": "high",
    "最低": "low",
    "收盘": "close",
    "成交量": "volume",
    "成交额": "amount",
}


def fetch_daily_frame(sina_symbol, em_code):
    """Download the full daily history, preferring Sina and falling back to EastMoney.

    Args:
        sina_symbol: Exchange-prefixed symbol for ``ak.fund_etf_hist_sina``
            (e.g. ``"sh510300"``).
        em_code: Bare fund code for ``ak.fund_etf_hist_em`` (e.g. ``"510300"``).

    Returns:
        The raw DataFrame returned by whichever interface produced data
        (it may still be empty if both interfaces return nothing).

    Raises:
        Exception: whatever the EastMoney fallback raises; a failure of the
            Sina call alone is reported and triggers the fallback.
    """
    df = None
    try:
        # Sina takes no date range; it returns the whole history in one call.
        print("🔍 尝试 fund_etf_hist_sina 接口...")
        df = ak.fund_etf_hist_sina(symbol=sina_symbol)
        print(f"✅ fund_etf_hist_sina: {len(df)}")
        print(f"📋 列名: {list(df.columns)}")
    except Exception as exc:
        # A failing primary source must not prevent trying the fallback.
        print(f"⚠️ fund_etf_hist_sina 失败: {exc}")
        df = None

    if df is None or df.empty:
        print("\n🔍 尝试 fund_etf_hist_em 接口...")
        # NOTE(review): this fallback requests back-adjusted ("hfq") prices,
        # while the Sina call passes no adjustment option — confirm that
        # mixing the two sources is acceptable downstream.
        df = ak.fund_etf_hist_em(
            symbol=em_code,  # bare code, not the "sh"-prefixed symbol
            period="daily",
            start_date="20100101",
            # Use today's date so re-running later does not truncate data.
            end_date=pd.Timestamp.today().strftime("%Y%m%d"),
            adjust="hfq",
        )
        print(f"✅ fund_etf_hist_em: {len(df)}")
        print(f"📋 列名: {list(df.columns)}")
    return df


def normalize_daily_frame(df, start_date=START_DATE):
    """Return a copy of *df* with a ``trade_date`` column and English OHLCV names.

    The date is taken from the ``日期`` column, else the ``date`` column, else
    the index. Chinese OHLCV column names are renamed per ``COLUMN_MAPPING``
    and rows with ``trade_date`` earlier than *start_date* are dropped. The
    input frame is not modified.

    Args:
        df: Raw frame as returned by one of the akshare interfaces.
        start_date: Earliest date (inclusive) to keep.

    Returns:
        The normalized, date-filtered DataFrame.
    """
    out = df.copy()
    if "日期" in out.columns:
        out["trade_date"] = pd.to_datetime(out["日期"])
    elif "date" in out.columns:
        out["trade_date"] = pd.to_datetime(out["date"])
    else:
        out["trade_date"] = pd.to_datetime(out.index)
    out = out.rename(columns=COLUMN_MAPPING)
    return out[out["trade_date"] >= pd.to_datetime(start_date)]


def missing_columns(df):
    """Return the names in ``REQUIRED_COLUMNS`` that *df* does not contain."""
    return [col for col in REQUIRED_COLUMNS if col not in df.columns]


def main():
    """Download, normalize and store the daily bars for ``symbol`` as parquet.

    Any error raised while downloading or saving is printed with its
    traceback instead of propagating, so the script always finishes.
    """
    BASE_DIR.mkdir(parents=True, exist_ok=True)
    print(f"🚀 开始下载 {symbol} 日线数据...")
    try:
        df = normalize_daily_frame(fetch_daily_frame(symbol, code))

        # Missing columns are reported but are not fatal.
        for col in missing_columns(df):
            print(f"⚠️ 缺失列: {col}")

        if df.empty:
            # Do not write an empty file: it could replace good existing data.
            print(f"\n{symbol}: 数据仍然为空")
            return

        output_file = BASE_DIR / f"{symbol}_daily.parquet"
        df.to_parquet(output_file, compression="snappy", index=False)
        print(f"\n{symbol}: 下载成功,{len(df)} 条记录")
        print(f"📦 保存到: {output_file}")
        print(f"📊 数据日期范围: {df['trade_date'].min()} ~ {df['trade_date'].max()}")
    except Exception as e:
        # Top-level boundary of a one-shot script: report and finish.
        import traceback

        traceback.print_exc()
        print(f"\n{symbol}: 下载失败 - {str(e)}")


if __name__ == "__main__":
    main()