Files
sanguo_vnpy/archive/2026-04-29-cleanup/scripts/utils/verify_510300_data.py
T
2026-04-29 20:15:43 +08:00

81 lines
2.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
验证赵云将军下载的 510300.SSE 数据
"""
import pandas as pd
import os
from datetime import datetime
def main(
    file_path: str = (
        "/Users/chufeng/nas/stock-data/sanguo_quant_live/"
        "zhaoyun-data/data/raw/daily/sh510300_daily.parquet"
    ),
) -> bool:
    """Verify the downloaded 510300.SSE daily parquet file and print a report.

    The report covers file existence and size, row count, column names,
    head/tail previews, the ``date`` range (when that column exists),
    ``describe()`` statistics and the presence of the required OHLCV columns.

    Args:
        file_path: Path of the parquet file to check. Defaults to the
            location the script was originally hard-coded to.

    Returns:
        ``True`` when the file exists and contains every required column;
        ``False`` when the file is missing or any required column is absent.

    Note:
        Errors raised by ``pd.read_parquet`` (for example an unreadable file)
        are not caught here and propagate to the caller.
    """
    print("🚀 验证赵云将军下载的 510300.SSE 数据")
    print("=" * 60)

    print(f"数据文件: {file_path}")
    exists = os.path.exists(file_path)
    print(f"文件存在: {exists}")
    if not exists:
        print("❌ 文件不存在")
        return False

    size_mb = os.path.getsize(file_path) / (1024 * 1024)
    print(f"文件大小: {size_mb:.2f} MB")

    # Load the parquet file.
    print("\n📊 读取数据...")
    df = pd.read_parquet(file_path)
    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {list(df.columns)}")
    print("\n数据预览(前5行):")
    print(df.head())
    print("\n数据尾部(后5行):")
    print(df.tail())

    # Report the date range only when a 'date' column is present.
    if 'date' in df.columns:
        print("\n📅 日期范围:")
        min_date = df['date'].min()
        max_date = df['date'].max()
        print(f" 最早日期: {min_date}")
        print(f" 最新日期: {max_date}")

    # Summary statistics.
    print("\n📈 数据统计:")
    print(df.describe())

    # Check the required fields.
    required_fields = ['open', 'high', 'low', 'close', 'volume']
    missing = [f for f in required_fields if f not in df.columns]
    if missing:
        print(f"\n❌ 缺少必需字段: {missing}")
        # Stop here: the summary below reads df['close'] and announces success,
        # which would either raise KeyError or report a false pass.
        return False

    print("\n✅ 所有必需字段都存在")
    for field in required_fields:
        print(f" - {field}")

    print("\n" + "=" * 60)
    print("✅ 数据验证完成")
    print("标的: 510300.SSE (沪深300ETF)")
    print(f"文件: {file_path}")
    print(f"行数: {len(df)}")
    print(f"价格范围: {df['close'].min():.2f} ~ {df['close'].max():.2f}")
    print("=" * 60)

    print("\n💡 下一步:")
    print("1. 将这个数据导入到vn.py数据库")
    print("2. 配置回测API使用这个数据库")
    print("3. 重启API服务")
    print("4. 关羽将军开始回测")
    return True
if __name__ == "__main__":
    # Turn main()'s boolean result into the process exit status so shells and
    # schedulers can detect a failed verification (0 = success, 1 = failure).
    raise SystemExit(0 if main() else 1)