#!/usr/bin/env python3
"""Validate the 510300.SSE daily data downloaded by General Zhao Yun.

Reads a parquet file, prints a human-readable summary (size, row count,
columns, head/tail, date range, descriptive statistics) and checks that the
OHLCV columns needed downstream are present.
"""

import os
from datetime import datetime

import pandas as pd

# Default location of the downloaded daily bars for sh510300.
DEFAULT_FILE_PATH = "/Users/chufeng/nas/stock-data/sanguo_quant_live/zhaoyun-data/data/raw/daily/sh510300_daily.parquet"

# Columns that must be present for the data to count as valid.
REQUIRED_FIELDS = ['open', 'high', 'low', 'close', 'volume']


def main(file_path: str = DEFAULT_FILE_PATH) -> bool:
    """Print a validation report for the parquet file at *file_path*.

    Args:
        file_path: Path of the parquet file to inspect. Defaults to the
            sh510300 daily file, so calling ``main()`` with no arguments
            behaves as before.

    Returns:
        True when the file exists and contains every column listed in
        ``REQUIRED_FIELDS``; False when the file is missing or any required
        column is absent.

    Raises:
        Whatever ``pandas.read_parquet`` raises for an unreadable file; such
        errors are deliberately not swallowed here.
    """
    print("🚀 验证赵云将军下载的 510300.SSE 数据")
    print("="*60)

    # Evaluate existence once so the printed value and the guard always agree.
    file_exists = os.path.exists(file_path)
    print(f"数据文件: {file_path}")
    print(f"文件存在: {file_exists}")

    if not file_exists:
        print("❌ 文件不存在")
        return False

    size_mb = os.path.getsize(file_path) / (1024*1024)
    print(f"文件大小: {size_mb:.2f} MB")

    # Load the parquet file.
    print("\n📊 读取数据...")
    df = pd.read_parquet(file_path)

    print(f"数据总行数: {len(df)}")
    print(f"数据列名: {list(df.columns)}")
    print("\n数据预览(前5行):")
    print(df.head())
    print("\n数据尾部(后5行):")
    print(df.tail())

    # Report the date range only when a 'date' column is available.
    if 'date' in df.columns:
        print("\n📅 日期范围:")
        min_date = df['date'].min()
        max_date = df['date'].max()
        print(f" 最早日期: {min_date}")
        print(f" 最新日期: {max_date}")

    # Descriptive statistics.
    print("\n📈 数据统计:")
    print(df.describe())

    # Check the required columns.
    missing = [f for f in REQUIRED_FIELDS if f not in df.columns]
    if missing:
        print(f"\n❌ 缺少必需字段: {missing}")
        # Stop here: the summary below reads df['close'] and would either
        # raise KeyError or wrongly report success for incomplete data.
        return False

    print("\n✅ 所有必需字段都存在")
    for field in REQUIRED_FIELDS:
        print(f" - {field}")

    print("\n" + "="*60)
    print("✅ 数据验证完成")
    print("标的: 510300.SSE (沪深300ETF)")
    print(f"文件: {file_path}")
    print(f"行数: {len(df)}")
    print(f"价格范围: {df['close'].min():.2f} ~ {df['close'].max():.2f}")
    print("="*60)

    print("\n💡 下一步:")
    print("1. 将这个数据导入到vn.py数据库")
    print("2. 配置回测API使用这个数据库")
    print("3. 重启API服务")
    print("4. 关羽将军开始回测")

    return True


if __name__ == "__main__":
    main()