Files
sanguo_quant_live/zhaoyun-data/scripts/data_acquisition/start_full_download.py
T
2026-04-06 22:45:02 +08:00

156 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""
启动全量分钟数据下载
"""
import os
import sys
import time
import json
import subprocess
from datetime import datetime
# Startup banner.
print("=" * 70)
print("🚀 赵云启动全量分钟数据下载")
print("=" * 70)

# Download settings. The key order is the order in which the settings are
# serialized when the dict is dumped to JSON.
config = {
    "base_dir": "/Users/chufeng/nas/stock/minute_kline",
    "data_source": "akshare",
    "timeframe": "15min",  # start with the 15-minute bars
    "date_range": {"start": "2021-01-01", "end": "2026-03-27"},
    "stock_count": 5500,  # number of A-share stocks across the whole market
    "download_mode": "full",
    "batch_size": 100,
    "max_workers": 5,
    "retry_count": 3,
    # One log file per run, stamped with the local launch time.
    "log_file": (
        "/Users/chufeng/nas/stock/minute_kline/logs/"
        f"full_download_{datetime.now():%Y%m%d_%H%M%S}.log"
    ),
}
# Save the configuration as JSON inside the storage directory.
# open() does not create missing parent directories, so make sure the
# directory exists first instead of failing with FileNotFoundError.
os.makedirs(config["base_dir"], exist_ok=True)
config_file = os.path.join(config["base_dir"], "full_download_config.json")
with open(config_file, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps any non-ASCII text human-readable in the file.
    json.dump(config, f, ensure_ascii=False, indent=2)
# Echo the effective configuration to the console.
print("📊 配置信息:")
print(f" 存储路径: {config['base_dir']}")
print(f" 数据粒度: {config['timeframe']}")
print(f" 股票数量: {config['stock_count']}")
# Separate the two dates; without a separator they are printed fused
# together (e.g. "2021-01-012026-03-27").
print(f" 时间范围: {config['date_range']['start']} ~ {config['date_range']['end']}")
print(f" 下载模式: {config['download_mode']}")
print(f" 批次大小: {config['batch_size']} 只/批")
print(f" 并发数: {config['max_workers']}")
# Check what has already been downloaded: every *.parquet file found in
# <base_dir>/<timeframe> is counted as one downloaded stock.
print("\n🔍 检查已有数据...")
timeframe_dir = os.path.join(config["base_dir"], config["timeframe"])
existing_files = 0
# isdir rather than exists: listdir() would raise if the path were a file.
if os.path.isdir(timeframe_dir):
    existing_files = sum(
        1 for name in os.listdir(timeframe_dir) if name.endswith(".parquet")
    )
print(f" 已下载股票: {existing_files}")
# Clamp at zero so a directory holding more files than stock_count does not
# report a negative number of pending stocks.
print(f" 待下载股票: {max(0, config['stock_count'] - existing_files)}")
print("\n🎯 启动下载命令:")
print("=" * 70)

# Build the bash launcher. Named placeholders are used so that each value is
# tied to its slot by name rather than by position in a long argument list.
#
# The embedded `python3 -c` program must not reference __file__: that name is
# not defined for code passed via -c and raises NameError. The launcher has
# already cd'ed into the collector's directory, so the current working
# directory is added to sys.path instead.
start_script = """#!/bin/bash
# 赵云全量分钟数据下载启动脚本
# 开始时间: {started_at}
cd /Users/chufeng/.openclaw/sanguo_projects/sanguo_quant_live/zhaoyun-data/scripts/data_acquisition
echo "🚀 开始全量分钟数据下载..."
echo "📊 目标: 下载{stock_count}只A股的{timeframe}数据"
echo "⏱️ 开始时间: $(date)"
# 使用稳定下载器开始下载
python3 -c "
import sys
import os
sys.path.append(os.getcwd())
from minute_kline_collector import MinuteKlineCollector
collector = MinuteKlineCollector(
    base_dir='{base_dir}',
    timeframe='{timeframe}',
    start_date='{start_date}',
    end_date='{end_date}',
    batch_size={batch_size},
    max_workers={max_workers},
    retry_count={retry_count}
)
print('🎯 赵云开始全量下载任务...')
collector.download_all_stocks()
print('✅ 全量下载任务完成!')
"
echo "⏱️ 结束时间: $(date)"
echo "📈 下载总结: 请查看 {base_dir}/reports/"
""".format(
    started_at=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    stock_count=config["stock_count"],
    timeframe=config["timeframe"],
    base_dir=config["base_dir"],
    start_date=config["date_range"]["start"],
    end_date=config["date_range"]["end"],
    batch_size=config["batch_size"],
    max_workers=config["max_workers"],
    retry_count=config["retry_count"],
)

# Write the launcher next to this script and mark it executable.
script_file = os.path.join(os.path.dirname(__file__), "start_full_download.sh")
with open(script_file, "w", encoding="utf-8") as f:
    f.write(start_script)
os.chmod(script_file, 0o755)
print(f"启动脚本: {script_file}")
print("=" * 70)
print("\n📋 执行步骤:")
print("1. ✅ 配置文件已生成")
print("2. ✅ 启动脚本已创建")
print("3. 🚀 准备开始全量下载")

print("\n⏱️ 时间预估:")
# Clamp at zero: if more files exist than stock_count, the estimate would
# otherwise be negative and the predicted completion would lie in the past.
remaining_stocks = max(0, config["stock_count"] - existing_files)
# Arithmetically this is (remaining / batch_size) batches at 2 minutes each,
# converted to hours.
# NOTE(review): the 2-minutes-per-batch figure is implied by the constants,
# not documented anywhere visible — confirm it matches observed throughput.
estimated_hours = remaining_stocks / (config["batch_size"] * 60) * 2
print(f" 预计完成时间: {estimated_hours:.1f} 小时")
estimated_completion = datetime.fromtimestamp(
    datetime.now().timestamp() + estimated_hours * 3600
)
print(f" 预计完成日期: {estimated_completion:%Y-%m-%d %H:%M}")

print("\n" + "=" * 70)
print("🎯 赵云立即开始执行全量下载!")
print("=" * 70)
# Launch the generated script as a background process.
print("\n🚀 启动下载进程...")
try:
    # The log directory may not exist yet; open() will not create it.
    os.makedirs(os.path.dirname(config["log_file"]), exist_ok=True)
    # Open the log exactly once and route stderr into the same descriptor.
    # Two independent handles ('w' and 'a') on one file let stdout overwrite
    # what stderr has already appended. The child inherits its own copy of
    # the descriptor, so closing ours right after spawning is safe.
    with open(config["log_file"], "w") as log_handle:
        # No "&" argument: Popen does not go through a shell, so it would be
        # handed to the script as a literal positional parameter. Popen
        # already returns without waiting for the child.
        subprocess.Popen(
            ["nohup", "bash", script_file],
            stdout=log_handle,
            stderr=subprocess.STDOUT,
        )
except (OSError, subprocess.SubprocessError) as e:
    print(f"❌ 启动失败: {e}")
    print("\n💡 备用方案: 手动运行启动脚本")
    print(f" bash {script_file}")
else:
    print(f"✅ 下载进程已启动,日志文件: {config['log_file']}")
    print("📊 可以使用以下命令监控进度:")
    print(f" tail -f {config['log_file']}")
    # Confirm the launch only when the process was actually spawned.
    print("\n" + "=" * 70)
    print("📡 赵云确认: 全量分钟数据下载已启动!")
    print("=" * 70)