156 lines
4.6 KiB
Python
156 lines
4.6 KiB
Python
#!/usr/bin/env python3
"""
Launch the full minute-bar (minute k-line) data download.

Running this file as a script:

1. writes the download configuration as JSON into the storage directory,
2. generates ``start_full_download.sh`` next to this file,
3. prints a summary and a rough time estimate,
4. starts the shell script as a background process whose output is written
   to a timestamped log file.
"""
import os
import sys
import time
import json
import subprocess
from datetime import datetime

# Root directory under which minute-bar data, logs and the config are stored.
BASE_DIR = "/Users/chufeng/nas/stock/minute_kline"

# Directory the generated shell script changes into before starting Python.
# NOTE(review): presumably this is where minute_kline_collector.py lives --
# confirm, the module is not visible from this file.
COLLECTOR_DIR = (
    "/Users/chufeng/.openclaw/sanguo_projects/sanguo_quant_live/"
    "zhaoyun-data/scripts/data_acquisition"
)

BANNER = "=" * 70

# Shell script template. Named fields are filled in by build_start_script().
# The embedded ``python3 -c`` program must not reference ``__file__``: that
# name is undefined under ``-c``. The script has already ``cd``-ed into the
# collector directory, so the working directory is put on sys.path instead.
START_SCRIPT_TEMPLATE = """#!/bin/bash
# 赵云全量分钟数据下载启动脚本
# 开始时间: {started_at}

cd {collector_dir}

echo "🚀 开始全量分钟数据下载..."
echo "📊 目标: 下载{stock_count}只A股的{timeframe}数据"
echo "⏱️ 开始时间: $(date)"

# 使用稳定下载器开始下载
python3 -c "
import sys
import os
sys.path.insert(0, os.getcwd())

from minute_kline_collector import MinuteKlineCollector

collector = MinuteKlineCollector(
    base_dir='{base_dir}',
    timeframe='{timeframe}',
    start_date='{start_date}',
    end_date='{end_date}',
    batch_size={batch_size},
    max_workers={max_workers},
    retry_count={retry_count}
)

print('🎯 赵云开始全量下载任务...')
collector.download_all_stocks()
print('✅ 全量下载任务完成!')
"

echo "⏱️ 结束时间: $(date)"
echo "📈 下载总结: 请查看 {base_dir}/reports/"
"""


def build_config(now=None):
    """Return the download configuration dict.

    Args:
        now: datetime used for the log-file timestamp; defaults to the
            current local time.

    Returns:
        A JSON-serialisable dict with the storage path, data source,
        timeframe, date range, batching settings and log-file path.
    """
    stamp = (now or datetime.now()).strftime("%Y%m%d_%H%M%S")
    return {
        "base_dir": BASE_DIR,
        "data_source": "akshare",
        "timeframe": "15min",  # start with 15-minute bars
        "date_range": {
            "start": "2021-01-01",
            "end": "2026-03-27",
        },
        "stock_count": 5500,  # number of A-share stocks in the whole market
        "download_mode": "full",
        "batch_size": 100,
        "max_workers": 5,
        "retry_count": 3,
        "log_file": os.path.join(BASE_DIR, "logs", f"full_download_{stamp}.log"),
    }


def count_existing_files(data_dir):
    """Return the number of ``*.parquet`` files directly inside *data_dir*.

    A missing directory counts as zero files.
    """
    if not os.path.isdir(data_dir):
        return 0
    return sum(1 for name in os.listdir(data_dir) if name.endswith('.parquet'))


def build_start_script(config, started_at=None):
    """Render the bash launcher script for *config*.

    Args:
        config: dict as produced by build_config().
        started_at: datetime written into the script header; defaults to
            the current local time.

    Returns:
        The full text of the shell script.
    """
    started = started_at or datetime.now()
    return START_SCRIPT_TEMPLATE.format(
        started_at=started.strftime("%Y-%m-%d %H:%M:%S"),
        collector_dir=COLLECTOR_DIR,
        stock_count=config["stock_count"],
        timeframe=config["timeframe"],
        base_dir=config["base_dir"],
        start_date=config["date_range"]["start"],
        end_date=config["date_range"]["end"],
        batch_size=config["batch_size"],
        max_workers=config["max_workers"],
        retry_count=config["retry_count"],
    )


def estimate_hours(stock_count, existing_files, batch_size):
    """Return the rough download-time estimate in hours.

    Same formula as before, ``pending / (batch_size * 60) * 2``, except that
    the pending count is clamped at zero so the estimate is never negative
    when more files exist than *stock_count*.
    """
    pending = max(0, stock_count - existing_files)
    return pending / (batch_size * 60) * 2


def build_launch_command(script_file):
    """Return the argv list used to start the launcher script.

    No trailing ``"&"``: without a shell it would be handed to the script as
    a literal argument, and ``Popen`` does not wait for the child anyway.
    """
    return ["nohup", "bash", script_file]


def launch_download(script_file, log_file):
    """Start the launcher script in the background, logging to *log_file*.

    The log directory is created if needed. stdout and stderr share one file
    descriptor so their output cannot overwrite each other; the parent's
    handle is closed once the child has inherited it.

    Returns:
        The ``subprocess.Popen`` object of the started process.

    Raises:
        OSError: if the log file cannot be opened or the process cannot be
            started.
    """
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    with open(log_file, 'w') as log:
        return subprocess.Popen(
            build_launch_command(script_file),
            stdout=log,
            stderr=subprocess.STDOUT,
        )


def main():
    """Write config and launcher script, print a summary, start the download.

    Returns:
        0 when the background process was started, 1 when starting it failed.
    """
    print(BANNER)
    print("🚀 赵云启动全量分钟数据下载")
    print(BANNER)

    config = build_config()

    # Persist the configuration (create the storage directory on first run).
    os.makedirs(config["base_dir"], exist_ok=True)
    config_file = os.path.join(config["base_dir"], "full_download_config.json")
    with open(config_file, 'w', encoding='utf-8') as f:
        json.dump(config, f, ensure_ascii=False, indent=2)

    print("📊 配置信息:")
    print(f" 存储路径: {config['base_dir']}")
    print(f" 数据粒度: {config['timeframe']}")
    print(f" 股票数量: {config['stock_count']} 只")
    print(f" 时间范围: {config['date_range']['start']} 至 {config['date_range']['end']}")
    print(f" 下载模式: {config['download_mode']}")
    print(f" 批次大小: {config['batch_size']} 只/批")
    print(f" 并发数: {config['max_workers']}")

    # Check how much has already been downloaded.
    print("\n🔍 检查已有数据...")
    existing_files = count_existing_files(
        os.path.join(config["base_dir"], config["timeframe"])
    )
    print(f" 已下载股票: {existing_files} 只")
    print(f" 待下载股票: {max(0, config['stock_count'] - existing_files)} 只")

    print("\n🎯 启动下载命令:")
    print(BANNER)

    # Generate the launcher script next to this file and make it executable.
    script_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "start_full_download.sh"
    )
    with open(script_file, 'w', encoding='utf-8') as f:
        f.write(build_start_script(config))
    os.chmod(script_file, 0o755)

    print(f"启动脚本: {script_file}")
    print(BANNER)

    print("\n📋 执行步骤:")
    print("1. ✅ 配置文件已生成")
    print("2. ✅ 启动脚本已创建")
    print("3. 🚀 准备开始全量下载")

    print("\n⏱️ 时间预估:")
    estimated_hours = estimate_hours(
        config["stock_count"], existing_files, config["batch_size"]
    )
    print(f" 预计完成时间: {estimated_hours:.1f} 小时")
    estimated_completion = datetime.fromtimestamp(
        datetime.now().timestamp() + estimated_hours * 3600
    )
    print(f" 预计完成日期: {estimated_completion:%Y-%m-%d %H:%M}")

    print("\n" + BANNER)
    print("🎯 赵云立即开始执行全量下载!")
    print(BANNER)

    # Start the download right away.
    print("\n🚀 启动下载进程...")
    try:
        launch_download(script_file, config["log_file"])
    except (OSError, subprocess.SubprocessError) as e:
        print(f"❌ 启动失败: {e}")
        print("\n💡 备用方案: 手动运行启动脚本")
        print(f" bash {script_file}")
        # Do not print the "started" confirmation when the launch failed.
        return 1

    print(f"✅ 下载进程已启动,日志文件: {config['log_file']}")
    print("📊 可以使用以下命令监控进度:")
    print(f" tail -f {config['log_file']}")

    print("\n" + BANNER)
    print("📡 赵云确认: 全量分钟数据下载已启动!")
    print(BANNER)
    return 0


if __name__ == "__main__":
    sys.exit(main())