From 8ec36d47c7741fda617767a5f6c926d57ef4f18b Mon Sep 17 00:00:00 2001 From: cfdaily Date: Mon, 6 Apr 2026 22:45:02 +0800 Subject: [PATCH] auto-sync: 2026-04-06 22:45:02 --- ...ata_collection_report_20260406_224131.json | 35 +++++++++++ .../a_stock_daily_data_downloader.py | 27 ++++++-- .../data_acquisition/start_full_download.py | 3 +- .../data_acquisition/start_full_download.sh | 2 +- .../data_acquisition/test_daily_api.py | 61 +++++++++++++++++++ .../data_acquisition/test_fix_daily.py | 53 ++++++++++++++++ .../data_acquisition/test_nas_environment.py | 49 +++++++++++++++ 7 files changed, 222 insertions(+), 8 deletions(-) create mode 100644 zhaoyun-data/data/reports/daily_data_collection_report_20260406_224131.json create mode 100644 zhaoyun-data/scripts/data_acquisition/test_daily_api.py create mode 100644 zhaoyun-data/scripts/data_acquisition/test_fix_daily.py create mode 100644 zhaoyun-data/scripts/data_acquisition/test_nas_environment.py diff --git a/zhaoyun-data/data/reports/daily_data_collection_report_20260406_224131.json b/zhaoyun-data/data/reports/daily_data_collection_report_20260406_224131.json new file mode 100644 index 000000000..2b5a5ddda --- /dev/null +++ b/zhaoyun-data/data/reports/daily_data_collection_report_20260406_224131.json @@ -0,0 +1,35 @@ +{ + "report_time": "2026-04-06T22:41:31.783979", + "collection_summary": { + "start_time": "2026-04-06T22:41:19.715578", + "total_stocks": 20, + "collected_success": 0, + "collected_failed": 20, + "success_rate": "0.00%", + "total_records": 0 + }, + "data_files": [], + "failed_stocks": [ + "000004", + "000002", + "000007", + "000006", + "000001", + "000011", + "000012", + "000010", + "000008", + "000009", + "000019", + "000017", + "000016", + "000014", + "000020", + "000028", + "000021", + "000026", + "000027", + "000025" + ], + "collection_status": "✅ 成功" +} \ No newline at end of file diff --git a/zhaoyun-data/scripts/data_acquisition/a_stock_daily_data_downloader.py b/zhaoyun-data/scripts/data_acquisition/a_stock_daily_data_downloader.py index 96da2504a..12444a87f 100644 --- a/zhaoyun-data/scripts/data_acquisition/a_stock_daily_data_downloader.py +++ b/zhaoyun-data/scripts/data_acquisition/a_stock_daily_data_downloader.py @@ -168,9 +168,11 @@ class AStockDailyDownloader: try: logger.debug(f"下载 {symbol} ({name}) 尝试 {attempt + 1}/{self.retry_count}") - # 获取日线数据 - df = ak.stock_zh_a_daily( - symbol=symbol, + # 获取日线数据 - 使用 stock_zh_a_hist 接口(更稳定) + # symbol格式: 纯代码,不需要sh/sz前缀 + df = ak.stock_zh_a_hist( + symbol=code, + period="daily", start_date=self.start_date.strftime("%Y%m%d"), end_date=self.end_date.strftime("%Y%m%d"), adjust="hfq" # 后复权 @@ -184,9 +186,22 @@ class AStockDailyDownloader: # 数据清理 df = df.copy() - # 重置索引,确保日期列存在 - if 'date' not in df.columns: - df = df.reset_index() + # 转换中文列名到英文 + column_mapping = { + '日期': 'date', + '股票代码': 'code', + '开盘': 'open', + '收盘': 'close', + '最高': 'high', + '最低': 'low', + '成交量': 'volume', + '成交额': 'amount', + '振幅': 'amplitude', + '涨跌幅': 'change_percent', + '涨跌额': 'change_amount', + '换手率': 'turnover' + } + df = df.rename(columns=column_mapping) # 标准化列名 df.columns = [col.lower().strip() for col in df.columns] diff --git a/zhaoyun-data/scripts/data_acquisition/start_full_download.py b/zhaoyun-data/scripts/data_acquisition/start_full_download.py index 0e92eef2d..75b00fd0c 100644 --- a/zhaoyun-data/scripts/data_acquisition/start_full_download.py +++ b/zhaoyun-data/scripts/data_acquisition/start_full_download.py @@ -126,7 +126,8 @@ print("3. 🚀 准备开始全量下载") print("\n⏱️ 时间预估:") estimated_hours = (config["stock_count"] - existing_files) / (config["batch_size"] * 60) * 2 print(f" 预计完成时间: {estimated_hours:.1f} 小时") -print(f" 预计完成日期: {(datetime.now().timestamp() + estimated_hours * 3600):%Y-%m-%d %H:%M}") +estimated_completion = datetime.fromtimestamp(datetime.now().timestamp() + estimated_hours * 3600) +print(f" 预计完成日期: {estimated_completion:%Y-%m-%d %H:%M}") print("\n" + "="*70) print("🎯 赵云立即开始执行全量下载!") diff --git a/zhaoyun-data/scripts/data_acquisition/start_full_download.sh b/zhaoyun-data/scripts/data_acquisition/start_full_download.sh index a9258f5f4..dd737085f 100755 --- a/zhaoyun-data/scripts/data_acquisition/start_full_download.sh +++ b/zhaoyun-data/scripts/data_acquisition/start_full_download.sh @@ -1,6 +1,6 @@ #!/bin/bash # 赵云全量分钟数据下载启动脚本 -# 开始时间: 2026-03-27 12:58:32 +# 开始时间: 2026-04-06 22:44:42 cd /Users/chufeng/.openclaw/sanguo_projects/sanguo_quant_live/zhaoyun-data/scripts/data_acquisition diff --git a/zhaoyun-data/scripts/data_acquisition/test_daily_api.py b/zhaoyun-data/scripts/data_acquisition/test_daily_api.py new file mode 100644 index 000000000..1f54583d6 --- /dev/null +++ b/zhaoyun-data/scripts/data_acquisition/test_daily_api.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +""" +测试日线数据API +""" +import akshare as ak +import pandas as pd +import sys + +print("测试AKShare日线数据API...") +print(f"AKShare版本: {ak.__version__}") + +# 测试不同的参数格式 +test_cases = [ + {"symbol": "sh000001", "name": "上证指数-sh格式"}, + {"symbol": "sz000001", "name": "平安银行-sz格式"}, + {"symbol": "000001", "name": "平安银行-纯代码"}, +] + +for test in test_cases: + print(f"\n{'='*60}") + print(f"测试: {test['name']} - {test['symbol']}") + print(f"{'='*60}") + + try: + df = ak.stock_zh_a_daily( + symbol=test['symbol'], + start_date="20240101", + end_date="20241231", + adjust="hfq" + ) + + if df is not None and not df.empty: + print(f"✅ 成功! 获取到 {len(df)} 条记录") + print(f"列名: {list(df.columns)}") + print(f"前5行:\n{df.head()}") + else: + print(f"❌ 失败! 返回空数据") + + except Exception as e: + print(f"❌ 异常: {e}") + +# 尝试新接口 +print(f"\n{'='*60}") +print("尝试新接口: stock_zh_a_hist") +print(f"{'='*60}") + +try: + df = ak.stock_zh_a_hist( + symbol="000001", + period="daily", + start_date="20240101", + end_date="20241231", + adjust="hfq" + ) + + if df is not None and not df.empty: + print(f"✅ 成功! 获取到 {len(df)} 条记录") + print(f"列名: {list(df.columns)}") + print(f"前5行:\n{df.head()}") +except Exception as e: + print(f"❌ 异常: {e}") diff --git a/zhaoyun-data/scripts/data_acquisition/test_fix_daily.py b/zhaoyun-data/scripts/data_acquisition/test_fix_daily.py new file mode 100644 index 000000000..1a08d8f6f --- /dev/null +++ b/zhaoyun-data/scripts/data_acquisition/test_fix_daily.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +测试修复后的日线数据下载 +""" +import sys +import os +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + +from a_stock_daily_data_downloader import AStockDailyDownloader + +print("="*70) +print("🧪 测试修复后的日线数据下载") +print("="*70) + +# 创建下载器,只测试10只股票 +downloader = AStockDailyDownloader( + base_dir="/tmp/test_daily_download", + start_date="2024-01-01", + end_date="2024-12-31" +) + +# 获取股票列表 +stocks = downloader.get_all_a_stock_codes() +print(f"\n📊 获取到 {len(stocks)} 只股票") + +# 测试前10只 +print("\n🧪 测试前10只股票下载...") +success_count = 0 +fail_count = 0 + +for i, stock in enumerate(stocks[:10]): + print(f"\n{i+1}. {stock['symbol']} {stock['name']}:", end=" ") + df = downloader.download_stock_daily( + stock['symbol'], + stock['code'], + stock['name'] + ) + + if df is not None and not df.empty: + print(f"✅ {len(df)} 条记录") + success_count += 1 + else: + print(f"❌ 失败") + fail_count += 1 + +print(f"\n{'='*70}") +print(f"📊 测试结果: 成功 {success_count}, 失败 {fail_count}, 成功率 {success_count/10*100:.1f}%") +print(f"{'='*70}") + +if success_count > 0: + print("\n🎉 修复成功!可以开始全量下载了") +else: + print("\n❌ 还有问题,需要继续修复") diff --git a/zhaoyun-data/scripts/data_acquisition/test_nas_environment.py b/zhaoyun-data/scripts/data_acquisition/test_nas_environment.py new file mode 100644 index 000000000..be8a95c60 --- /dev/null +++ b/zhaoyun-data/scripts/data_acquisition/test_nas_environment.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +""" +测试NAS环境和数据源可用性 +""" +import sys +import os +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + +from minute_kline_collector import MinuteKlineCollector + +print("="*70) +print("🧪 测试NAS环境和数据源") +print("="*70) + +# 使用NAS路径初始化收集器 +collector = MinuteKlineCollector(base_dir="/Users/chufeng/nas/stock/minute_kline") + +# 环境测试 +print("\n🔍 开始环境测试...") +test_results = collector.test_environment() + +print("\n📊 环境测试结果:") +for key, value in test_results.items(): + if isinstance(value, bool): + print(f" {key}: {'✅ 通过' if value else '❌ 失败'}") + +if test_results.get("all_passed"): + print("\n✅ 环境测试全部通过!") +else: + print("\n❌ 环境测试有失败项,请检查配置") + sys.exit(1) + +# 数据源测试 +print("\n🔍 开始数据源测试...") +source_results = collector.test_data_source() + +print("\n📊 数据源测试结果:") +for timeframe, result in source_results.get("timeframes", {}).items(): + print(f" {timeframe}: {result['status']} - {result['record_count']} 条记录") + +if all(result["status"] == "available" for result in source_results.get("timeframes", {}).values()): + print("\n✅ 数据源测试全部通过!") +else: + print("\n❌ 数据源测试有失败项,请检查网络") + sys.exit(1) + +print("\n" + "="*70) +print("🎉 所有测试通过!NAS环境就绪,可以开始下载") +print("="*70)