From 7f71b06a086900a251632b0df6cec42482f800ac Mon Sep 17 00:00:00 2001 From: cfdaily Date: Thu, 26 Mar 2026 11:29:53 +0800 Subject: [PATCH] auto-sync: 2026-03-26 11:29:53 --- .../data_acquisition/a_stock_daily_data.py | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 zhaoyun-data/scripts/data_acquisition/a_stock_daily_data.py diff --git a/zhaoyun-data/scripts/data_acquisition/a_stock_daily_data.py b/zhaoyun-data/scripts/data_acquisition/a_stock_daily_data.py new file mode 100644 index 000000000..d05546f3b --- /dev/null +++ b/zhaoyun-data/scripts/data_acquisition/a_stock_daily_data.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +""" +A股日线行情数据采集脚本 +获取全市场A股日线行情数据(2010年至今) +""" +import sys +import os +import time +import json +import pandas as pd +import numpy as np +from datetime import datetime, timedelta +from typing import List, Dict, Optional, Tuple +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed +import warnings + +warnings.filterwarnings('ignore') + +# 添加项目路径 +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from utils.data_utils import DataUtils +from utils.log_utils import LogUtils +from utils.progress_bar import ProgressBar + + +class AStockDailyDataCollector: + """A股日线行情数据采集器""" + + def __init__(self, config_path: str = None): + """初始化采集器 + + Args: + config_path: 配置文件路径 + """ + # 配置日志 + self.logger = LogUtils.setup_logger('a_stock_daily_data') + + # 加载配置 + self.config = self._load_config(config_path) + + # 基础路径 + self.base_dir = "/Users/chufeng/.openclaw/sanguo_projects/sanguo_quant_live/zhaoyun-data/data" + self.raw_dir = os.path.join(self.base_dir, "raw", "a_stock_daily") + self.processed_dir = os.path.join(self.base_dir, "processed", "a_stock_daily") + + # 确保目录存在 + os.makedirs(self.raw_dir, exist_ok=True) + os.makedirs(self.processed_dir, exist_ok=True) + + # 创建数据分区目录(按年) + + for year in range(2010, 2027): + year_dir = os.path.join(self.raw_dir, str(year)) + os.makedirs(year_dir, exist_ok=True) + + processed_year_dir = os.path.join(self.processed_dir, str(year)) + os.makedirs(processed_year_dir, exist_ok=True) + + self.logger.info("A股日线数据采集器初始化完成") + + def _load_config(self, config_path: Optional[str] = None) -> Dict: + """加载配置 + + Args: + config_path: 配置文件路径 + + Returns: + Dict: 配置信息 + """ + default_config = { + "data_range": { + "start_date": "2010-01-01", + "end_date": datetime.now().strftime('%Y-%m-%d'), + "include_delisted": True + }, + "data_fields": { + "daily": [ + "symbol", # 股票代码 + "date", # 交易日期 + "open", # 开盘价 + "high", # 最高价 + "low", # 最低价 + "close", # 收盘价 + "volume", # 成交量 + "amount", # 成交额 + "adj_factor", # 复权因子 + "trade_status" # 交易状态 + ] + }, + "collection": { + "batch_size": 50, + "max_workers": 10, + "retry_attempts": 3, + "chunk_size": 500, + "delay_between_requests": 0.5 + }, + "storage": { + "partition_by": ["year", "month"], + "compression": "snappy", + "file_format": "parquet" + } + } + + if config_path and os.path.exists(config_path): + try: + with open(config_path, 'r', encoding='utf-8') as f: + user_config = json.load(f) + default_config.update(user_config) + self.logger.info(f"从 {config_path} 加载用户配置") + except Exception as e: + self.logger.warning(f"加载用户配置文件失败: {e}") + + return default_config + + def collect_stock_list(self) -> List[str]: + """获取A股股票代码列表 + + Returns: + List[str]: 股票代码列表 + """ + self.logger.info("开始获取A股股票代码列表") + + try: + # 使用AKShare获取A股列表 + import akshare as ak + + self.logger.info("正在获取A股列表...") + + stock_list_df = ak.stock_info_a_code_name() + + if stock_list_df is not None and not stock_list_df.empty: + stock_codes = stock_list_df['code'].tolist() + self.logger.info(f"成功获取 {len(stock_codes)} 只A股代码") + return stock_codes + else: + self.logger.warning("未获取到A股列表") + return [] + + except ImportError: + self.logger.error("AKShare未安装,请安装: pip install akshare") + return [] + except Exception as e: + self.logger.error(f"获取股票列表失败: {e}") + return [] + + def collect_daily \ No newline at end of file