From a1e4bba242230570a0b93febff0f5ff6bb0382c3 Mon Sep 17 00:00:00 2001 From: cfdaily Date: Thu, 26 Mar 2026 20:02:11 +0800 Subject: [PATCH] auto-sync: 2026-03-26 20:02:11 --- zhaoyun-data/data/README.md | 97 ++++++++++++++++ zhaoyun-data/data/data_status.json | 108 ++++++++++++++++++ .../a_stock_daily/daily_data_structure.json | 29 +++++ .../processed/a_stock_daily/data_summary.json | 71 ++++++++++++ 4 files changed, 305 insertions(+) create mode 100644 zhaoyun-data/data/README.md create mode 100644 zhaoyun-data/data/data_status.json create mode 100644 zhaoyun-data/data/processed/a_stock_daily/daily_data_structure.json create mode 100644 zhaoyun-data/data/processed/a_stock_daily/data_summary.json diff --git a/zhaoyun-data/data/README.md b/zhaoyun-data/data/README.md new file mode 100644 index 000000000..76265c69e --- /dev/null +++ b/zhaoyun-data/data/README.md @@ -0,0 +1,97 @@ +# 📊 A股本地数据仓库 + +## 📋 数据概况 + +### 基础信息数据 +- **数据来源**: AKShare (免费开源) +- **股票数量**: 5,493只A股 +- **时间范围**: 2010年至今 +- **创建时间**: 2026-03-26 20:02:10 + +### 存储结构 + +``` +data/ +├── raw/ # 原始数据 +│ ├── a_stock_daily/ # A股日线行情数据(待下载) +│ │ ├── 2010/ # 按年分区 +│ │ ├── 2011/ +│ │ └── ... +│ ├── financial_reports/ # 财报数据(待下载) +│ ├── stock_info/ # 股票基础信息 +│ └── data_sources/ # 数据源配置 +├── processed/ # 处理后数据 +│ ├── a_stock_daily/ # 清洗后的日线数据 +│ ├── financial_indicators/ # 财务指标计算数据 +│ ├── stock_info/ # 标准化股票信息 +│ └── quality_reports/ # 数据质量报告 +└── running_data/ # 运行数据 + ├── update_logs/ # 更新日志 + └── config/ # 运行配置 +``` + +### 数据结构 + +#### 基础信息数据字段 +- **symbol**: 股票代码 +- **name**: 股票名称 +- **industry**: 所属行业 +- **market**: 市场类型 +- **list_date**: 上市日期 +- **total_market_cap**: 总市值 +- **circulating_market_cap**: 流通市值 + +#### 日线数据字段 +- **date**: 交易日期 +- **open/high/low/close**: 开高低收价格 +- **volume**: 成交量 +- **amount**: 成交额 +- **adj_factor**: 复权因子 + +### 🚀 使用说明 + +#### 1. 
数据访问 +```python +import glob +import pandas as pd +# 读取基础信息(read_csv 不支持通配符,先用 glob 展开并取文件名排序最后的一份) +basic_info = pd.read_csv(sorted(glob.glob("raw/stock_info/stock_basic_info_raw_*.csv"))[-1]) + +# 读取日线数据(按年分区;read_parquet 不支持通配符,直接传入分区目录) +daily_2024 = pd.read_parquet("raw/a_stock_daily/2024/") +``` + +#### 2. 数据更新 +```bash +# 运行日线数据下载 +python3 scripts/data_acquisition/a_stock_daily_data.py +``` + +### 📊 数据质量 + +#### 完整性检查 +- 交易日连续性 +- 价格数据完整性 +- 成交量一致性 + +#### 准确性验证 +- 价格逻辑检查 +- 数据格式统一 +- 异常值检测 + +### 🔧 技术架构 + +#### 数据采集 +- 使用AKShare开源库 +- 批量并行下载 +- 自动错误重试 + +#### 数据处理 +- Parquet列式存储 +- Snappy压缩 +- 时间分区索引 + +--- + +**数据维护**: 赵云(数据工程将军) +**最后更新**: 2026-03-26 20:02:10 diff --git a/zhaoyun-data/data/data_status.json b/zhaoyun-data/data/data_status.json new file mode 100644 index 000000000..d93834370 --- /dev/null +++ b/zhaoyun-data/data/data_status.json @@ -0,0 +1,108 @@ +{ + "timestamp": "2026-03-26T20:02:09.978299", + "success": false, + "summary": { + "timestamp": "2026-03-26T20:02:10.005829", + "stock_summary": { + "total_stocks": 5493, + "industry_distribution": { + "汽车零部件": 264, + "通用设备": 249, + "专用设备": 199, + "化学制品": 178, + "半导体": 173, + "化学制药": 158, + "电网设备": 139, + "医疗器械": 136, + "软件开发": 135, + "IT服务Ⅱ": 133 + }, + "listing_year_distribution": { + "2021": 523, + "2017": 432, + "2020": 432, + "2022": 428, + "2010": 322, + "2023": 313, + "2011": 264, + "2016": 226, + "2015": 214, + "2019": 202, + "1997": 174, + "1996": 159, + "2012": 143, + "2007": 122, + "2014": 118, + "2025": 116, + "2000": 114, + "2018": 104, + "2024": 100, + "1993": 99, + "2004": 94, + "1998": 93, + "2009": 91, + "1994": 89, + "1999": 81, + "2008": 75, + "2001": 73, + "2002": 65, + "2003": 63, + "2006": 62, + "1992": 34, + "2026": 25, + "1995": 18, + "2005": 14, + "1990": 6, + "1991": 3, + "2013": 2 + } + }, + "market_cap_stats": { + "mean": 22504134160.937603, + "median": 6552263417.8, + "min": 136631083.68, + "max": 2619585989604.15, + "std": 101413359239.53035, + "total_companies": 5493 + }, + "data_collection_info": { + "source": "akshare", + "collection_time": "2026-03-26 
20:02:10", + "total_records": 5493, + "data_version": "1.0.0" + } + }, + "files_created": [ + "# 📊 A股本地数据仓库\n\n## 📋 数据概况\n\n### 基础信息数据\n- **数据来源**: AKShare (免费开源)\n- **股票数量**: 5,493只A股\n- **时间范围**: 2010年至今\n- **创建时间**: 2026-03-26 20:02:10\n\n### 存储结构\n\n```\ndata/\n├── raw/ # 原始数据\n│ ├── a_stock_daily/ # A股日线行情数据(待下载)\n│ │ ├── 2010/ # 按年分区\n│ │ ├── 2011/\n│ │ └── ...\n│ ├── financial_reports/ # 财报数据(待下载)\n│ ├── stock_info/ # 股票基础信息\n│ └── data_sources/ # 数据源配置\n├── processed/ # 处理后数据\n│ ├── a_stock_daily/ # 清洗后的日线数据\n│ ├── financial_indicators/ # 财务指标计算数据\n│ ├── stock_info/ # 标准化股票信息\n│ └── quality_reports/ # 数据质量报告\n└── running_data/ # 运行数据\n ├── update_logs/ # 更新日志\n └── config/ # 运行配置\n```\n\n### 数据结构\n\n#### 基础信息数据字段\n- **symbol**: 股票代码\n- **name**: 股票名称 \n- **industry**: 所属行业\n- **market**: 市场类型\n- **list_date**: 上市日期\n- **total_market_cap**: 总市值\n- **circulating_market_cap**: 流通市值\n\n#### 日线数据字段\n- **date**: 交易日期\n- **open/high/low/close**: 开高低收价格\n- **volume**: 成交量\n- **amount**: 成交额\n- **adj_factor**: 复权因子\n\n### 🚀 使用说明\n\n#### 1. 数据访问\n```python\nimport pandas as pd\n\n# 读取基础信息\nbasic_info = pd.read_csv(\"raw/stock_info/stock_basic_info_raw_*.csv\")\n\n# 读取日线数据(按年分区)\ndaily_2024 = pd.read_parquet(\"raw/a_stock_daily/2024/*.parquet\")\n```\n\n#### 2. 
数据更新\n```bash\n# 运行日线数据下载\npython3 scripts/data_acquisition/a_stock_daily_data.py\n```\n\n### 📊 数据质量\n\n#### 完整性检查\n- 交易日连续性\n- 价格数据完整性\n- 成交量一致性\n\n#### 准确性验证\n- 价格逻辑检查\n- 数据格式统一\n- 异常值检测\n\n### 🔧 技术架构\n\n#### 数据采集\n- 使用AKShare开源库\n- 批量并行下载\n- 自动错误重试\n\n#### 数据处理\n- Parquet列式存储\n- Snappy压缩\n- 时间分区索引\n\n---\n\n**数据维护**: 赵云(数据工程将军)\n**最后更新**: 2026-03-26 20:02:10\n" + ], + "errors": [], + "data_structure": { + "data_range": { + "start_date": "2010-01-01", + "end_date": "2026-03-26", + "include_delisted": true + }, + "data_fields": { + "daily": [ + "symbol", + "date", + "open", + "high", + "low", + "close", + "volume", + "amount", + "adj_factor", + "trade_status" + ] + }, + "storage": { + "partition_by": [ + "year", + "month" + ], + "compression": "snappy", + "file_format": "parquet" + } + } +} \ No newline at end of file diff --git a/zhaoyun-data/data/processed/a_stock_daily/daily_data_structure.json b/zhaoyun-data/data/processed/a_stock_daily/daily_data_structure.json new file mode 100644 index 000000000..d3b8d3d61 --- /dev/null +++ b/zhaoyun-data/data/processed/a_stock_daily/daily_data_structure.json @@ -0,0 +1,29 @@ +{ + "data_range": { + "start_date": "2010-01-01", + "end_date": "2026-03-26", + "include_delisted": true + }, + "data_fields": { + "daily": [ + "symbol", + "date", + "open", + "high", + "low", + "close", + "volume", + "amount", + "adj_factor", + "trade_status" + ] + }, + "storage": { + "partition_by": [ + "year", + "month" + ], + "compression": "snappy", + "file_format": "parquet" + } +} \ No newline at end of file diff --git a/zhaoyun-data/data/processed/a_stock_daily/data_summary.json b/zhaoyun-data/data/processed/a_stock_daily/data_summary.json new file mode 100644 index 000000000..3925bb2f8 --- /dev/null +++ b/zhaoyun-data/data/processed/a_stock_daily/data_summary.json @@ -0,0 +1,71 @@ +{ + "timestamp": "2026-03-26T20:02:10.005829", + "stock_summary": { + "total_stocks": 5493, + "industry_distribution": { + "汽车零部件": 264, + "通用设备": 249, + "专用设备": 
199, + "化学制品": 178, + "半导体": 173, + "化学制药": 158, + "电网设备": 139, + "医疗器械": 136, + "软件开发": 135, + "IT服务Ⅱ": 133 + }, + "listing_year_distribution": { + "2021": 523, + "2017": 432, + "2020": 432, + "2022": 428, + "2010": 322, + "2023": 313, + "2011": 264, + "2016": 226, + "2015": 214, + "2019": 202, + "1997": 174, + "1996": 159, + "2012": 143, + "2007": 122, + "2014": 118, + "2025": 116, + "2000": 114, + "2018": 104, + "2024": 100, + "1993": 99, + "2004": 94, + "1998": 93, + "2009": 91, + "1994": 89, + "1999": 81, + "2008": 75, + "2001": 73, + "2002": 65, + "2003": 63, + "2006": 62, + "1992": 34, + "2026": 25, + "1995": 18, + "2005": 14, + "1990": 6, + "1991": 3, + "2013": 2 + } + }, + "market_cap_stats": { + "mean": 22504134160.937603, + "median": 6552263417.8, + "min": 136631083.68, + "max": 2619585989604.15, + "std": 101413359239.53035, + "total_companies": 5493 + }, + "data_collection_info": { + "source": "akshare", + "collection_time": "2026-03-26 20:02:10", + "total_records": 5493, + "data_version": "1.0.0" + } +} \ No newline at end of file