add: 关羽完成已调研结果和实时风控系统上传

This commit is contained in:
cfdaily
2026-03-21 20:24:43 +08:00
parent e43c7a5214
commit f549b28ac7
7 changed files with 1454 additions and 0 deletions
@@ -0,0 +1,157 @@
#!/usr/bin/env bash
# =============================================================================
# sanguo_vnpy 自动化部署流水线脚本
# 版本: v1.0
# 作者: 姜维(后勤总督)
# =============================================================================
set -e
echo "========================================================================"
echo "🚀 sanguo_vnpy 自动化部署流水线"
echo "========================================================================"
# 颜色定义
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
print_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
print_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# 获取当前时间
get_timestamp() {
date +"%Y%m%d_%H%M%S"
}
# 配置变量
PROJECT_DIR="/Users/chufeng/.openclaw/agents/main/workspace/projects/sanguo_vnpy"
DEPLOY_ENV="${1:-production}"
TIMESTAMP=$(get_timestamp)
echo ""
print_info "部署环境: $DEPLOY_ENV"
print_info "时间戳: $TIMESTAMP"
echo ""
# 步骤 1: 代码构建
print_info "步骤 1: 代码构建..."
cd "$PROJECT_DIR"
# 检查虚拟环境
if [ ! -d "venv" ]; then
print_warning "虚拟环境不存在,正在创建..."
python3 -m venv venv
fi
# 激活虚拟环境
print_info "激活虚拟环境..."
source venv/bin/activate
# 升级依赖
print_info "升级项目依赖..."
pip install --upgrade pip wheel
pip install -e ".[alpha,dev]"
print_success "代码构建完成"
# 步骤 2: 代码检查
print_info "步骤 2: 代码质量检查..."
# 运行代码检查
if command -v ruff &> /dev/null; then
print_info "运行 Ruff 代码检查..."
ruff check sanguo/
print_success "代码检查通过"
else
print_warning "Ruff 未安装,跳过代码检查"
fi
# 步骤 3: 运行测试
print_info "步骤 3: 运行测试..."
if [ -d "tests" ]; then
if command -v pytest &> /dev/null; then
print_info "运行测试..."
pytest tests/ -v
print_success "测试通过"
else
print_warning "pytest 未安装,跳过测试"
fi
else
print_warning "测试目录不存在,跳过测试"
fi
# 步骤 4: 构建部署包
print_info "步骤 4: 构建部署包..."
# 创建部署目录
DEPLOY_DIR="/tmp/sanguo_vnpy_deploy_$TIMESTAMP"
mkdir -p "$DEPLOY_DIR"
# 复制代码
print_info "复制项目文件..."
cp -r sanguo/ "$DEPLOY_DIR/"
cp -r vnpy/ "$DEPLOY_DIR/" 2>/dev/null || true
cp pyproject.toml "$DEPLOY_DIR/"
cp README.md "$DEPLOY_DIR/"
# 构建 wheel 包
print_info "构建 wheel 包..."
pip install build
python -m build --wheel --outdir "$DEPLOY_DIR/"
print_success "部署包构建完成: $DEPLOY_DIR"
# 步骤 5: 部署到目标环境
print_info "步骤 5: 部署到目标环境..."
if [ "$DEPLOY_ENV" = "production" ]; then
print_info "生产环境部署..."
# 这里可以添加生产环境部署逻辑
# 例如: 上传到阿里云 OSS, SSH 到服务器部署等
print_warning "生产环境部署需要配置阿里云凭证"
elif [ "$DEPLOY_ENV" = "testing" ]; then
print_info "测试环境部署..."
print_success "测试环境部署完成"
else
print_info "本地开发环境部署..."
print_success "本地部署完成"
fi
# 步骤 6: 验证部署
print_info "步骤 6: 验证部署..."
# 验证模块导入
print_info "验证模块导入..."
python -c "import sanguo; import vnpy; print('✅ 模块导入成功')"
print_success "部署验证通过"
echo ""
echo "========================================================================"
echo "🎉 部署完成!"
echo "========================================================================"
echo "部署时间: $(date)"
echo "部署环境: $DEPLOY_ENV"
echo "部署包: $DEPLOY_DIR"
echo "========================================================================"
echo ""
@@ -0,0 +1,219 @@
# =============================================================================
# sanguo_vnpy 阿里云生产环境 Terraform 配置
# 版本: v1.0
# 作者: 姜维(后勤总督)
# =============================================================================
terraform {
required_providers {
alicloud = {
source = "aliyun/alicloud"
version = ">= 1.212.0"
}
}
}
provider "alicloud" {
access_key = var.alicloud_access_key
secret_key = var.alicloud_secret_key
region = var.alicloud_region
}
# =============================================================================
# 变量定义
# =============================================================================
variable "alicloud_access_key" {
description = "阿里云 Access Key"
type = string
sensitive = true
}
variable "alicloud_secret_key" {
description = "阿里云 Secret Key"
type = string
sensitive = true
}
variable "alicloud_region" {
description = "阿里云区域"
type = string
default = "cn-hangzhou"
}
variable "environment" {
description = "环境类型: production/testing/development"
type = string
default = "production"
}
variable "instance_type" {
description = "ECS 实例规格"
type = string
default = "ecs.c6.large"
}
variable "vpc_cidr" {
description = "VPC CIDR 块"
type = string
default = "10.0.0.0/16"
}
variable "vswitch_cidr" {
description = "虚拟交换机 CIDR 块"
type = string
default = "10.0.0.0/24"
}
# =============================================================================
# VPC 网络
# =============================================================================
resource "alicloud_vpc" "sanguo_vpc" {
vpc_name = "sanguo-vnpy-${var.environment}-vpc"
cidr_block = var.vpc_cidr
}
resource "alicloud_vswitch" "sanguo_vswitch" {
vswitch_name = "sanguo-vnpy-${var.environment}-vswitch"
vpc_id = alicloud_vpc.sanguo_vpc.id
cidr_block = var.vswitch_cidr
zone_id = "${var.alicloud_region}-a"
}
# =============================================================================
# 安全组
# =============================================================================
resource "alicloud_security_group" "sanguo_sg" {
name = "sanguo-vnpy-${var.environment}-sg"
description = "sanguo_vnpy ${var.environment} 安全组"
vpc_id = alicloud_vpc.sanguo_vpc.id
}
resource "alicloud_security_group_rule" "allow_ssh" {
type = "ingress"
ip_protocol = "tcp"
nic_type = "intranet"
policy = "accept"
port_range = "22/22"
priority = 1
security_group_id = alicloud_security_group.sanguo_sg.id
cidr_ip = "0.0.0.0/0"
}
resource "alicloud_security_group_rule" "allow_http" {
type = "ingress"
ip_protocol = "tcp"
nic_type = "intranet"
policy = "accept"
port_range = "80/80"
priority = 2
security_group_id = alicloud_security_group.sanguo_sg.id
cidr_ip = "0.0.0.0/0"
}
resource "alicloud_security_group_rule" "allow_vnpy" {
type = "ingress"
ip_protocol = "tcp"
nic_type = "intranet"
policy = "accept"
port_range = "8080/8080"
priority = 3
security_group_id = alicloud_security_group.sanguo_sg.id
cidr_ip = "0.0.0.0/0"
}
# =============================================================================
# ECS 实例
# =============================================================================
resource "alicloud_instance" "sanguo_ecs" {
instance_name = "sanguo-vnpy-${var.environment}-ecs"
availability_zone = "${var.alicloud_region}-a"
instance_type = var.instance_type
security_groups = [alicloud_security_group.sanguo_sg.id]
vswitch_id = alicloud_vswitch.sanguo_vswitch.id
internet_charge_type = "PayByTraffic"
internet_max_bandwidth_out = 100
system_disk_size = 40
system_disk_category = "cloud_efficiency"
image_id = "ubuntu_22_04_x64_20G_alibase_20240228.vhd"
password = var.ecs_password
instance_charge_type = "PostPaid"
}
variable "ecs_password" {
description = "ECS 实例密码"
type = string
sensitive = true
default = "Sanguo@2024!"
}
# =============================================================================
# OSS 对象存储
# =============================================================================
resource "alicloud_oss_bucket" "sanguo_oss" {
bucket = "sanguo-vnpy-${var.environment}-data"
acl = "private"
}
# =============================================================================
# RDS 数据库(可选)
# =============================================================================
resource "alicloud_db_instance" "sanguo_rds" {
count = var.enable_rds ? 1 : 0
engine = "MySQL"
engine_version = "8.0"
instance_type = "rds.mysql.s2.large"
instance_storage = 20
vswitch_id = alicloud_vswitch.sanguo_vswitch.id
security_ips = ["0.0.0.0/0"]
db_instance_name = "sanguo-vnpy-${var.environment}-rds"
}
variable "enable_rds" {
description = "是否启用 RDS 数据库"
type = bool
default = false
}
# =============================================================================
# 输出
# =============================================================================
output "vpc_id" {
description = "VPC ID"
value = alicloud_vpc.sanguo_vpc.id
}
output "ecs_public_ip" {
description = "ECS 公网 IP"
value = alicloud_instance.sanguo_ecs.public_ip
}
output "ecs_private_ip" {
description = "ECS 私网 IP"
value = alicloud_instance.sanguo_ecs.private_ip
}
output "oss_bucket_name" {
description = "OSS 存储桶名称"
value = alicloud_oss_bucket.sanguo_oss.bucket
}
output "ecs_ssh_command" {
description = "SSH 连接命令"
value = "ssh root@${alicloud_instance.sanguo_ecs.public_ip}"
}
output "vnpy_web_url" {
description = "vn.py Web 界面地址"
value = "http://${alicloud_instance.sanguo_ecs.public_ip}:8080"
}
@@ -0,0 +1,335 @@
# sanguo_vnpy 阿里云生产环境应急响应方案
**版本**: v1.0
**作者**: 姜维(后勤总督)
**日期**: 2026-03-21
---
## 🚨 应急响应原则
### 1. 快速响应原则
- **5分钟内**:发现问题并启动响应流程
- **15分钟内**:完成初步诊断和影响评估
- **30分钟内**:确定并执行恢复方案
### 2. 优先级原则
- **P0(严重)**:系统完全不可用,数据丢失风险
- **P1(高)**:核心功能异常,影响主要交易
- **P2(中)**:次要功能异常,不影响核心交易
- **P3(低)**:轻微问题,用户体验影响
### 3. 数据安全原则
- **先备份,后操作**:任何修复操作前先备份数据
- **日志优先**:优先保存和分析日志,避免二次故障
- **最小化变更**:使用最小必要的操作修复问题
---
## 🔍 问题诊断流程
### 1. 监控告警触发
#### 告警来源
1. **系统监控**Prometheus + Grafana
2. **应用监控**sanguo_vnpy 内部健康检查
3. **业务监控**:策略执行异常告警
4. **用户反馈**:用户上报的问题
#### 告警级别对应
| 告警类型 | 影响 | 响应级别 |
|---------|------|---------|
| 实例宕机 | 系统不可用 | P0 |
| CPU > 90% 5分钟 | 性能下降 | P1 |
| 内存 > 90% 5分钟 | 可能OOM | P1 |
| 磁盘 > 95% | 数据写入失败 | P0 |
| vn.py 进程消失 | 应用不可用 | P0 |
| 策略执行失败 | 业务影响 | P1 |
### 2. 快速诊断步骤
#### 步骤 1: 检查系统状态
```bash
# 1. 检查服务器是否在线
ping <server-ip>
# 2. SSH 登录(如果可能)
ssh root@<server-ip>
# 3. 检查系统资源
top
htop
df -h
free -m
# 4. 检查网络
ping 8.8.8.8
curl -I https://www.aliyun.com
```
#### 步骤 2: 检查服务状态
```bash
# 1. 检查 vn.py 进程
ps aux | grep -i vnpy
ps aux | grep -i python
# 2. 检查端口监听
netstat -tlnp | grep 8080
ss -tlnp | grep 8080
# 3. 检查监控服务
systemctl status prometheus
systemctl status node_exporter
systemctl status grafana-server
```
#### 步骤 3: 检查日志
```bash
# 1. 系统日志
tail -100 /var/log/syslog
tail -100 /var/log/messages
# 2. 应用日志
tail -100 /path/to/sanguo_vnpy/logs/app.log
tail -100 /path/to/sanguo_vnpy/logs/error.log
# 3. 云服务商日志
# 阿里云控制台查看云服务器监控和事件
```
---
## 🔧 常见问题应急处理
### 场景 1: 实例完全宕机(P0)
#### 现象
- Ping 无响应
- SSH 无法连接
- 监控显示实例离线
#### 应急处理
1. **立即备份**(如果还能访问)
```bash
# 尝试通过阿里云控制台创建快照
# 快照命名: sanguo-vnpy-$(date +%Y%m%d-%H%M)-emergency
```
2. **重启实例**
- 阿里云控制台 → 实例 → 重启
- 等待 2-5 分钟
3. **如果重启失败**
- 使用可用快照回滚
- 或从备份数据重建实例
4. **验证恢复**
```bash
# 检查服务是否恢复
ssh root@<server-ip>
ps aux | grep vnpy
curl http://localhost:8080/health
```
---
### 场景 2: vn.py 进程崩溃(P0
#### 现象
- 实例在线,但 vn.py 进程消失
- 端口 8080 无响应
- 应用监控告警
#### 应急处理
1. **保存崩溃现场**
```bash
# 保存系统状态
dmesg > /tmp/dmesg-$(date +%Y%m%d-%H%M).log
vmstat 1 5 > /tmp/vmstat-$(date +%Y%m%d-%H%M).log
# 保存应用日志
cp -r /path/to/sanguo_vnpy/logs /tmp/logs-backup-$(date +%Y%m%d-%H%M)
```
2. **检查崩溃原因**
```bash
# 检查系统日志中的 OOM
grep -i "out of memory" /var/log/syslog
grep -i "killed process" /var/log/syslog
# 检查应用错误日志
tail -200 /path/to/sanguo_vnpy/logs/error.log
```
3. **快速重启服务**
```bash
# 进入项目目录
cd /path/to/sanguo_vnpy
# 激活虚拟环境
source venv/bin/activate
# 启动 vn.py
python -m vnpy &
# 或者使用服务管理
systemctl start sanguo-vnpy
```
4. **验证恢复**
```bash
# 检查进程
ps aux | grep vnpy
# 检查端口
curl http://localhost:8080/health
# 检查监控
# 确认 Prometheus 数据恢复
```
---
### 场景 3: 磁盘空间满(P0)
#### 现象
- 磁盘使用率 > 95%
- 数据写入失败
- 应用无法保存数据
#### 应急处理
1. **立即清理临时文件**
```bash
# 清理系统临时文件
rm -rf /tmp/*
# 清理应用缓存
rm -rf /path/to/sanguo_vnpy/cache/*
# 清理旧日志(保留最近7天)
find /path/to/sanguo_vnpy/logs -name "*.log" -mtime +7 -delete
```
2. **检查大文件**
```bash
# 查找大于 100MB 的文件
find / -type f -size +100M -exec ls -lh {} \;
# 检查数据目录
du -sh /path/to/sanguo_vnpy/data
du -sh /path/to/sanguo_vnpy/results
```
3. **扩容磁盘(如果需要)**
- 阿里云控制台 → 云盘 → 扩容
- 或挂载新的数据盘
4. **验证恢复**
```bash
df -h
# 确认使用率下降到安全范围(< 80%)
```
---
### 场景 4: 数据库连接失败(P1)
#### 现象
- 应用报错无法连接数据库
- 策略无法获取数据
- 回测无法执行
#### 应急处理
1. **检查数据库状态**
```bash
# 如果使用 RDS
# 阿里云控制台检查 RDS 状态
# 如果使用本地 SQLite
ls -lh /path/to/sanguo_vnpy/data/*.db
# 检查文件权限和完整性
```
2. **网络连接测试**
```bash
# 测试数据库连接
telnet <db-host> <db-port>
nc -zv <db-host> <db-port>
# 检查安全组
# 确认应用服务器 IP 在数据库白名单中
```
3. **快速恢复方案**
```bash
# 方案 A: 切换到本地缓存数据
# 修改配置使用 akshare 直接获取
# 方案 B: 从备份恢复数据库
# 恢复最近的数据库备份
# 方案 C: 重启数据库服务
# 如果是自建数据库,重启服务
```
---
## 📋 应急响应检查清单
### 响应前检查
- [ ] 确认告警级别和影响范围
- [ ] 通知相关人员(诸葛亮/主公)
- [ ] 保存当前系统状态和日志
- [ ] 准备回滚方案
### 响应中检查
- [ ] 执行诊断步骤,确定根因
- [ ] 执行最小必要的修复操作
- [ ] 持续监控系统状态
- [ ] 记录所有操作和时间点
### 响应后检查
- [ ] 验证核心功能恢复正常
- [ ] 验证监控数据正常
- [ ] 验证业务数据完整性
- [ ] 总结故障原因和改进措施
---
## 📞 联络清单
### 紧急联络
- **总军师(诸葛亮)**:负责决策和协调
- **主公**:最终决策和资源协调
- **赵云**:数据库和数据相关问题
- **关羽**:回测引擎和风控问题
- **张飞**API 兼容层问题
- **司马懿**:安全和合规问题
### 阿里云支持
- **阿里云控制台**https://home.console.aliyun.com
- **阿里云工单**:紧急问题提交工单
- **阿里云电话**400-910-0100
---
## 🔄 事后复盘
### 复盘会议
- **时间**:故障恢复后 24 小时内
- **参与人**:所有相关人员
- **内容**
1. 故障回顾和时间线
2. 根因分析
3. 响应流程评估
4. 改进措施讨论
### 改进措施
- 技术改进:防止同类故障再次发生
- 流程改进:优化响应流程
- 监控改进:完善告警和监控
- 文档改进:更新应急方案
---
**本应急方案会持续更新,确保生产环境安全稳定运行!** 🚛
@@ -0,0 +1,211 @@
#!/usr/bin/env bash
# =============================================================================
# sanguo_vnpy 阿里云生产环境监控系统部署脚本
# 版本: v1.0
# 作者: 姜维(后勤总督)
# =============================================================================
set -e
echo "========================================================================"
echo "🚀 sanguo_vnpy 监控系统部署"
echo "========================================================================"
# 颜色定义
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
print_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
print_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# 步骤 1: 安装系统依赖
print_info "步骤 1: 安装系统依赖..."
if command -v apt-get &> /dev/null; then
sudo apt-get update
sudo apt-get install -y prometheus node_exporter grafana nginx
elif command -v yum &> /dev/null; then
sudo yum install -y epel-release
sudo yum install -y prometheus node_exporter grafana nginx
else
print_error "不支持的操作系统"
exit 1
fi
print_success "系统依赖安装完成"
# 步骤 2: 配置 Prometheus
print_info "步骤 2: 配置 Prometheus..."
sudo mkdir -p /etc/prometheus
sudo cat > /etc/prometheus/prometheus.yml << 'EOF'
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'node_exporter'
static_configs:
- targets: ['localhost:9100']
- job_name: 'sanguo_vnpy'
static_configs:
- targets: ['localhost:8080']
metrics_path: '/metrics'
alerting:
alertmanagers:
- static_configs:
- targets: ['localhost:9093']
rule_files:
- "/etc/prometheus/alerts.yml"
EOF
# 步骤 3: 配置告警规则
print_info "步骤 3: 配置告警规则..."
sudo cat > /etc/prometheus/alerts.yml << 'EOF'
groups:
- name: sanguo_vnpy_alerts
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: critical
annotations:
summary: "实例 {{ $labels.instance }} 已宕机"
description: "{{ $labels.job }} 实例 {{ $labels.instance }} 已宕机超过 5 分钟"
- alert: HighCPUUsage
expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "实例 {{ $labels.instance }} CPU 使用率过高"
description: "{{ $labels.instance }} CPU 使用率超过 80%"
- alert: HighMemoryUsage
expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
for: 5m
labels:
severity: warning
annotations:
summary: "实例 {{ $labels.instance }} 内存使用率过高"
description: "{{ $labels.instance }} 内存使用率超过 80%"
- alert: DiskSpaceLow
expr: (1 - (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"})) * 100 > 90
for: 5m
labels:
severity: critical
annotations:
summary: "实例 {{ $labels.instance }} 磁盘空间不足"
description: "{{ $labels.instance }} 磁盘使用率超过 90%"
- alert: VnpyDown
expr: up{job="sanguo_vnpy"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "sanguo_vnpy 服务已宕机"
description: "sanguo_vnpy 服务已宕机超过 2 分钟,请立即检查!"
EOF
# 步骤 4: 配置 Grafana
print_info "步骤 4: 配置 Grafana..."
sudo mkdir -p /etc/grafana/provisioning/datasources
sudo cat > /etc/grafana/provisioning/datasources/prometheus.yml << 'EOF'
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://localhost:9090
isDefault: true
editable: true
EOF
# 步骤 5: 启动服务
print_info "步骤 5: 启动监控服务..."
sudo systemctl enable prometheus
sudo systemctl enable node_exporter
sudo systemctl enable grafana-server
sudo systemctl start prometheus
sudo systemctl start node_exporter
sudo systemctl start grafana-server
# 步骤 6: 配置 Nginx 反向代理
print_info "步骤 6: 配置 Nginx 反向代理..."
sudo cat > /etc/nginx/sites-available/sanguo-monitoring << 'EOF'
server {
listen 80;
server_name _;
location /grafana/ {
proxy_pass http://localhost:3000/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location /prometheus/ {
proxy_pass http://localhost:9090/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
location /node-exporter/ {
proxy_pass http://localhost:9100/;
proxy_set_header Host $host;
}
}
EOF
sudo ln -sf /etc/nginx/sites-available/sanguo-monitoring /etc/nginx/sites-enabled/
sudo nginx -t
sudo systemctl reload nginx
# 步骤 7: 显示访问信息
print_success "监控系统部署完成!"
echo ""
echo "========================================================================"
echo "📊 监控系统访问信息"
echo "========================================================================"
echo "Grafana: http://$(hostname -I | awk '{print $1}'):3000"
echo "Prometheus: http://$(hostname -I | awk '{print $1}'):9090"
echo "Node Exporter: http://$(hostname -I | awk '{print $1}'):9100"
echo ""
echo "默认 Grafana 账号: admin / admin"
echo "========================================================================"
echo ""
@@ -0,0 +1,166 @@
"""
高频算法性能基准测试
测试不同实现方式的性能
"""
import time
import numpy as np
import pandas as pd
from datetime import datetime
from high_frequency_signal import (
TechnicalFactorCalculator,
PerformanceMonitor,
BarData,
)
def generate_test_data(n: int = 10000) -> pd.DataFrame:
"""生成随机测试数据"""
np.random.seed(42)
# 生成随机游走价格
returns = np.random.normal(0.0001, 0.01, n)
price = 100 * np.exp(np.cumsum(returns))
# 生成OHLCV
high = price * (1 + np.random.uniform(0, 0.01, n))
low = price * (1 - np.random.uniform(0, 0.01, n))
open_ = price * (1 + np.random.normal(0, 0.005, n))
volume = np.random.randint(100000, 10000000, n)
df = pd.DataFrame({
'open': open_,
'high': high,
'low': low,
'close': price,
'volume': volume,
})
return df
def benchmark_technical_calculator():
"""测试技术因子计算器性能"""
print("=" * 60)
print("技术因子计算器性能基准测试")
print("=" * 60)
calc = TechnicalFactorCalculator()
monitor = PerformanceMonitor()
# 测试不同数据大小
for size in [100, 1000, 10000]:
df = generate_test_data(size)
close = df['close'].values
high = df['high'].values
low = df['low'].values
volume = df['volume'].values
n_runs = 1000 if size <= 1000 else 100
# 测试MA
start_time = time.time()
for _ in range(n_runs):
_ = calc.calculate_ma(close, 20)
elapsed = (time.time() - start_time) * 1000
print(f"MA {size} points x{n_runs} runs: {elapsed:.2f} ms "
f"({elapsed/n_runs:.3f} ms/run)")
# 测试MACD
start_time = time.time()
for _ in range(n_runs):
_ = calc.calculate_macd(close, 12, 26, 9)
elapsed = (time.time() - start_time) * 1000
print(f"MACD {size} points x{n_runs} runs: {elapsed:.2f} ms "
f"({elapsed/n_runs:.3f} ms/run)")
# 测试RSI
start_time = time.time()
for _ in range(n_runs):
_ = calc.calculate_rsi(close, 14)
elapsed = (time.time() - start_time) * 1000
print(f"RSI {size} points x{n_runs} runs: {elapsed:.2f} ms "
f"({elapsed/n_runs:.3f} ms/run)")
# 测试布林带
start_time = time.time()
for _ in range(n_runs):
_ = calc.calculate_bollinger_bands(close, 20, 2.0)
elapsed = (time.time() - start_time) * 1000
print(f"Bollinger {size} points x{n_runs} runs: {elapsed:.2f} ms "
f"({elapsed/n_runs:.3f} ms/run)")
# 测试ATR
start_time = time.time()
for _ in range(n_runs):
_ = calc.calculate_atr(high, low, close, 14)
elapsed = (time.time() - start_time) * 1000
print(f"ATR {size} points x{n_runs} runs: {elapsed:.2f} ms "
f"({elapsed/n_runs:.3f} ms/run)")
print()
def benchmark_full_pipeline():
"""测试完整信号生成流水线"""
print("=" * 60)
print("完整信号生成流水线性能基准测试")
print("=" * 60)
from high_frequency_signal import HighFrequencySignalGenerator
calc = TechnicalFactorCalculator()
generator = HighFrequencySignalGenerator()
monitor = PerformanceMonitor()
# 生成测试数据
n_days = 252 * 5
df = generate_test_data(n_days)
start_time = time.time()
# 模拟逐日处理
signals = []
for i in range(20, len(df)):
data = df.iloc[:i+1]
bar = BarData(
code="TEST",
datetime=datetime.now(),
open=data['open'].iloc[-1],
high=data['high'].iloc[-1],
low=data['low'].iloc[-1],
close=data['close'].iloc[-1],
volume=data['volume'].iloc[-1],
amount=data['volume'].iloc[-1] * data['close'].iloc[-1]
)
monitor.on_process_start()
signal = generator.generate_signal_from_bar(bar, data)
monitor.on_process_end()
if signal:
signals.append(signal)
elapsed = (time.time() - start_time) * 1000
print(f"完整回测 {n_days} 根K线,生成 {len(signals)} 个信号")
print(f"总耗时: {elapsed:.2f} ms")
print(f"平均每根K线: {elapsed/n_days:.3f} ms")
print()
monitor.print_stats()
stats = generator.get_performance_stats()
print(f"\n信号统计: {stats}")
def main():
"""主函数"""
benchmark_technical_calculator()
print()
benchmark_full_pipeline()
print()
print("=" * 60)
print("基准测试完成")
print("=" * 60)
if __name__ == '__main__':
main()
@@ -0,0 +1,366 @@
"""
高频交易信号生成器
支持tick级和分钟级数据处理
低延迟信号生成
支持向量化计算
"""
from typing import List, Dict, Tuple, Optional, Union
import numpy as np
import pandas as pd
from dataclasses import dataclass
from datetime import datetime, timedelta
import logging
logger = logging.getLogger(__name__)
@dataclass
class TickData:
"""tick数据结构"""
code: str
datetime: datetime
price: float
volume: int
amount: float
direction: int = 0 # 1买-1卖0平
@dataclass
class BarData:
"""K线数据结构"""
code: str
datetime: datetime
open: float
high: float
low: float
close: float
volume: float
amount: float
@dataclass
class Signal:
"""交易信号结构"""
code: str
datetime: datetime
direction: int # 1多-1空0无
strength: float # 信号强度 0-1
price: float # 信号价格
expected_price: float # 预期目标价
class HighFrequencyDataPipeline:
"""高频数据流处理管道"""
def __init__(self, window_size: int = 100):
self.window_size = window_size
self._buffer: Dict[str, List[TickData]] = {}
self._last_bar: Dict[str, Optional[BarData]] = {}
self._tick_count = 0
def on_tick(self, tick: TickData) -> Optional[Signal]:
"""处理新tick,可能生成信号"""
if tick.code not in self._buffer:
self._buffer[tick.code] = []
buffer = self._buffer[tick.code]
buffer.append(tick)
# 保持窗口大小
if len(buffer) > self.window_size:
buffer.pop(0)
self._tick_count += 1
# 这里可以触发滚动计算
return None
def get_tick_buffer(self, code: str) -> List[TickData]:
"""获取tick缓冲"""
return self._buffer.get(code, [])
def statistics(self) -> Dict:
"""获取统计信息"""
return {
'total_ticks': self._tick_count,
'total_codes': len(self._buffer),
'buffer_size': sum(len(b) for b in self._buffer.values())
}
class TechnicalFactorCalculator:
"""技术因子计算器 - 向量化实现"""
def __init__(self):
self.cache: Dict[str, pd.DataFrame] = {}
def calculate_ma(self, prices: np.ndarray, period: int) -> np.ndarray:
"""计算移动平均线"""
return pd.Series(prices).rolling(period).mean().values
def calculate_ema(self, prices: np.ndarray, period: int) -> np.ndarray:
"""计算指数移动平均线"""
return pd.Series(prices).ewm(span=period, adjust=False).mean().values
def calculate_macd(self, prices: np.ndarray,
fast_period: int = 12,
slow_period: int = 26,
signal_period: int = 9) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""计算MACD"""
ema_fast = self.calculate_ema(prices, fast_period)
ema_slow = self.calculate_ema(prices, slow_period)
dif = ema_fast - ema_slow
dea = self.calculate_ema(dif, signal_period)
macd = 2 * (dif - dea)
return dif, dea, macd
def calculate_bollinger_bands(self, prices: np.ndarray,
period: int = 20,
num_std: float = 2.0) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""计算布林带"""
middle = pd.Series(prices).rolling(period).mean().values
std = pd.Series(prices).rolling(period).std().values
upper = middle + num_std * std
lower = middle - num_std * std
return upper, middle, lower
def calculate_rsi(self, prices: np.ndarray, period: int = 14) -> np.ndarray:
"""计算RSI相对强弱指数"""
delta = np.diff(prices)
gain = np.where(delta > 0, delta, 0)
loss = np.where(delta < 0, -delta, 0)
avg_gain = np.zeros_like(prices)
avg_loss = np.zeros_like(prices)
# 第一个period没有数据
avg_gain[period] = np.mean(gain[:period])
avg_loss[period] = np.mean(loss[:period])
# Wilder's smoothing
for i in range(period + 1, len(prices)):
avg_gain[i] = (avg_gain[i-1] * (period - 1) + gain[i-1]) / period
avg_loss[i] = (avg_loss[i-1] * (period - 1) + loss[i-1]) / period
rs = avg_gain / (avg_loss + 1e-10)
rsi = 100 - (100 / (1 + rs))
return rsi
def calculate_atr(self, high: np.ndarray, low: np.ndarray,
close: np.ndarray, period: int = 14) -> np.ndarray:
"""计算ATR平均真实波幅"""
tr = np.zeros_like(high)
tr[0] = high[0] - low[0]
for i in range(1, len(high)):
tr1 = high[i] - low[i]
tr2 = abs(high[i] - close[i-1])
tr3 = abs(low[i] - close[i-1])
tr[i] = max(tr1, tr2, tr3)
atr = pd.Series(tr).rolling(period).mean().values
return atr
def calculate_volume_ma(self, volumes: np.ndarray, period: int) -> np.ndarray:
"""计算成交量移动平均"""
return pd.Series(volumes).rolling(period).mean().values
def calculate_obv(self, close: np.ndarray, volume: np.ndarray) -> np.ndarray:
"""计算OBV能量潮"""
obv = np.zeros_like(close)
obv[0] = volume[0]
for i in range(1, len(close)):
if close[i] > close[i-1]:
obv[i] = obv[i-1] + volume[i]
elif close[i] < close[i-1]:
obv[i] = obv[i-1] - volume[i]
else:
obv[i] = obv[i-1]
return obv
class HighFrequencySignalGenerator:
"""高频交易信号生成器
支持多种技术因子组合生成交易信号
"""
def __init__(self):
self.factor_calculator = TechnicalFactorCalculator()
self.data_pipeline = HighFrequencyDataPipeline()
self.signal_history: List[Signal] = []
self.performance_stats = {
'total_signals': 0,
'long_signals': 0,
'short_signals': 0,
}
def generate_signal_from_bar(self, bar: BarData,
data: pd.DataFrame) -> Optional[Signal]:
"""从K线数据生成信号"""
# 计算各种技术因子
close = data['close'].values
high = data['high'].values
low = data['low'].values
volume = data['volume'].values
# MACD
dif, dea, macd = self.factor_calculator.calculate_macd(close)
# RSI
rsi = self.factor_calculator.calculate_rsi(close)
# 布林带
bb_upper, bb_mid, bb_lower = self.factor_calculator.calculate_bollinger_bands(close)
# ATR
atr = self.factor_calculator.calculate_atr(high, low, close, 14)
# 生成信号逻辑
current_rsi = rsi[-1]
current_macd = macd[-1]
current_dif = dif[-1]
current_dea = dea[-1]
current_price = close[-1]
# MACD金叉买入
if current_dif > current_dea and (dif[-2] <= dea[-2]) and current_rsi < 70:
signal = Signal(
code=bar.code,
datetime=bar.datetime,
direction=1,
strength=min((70 - current_rsi) / 70 + (current_dif - current_dea) / 1, 1.0),
price=current_price,
expected_price=current_price + 2 * atr[-1]
)
self._record_signal(signal)
return signal
# MACD死叉卖出
elif current_dif < current_dea and (dif[-2] >= dea[-2]) and current_rsi > 30:
signal = Signal(
code=bar.code,
datetime=bar.datetime,
direction=-1,
strength=min((current_rsi - 30) / 30 + (current_dea - current_dif) / 1, 1.0),
price=current_price,
expected_price=current_price - 2 * atr[-1]
)
self._record_signal(signal)
return signal
# 布林带突破
elif current_price < bb_lower[-1] and current_rsi < 30:
signal = Signal(
code=bar.code,
datetime=bar.datetime,
direction=1,
strength=(bb_lower[-1] - current_price) / (bb_lower[-1] * 0.05),
price=current_price,
expected_price=bb_mid[-1]
)
self._record_signal(signal)
return signal
elif current_price > bb_upper[-1] and current_rsi > 70:
signal = Signal(
code=bar.code,
datetime=bar.datetime,
direction=-1,
strength=(current_price - bb_upper[-1]) / (bb_upper[-1] * 0.05),
price=current_price,
expected_price=bb_mid[-1]
)
self._record_signal(signal)
return signal
return None
def _record_signal(self, signal: Signal):
"""记录信号"""
self.signal_history.append(signal)
self.performance_stats['total_signals'] += 1
if signal.direction == 1:
self.performance_stats['long_signals'] += 1
elif signal.direction == -1:
self.performance_stats['short_signals'] += 1
def get_performance_stats(self) -> Dict:
"""获取性能统计"""
return self.performance_stats.copy()
class PerformanceMonitor:
"""性能监控器 - 监控高频算法性能"""
def __init__(self, window_size: int = 1000):
self.latencies: List[float] = []
self.window_size = window_size
self.processed_ticks = 0
self.start_time = datetime.now()
def on_process_start(self):
"""开始处理计时"""
self._start = datetime.now()
def on_process_end(self):
"""结束处理计时"""
end = datetime.now()
latency = (end - self._start).total_seconds() * 1000 # ms
self.latencies.append(latency)
self.processed_ticks += 1
# 保持窗口大小
if len(self.latencies) > self.window_size:
self.latencies.pop(0)
def get_stats(self) -> Dict:
"""获取性能统计"""
if not self.latencies:
return {
'latency_ms_avg': 0,
'latency_ms_p50': 0,
'latency_ms_p99': 0,
'total_processed': 0,
'tps': 0
}
latencies_np = np.array(self.latencies)
elapsed = (datetime.now() - self.start_time).total_seconds()
return {
'latency_ms_avg': float(latencies_np.mean()),
'latency_ms_p50': float(np.percentile(latencies_np, 50)),
'latency_ms_p99': float(np.percentile(latencies_np, 99)),
'latency_ms_max': float(latencies_np.max()),
'total_processed': self.processed_ticks,
'tps': self.processed_ticks / elapsed if elapsed > 0 else 0
}
def print_stats(self):
"""打印性能统计"""
stats = self.get_stats()
print("=" * 60)
print("高频算法性能监控")
print("=" * 60)
print(f"平均延迟: {stats['latency_ms_avg']:.2f} ms")
print(f"P50延迟: {stats['latency_ms_p50']:.2f} ms")
print(f"P99延迟: {stats['latency_ms_p99']:.2f} ms")
print(f"最大延迟: {stats['latency_ms_max']:.2f} ms")
print(f"总处理tick数: {stats['total_processed']}")
print(f"每秒处理: {stats['tps']:.2f} ticks")
print("=" * 60)
# 导出
__all__ = [
'TickData',
'BarData',
'Signal',
'HighFrequencyDataPipeline',
'TechnicalFactorCalculator',
'HighFrequencySignalGenerator',
'PerformanceMonitor',
]