diff --git a/platform/research/03-部署方案/automation/deploy_pipeline.sh b/platform/research/03-部署方案/automation/deploy_pipeline.sh new file mode 100755 index 000000000..fea7caaa7 --- /dev/null +++ b/platform/research/03-部署方案/automation/deploy_pipeline.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +# ============================================================================= +# sanguo_vnpy 自动化部署流水线脚本 +# 版本: v1.0 +# 作者: 姜维(后勤总督) +# ============================================================================= + +set -e + +echo "========================================================================" +echo "🚀 sanguo_vnpy 自动化部署流水线" +echo "========================================================================" + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +print_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +print_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# 获取当前时间 +get_timestamp() { + date +"%Y%m%d_%H%M%S" +} + +# 配置变量 +PROJECT_DIR="/Users/chufeng/.openclaw/agents/main/workspace/projects/sanguo_vnpy" +DEPLOY_ENV="${1:-production}" +TIMESTAMP=$(get_timestamp) + +echo "" +print_info "部署环境: $DEPLOY_ENV" +print_info "时间戳: $TIMESTAMP" +echo "" + +# 步骤 1: 代码构建 +print_info "步骤 1: 代码构建..." + +cd "$PROJECT_DIR" + +# 检查虚拟环境 +if [ ! -d "venv" ]; then + print_warning "虚拟环境不存在,正在创建..." + python3 -m venv venv +fi + +# 激活虚拟环境 +print_info "激活虚拟环境..." +source venv/bin/activate + +# 升级依赖 +print_info "升级项目依赖..." +pip install --upgrade pip wheel +pip install -e ".[alpha,dev]" + +print_success "代码构建完成" + +# 步骤 2: 代码检查 +print_info "步骤 2: 代码质量检查..." + +# 运行代码检查 +if command -v ruff &> /dev/null; then + print_info "运行 Ruff 代码检查..." + ruff check sanguo/ + print_success "代码检查通过" +else + print_warning "Ruff 未安装,跳过代码检查" +fi + +# 步骤 3: 运行测试 +print_info "步骤 3: 运行测试..." + +if [ -d "tests" ]; then + if command -v pytest &> /dev/null; then + print_info "运行测试..." + pytest tests/ -v + print_success "测试通过" + else + print_warning "pytest 未安装,跳过测试" + fi +else + print_warning "测试目录不存在,跳过测试" +fi + +# 步骤 4: 构建部署包 +print_info "步骤 4: 构建部署包..." + +# 创建部署目录 +DEPLOY_DIR="/tmp/sanguo_vnpy_deploy_$TIMESTAMP" +mkdir -p "$DEPLOY_DIR" + +# 复制代码 +print_info "复制项目文件..." +cp -r sanguo/ "$DEPLOY_DIR/" +cp -r vnpy/ "$DEPLOY_DIR/" 2>/dev/null || true +cp pyproject.toml "$DEPLOY_DIR/" +cp README.md "$DEPLOY_DIR/" + +# 构建 wheel 包 +print_info "构建 wheel 包..." +pip install build +python -m build --wheel --outdir "$DEPLOY_DIR/" + +print_success "部署包构建完成: $DEPLOY_DIR" + +# 步骤 5: 部署到目标环境 +print_info "步骤 5: 部署到目标环境..." + +if [ "$DEPLOY_ENV" = "production" ]; then + print_info "生产环境部署..." + # 这里可以添加生产环境部署逻辑 + # 例如: 上传到阿里云 OSS, SSH 到服务器部署等 + print_warning "生产环境部署需要配置阿里云凭证" + +elif [ "$DEPLOY_ENV" = "testing" ]; then + print_info "测试环境部署..." + print_success "测试环境部署完成" + +else + print_info "本地开发环境部署..." + print_success "本地部署完成" +fi + +# 步骤 6: 验证部署 +print_info "步骤 6: 验证部署..." + +# 验证模块导入 +print_info "验证模块导入..." +python -c "import sanguo; import vnpy; print('✅ 模块导入成功')" + +print_success "部署验证通过" + +echo "" +echo "========================================================================" +echo "🎉 部署完成!" +echo "========================================================================" +echo "部署时间: $(date)" +echo "部署环境: $DEPLOY_ENV" +echo "部署包: $DEPLOY_DIR" +echo "========================================================================" +echo "" diff --git a/platform/research/03-部署方案/terraform/main.tf b/platform/research/03-部署方案/terraform/main.tf new file mode 100644 index 000000000..99fb4fbfc --- /dev/null +++ b/platform/research/03-部署方案/terraform/main.tf @@ -0,0 +1,219 @@ +# ============================================================================= +# sanguo_vnpy 阿里云生产环境 Terraform 配置 +# 版本: v1.0 +# 作者: 姜维(后勤总督) +# ============================================================================= + +terraform { + required_providers { + alicloud = { + source = "aliyun/alicloud" + version = ">= 1.212.0" + } + } +} + +provider "alicloud" { + access_key = var.alicloud_access_key + secret_key = var.alicloud_secret_key + region = var.alicloud_region +} + +# ============================================================================= +# 变量定义 +# ============================================================================= + +variable "alicloud_access_key" { + description = "阿里云 Access Key" + type = string + sensitive = true +} + +variable "alicloud_secret_key" { + description = "阿里云 Secret Key" + type = string + sensitive = true +} + +variable "alicloud_region" { + description = "阿里云区域" + type = string + default = "cn-hangzhou" +} + +variable "environment" { + description = "环境类型: production/testing/development" + type = string + default = "production" +} + +variable "instance_type" { + description = "ECS 实例规格" + type = string + default = "ecs.c6.large" +} + +variable "vpc_cidr" { + description = "VPC CIDR 块" + type = string + default = "10.0.0.0/16" +} + +variable "vswitch_cidr" { + description = "虚拟交换机 CIDR 块" + type = string + default = "10.0.0.0/24" +} + +# ============================================================================= +# VPC 网络 +# ============================================================================= + +resource "alicloud_vpc" "sanguo_vpc" { + vpc_name = "sanguo-vnpy-${var.environment}-vpc" + cidr_block = var.vpc_cidr +} + +resource "alicloud_vswitch" "sanguo_vswitch" { + vswitch_name = "sanguo-vnpy-${var.environment}-vswitch" + vpc_id = alicloud_vpc.sanguo_vpc.id + cidr_block = var.vswitch_cidr + zone_id = "${var.alicloud_region}-a" +} + +# ============================================================================= +# 安全组 +# ============================================================================= + +resource "alicloud_security_group" "sanguo_sg" { + name = "sanguo-vnpy-${var.environment}-sg" + description = "sanguo_vnpy ${var.environment} 安全组" + vpc_id = alicloud_vpc.sanguo_vpc.id +} + +resource "alicloud_security_group_rule" "allow_ssh" { + type = "ingress" + ip_protocol = "tcp" + nic_type = "intranet" + policy = "accept" + port_range = "22/22" + priority = 1 + security_group_id = alicloud_security_group.sanguo_sg.id + cidr_ip = "0.0.0.0/0" +} + +resource "alicloud_security_group_rule" "allow_http" { + type = "ingress" + ip_protocol = "tcp" + nic_type = "intranet" + policy = "accept" + port_range = "80/80" + priority = 2 + security_group_id = alicloud_security_group.sanguo_sg.id + cidr_ip = "0.0.0.0/0" +} + +resource "alicloud_security_group_rule" "allow_vnpy" { + type = "ingress" + ip_protocol = "tcp" + nic_type = "intranet" + policy = "accept" + port_range = "8080/8080" + priority = 3 + security_group_id = alicloud_security_group.sanguo_sg.id + cidr_ip = "0.0.0.0/0" +} + +# ============================================================================= +# ECS 实例 +# ============================================================================= + +resource "alicloud_instance" "sanguo_ecs" { + instance_name = "sanguo-vnpy-${var.environment}-ecs" + availability_zone = "${var.alicloud_region}-a" + instance_type = var.instance_type + security_groups = [alicloud_security_group.sanguo_sg.id] + vswitch_id = alicloud_vswitch.sanguo_vswitch.id + internet_charge_type = "PayByTraffic" + internet_max_bandwidth_out = 100 + + system_disk_size = 40 + system_disk_category = "cloud_efficiency" + + image_id = "ubuntu_22_04_x64_20G_alibase_20240228.vhd" + + password = var.ecs_password + instance_charge_type = "PostPaid" +} + +variable "ecs_password" { + description = "ECS 实例密码" + type = string + sensitive = true + default = "Sanguo@2024!" +} + +# ============================================================================= +# OSS 对象存储 +# ============================================================================= + +resource "alicloud_oss_bucket" "sanguo_oss" { + bucket = "sanguo-vnpy-${var.environment}-data" + acl = "private" +} + +# ============================================================================= +# RDS 数据库(可选) +# ============================================================================= + +resource "alicloud_db_instance" "sanguo_rds" { + count = var.enable_rds ? 1 : 0 + + engine = "MySQL" + engine_version = "8.0" + instance_type = "rds.mysql.s2.large" + instance_storage = 20 + vswitch_id = alicloud_vswitch.sanguo_vswitch.id + security_ips = ["0.0.0.0/0"] + db_instance_name = "sanguo-vnpy-${var.environment}-rds" +} + +variable "enable_rds" { + description = "是否启用 RDS 数据库" + type = bool + default = false +} + +# ============================================================================= +# 输出 +# ============================================================================= + +output "vpc_id" { + description = "VPC ID" + value = alicloud_vpc.sanguo_vpc.id +} + +output "ecs_public_ip" { + description = "ECS 公网 IP" + value = alicloud_instance.sanguo_ecs.public_ip +} + +output "ecs_private_ip" { + description = "ECS 私网 IP" + value = alicloud_instance.sanguo_ecs.private_ip +} + +output "oss_bucket_name" { + description = "OSS 存储桶名称" + value = alicloud_oss_bucket.sanguo_oss.bucket +} + +output "ecs_ssh_command" { + description = "SSH 连接命令" + value = "ssh root@${alicloud_instance.sanguo_ecs.public_ip}" +} + +output "vnpy_web_url" { + description = "vn.py Web 界面地址" + value = "http://${alicloud_instance.sanguo_ecs.public_ip}:8080" +} diff --git a/platform/research/04-运维方案/disaster-recovery/emergency_response.md b/platform/research/04-运维方案/disaster-recovery/emergency_response.md new file mode 100644 index 000000000..4f1d4fd78 --- /dev/null +++ b/platform/research/04-运维方案/disaster-recovery/emergency_response.md @@ -0,0 +1,335 @@ +# sanguo_vnpy 阿里云生产环境应急响应方案 + +**版本**: v1.0 +**作者**: 姜维(后勤总督) +**日期**: 2026-03-21 + +--- + +## 🚨 应急响应原则 + +### 1. 快速响应原则 +- **5分钟内**:发现问题并启动响应流程 +- **15分钟内**:完成初步诊断和影响评估 +- **30分钟内**:确定并执行恢复方案 + +### 2. 优先级原则 +- **P0(严重)**:系统完全不可用,数据丢失风险 +- **P1(高)**:核心功能异常,影响主要交易 +- **P2(中)**:次要功能异常,不影响核心交易 +- **P3(低)**:轻微问题,用户体验影响 + +### 3. 数据安全原则 +- **先备份,后操作**:任何修复操作前先备份数据 +- **日志优先**:优先保存和分析日志,避免二次故障 +- **最小化变更**:使用最小必要的操作修复问题 + +--- + +## 🔍 问题诊断流程 + +### 1. 监控告警触发 + +#### 告警来源 +1. **系统监控**:Prometheus + Grafana +2. **应用监控**:sanguo_vnpy 内部健康检查 +3. **业务监控**:策略执行异常告警 +4. **用户反馈**:用户上报的问题 + +#### 告警级别对应 +| 告警类型 | 影响 | 响应级别 | +|---------|------|---------| +| 实例宕机 | 系统不可用 | P0 | +| CPU > 90% 5分钟 | 性能下降 | P1 | +| 内存 > 90% 5分钟 | 可能OOM | P1 | +| 磁盘 > 95% | 数据写入失败 | P0 | +| vn.py 进程消失 | 应用不可用 | P0 | +| 策略执行失败 | 业务影响 | P1 | + +### 2. 快速诊断步骤 + +#### 步骤 1: 检查系统状态 +```bash +# 1. 检查服务器是否在线 +ping + +# 2. SSH 登录(如果可能) +ssh root@ + +# 3. 检查系统资源 +top +htop +df -h +free -m + +# 4. 检查网络 +ping 8.8.8.8 +curl -I https://www.aliyun.com +``` + +#### 步骤 2: 检查服务状态 +```bash +# 1. 检查 vn.py 进程 +ps aux | grep -i vnpy +ps aux | grep -i python + +# 2. 检查端口监听 +netstat -tlnp | grep 8080 +ss -tlnp | grep 8080 + +# 3. 检查监控服务 +systemctl status prometheus +systemctl status node_exporter +systemctl status grafana-server +``` + +#### 步骤 3: 检查日志 +```bash +# 1. 系统日志 +tail -100 /var/log/syslog +tail -100 /var/log/messages + +# 2. 应用日志 +tail -100 /path/to/sanguo_vnpy/logs/app.log +tail -100 /path/to/sanguo_vnpy/logs/error.log + +# 3. 云服务商日志 +# 阿里云控制台查看云服务器监控和事件 +``` + +--- + +## 🔧 常见问题应急处理 + +### 场景 1: 实例完全宕机(P0) + +#### 现象 +- Ping 无响应 +- SSH 无法连接 +- 监控显示实例离线 + +#### 应急处理 +1. **立即备份**(如果还能访问) + ```bash + # 尝试通过阿里云控制台创建快照 + # 快照命名: sanguo-vnpy-$(date +%Y%m%d-%H%M)-emergency + ``` + +2. **重启实例** + - 阿里云控制台 → 实例 → 重启 + - 等待 2-5 分钟 + +3. **如果重启失败** + - 使用可用快照回滚 + - 或从备份数据重建实例 + +4. **验证恢复** + ```bash + # 检查服务是否恢复 + ssh root@ + ps aux | grep vnpy + curl http://localhost:8080/health + ``` + +--- + +### 场景 2: vn.py 进程崩溃(P0) + +#### 现象 +- 实例在线,但 vn.py 进程消失 +- 端口 8080 无响应 +- 应用监控告警 + +#### 应急处理 +1. **保存崩溃现场** + ```bash + # 保存系统状态 + dmesg > /tmp/dmesg-$(date +%Y%m%d-%H%M).log + vmstat 1 5 > /tmp/vmstat-$(date +%Y%m%d-%H%M).log + + # 保存应用日志 + cp -r /path/to/sanguo_vnpy/logs /tmp/logs-backup-$(date +%Y%m%d-%H%M) + ``` + +2. **检查崩溃原因** + ```bash + # 检查系统日志中的 OOM + grep -i "out of memory" /var/log/syslog + grep -i "killed process" /var/log/syslog + + # 检查应用错误日志 + tail -200 /path/to/sanguo_vnpy/logs/error.log + ``` + +3. **快速重启服务** + ```bash + # 进入项目目录 + cd /path/to/sanguo_vnpy + + # 激活虚拟环境 + source venv/bin/activate + + # 启动 vn.py + python -m vnpy & + + # 或者使用服务管理 + systemctl start sanguo-vnpy + ``` + +4. **验证恢复** + ```bash + # 检查进程 + ps aux | grep vnpy + + # 检查端口 + curl http://localhost:8080/health + + # 检查监控 + # 确认 Prometheus 数据恢复 + ``` + +--- + +### 场景 3: 磁盘空间满(P0) + +#### 现象 +- 磁盘使用率 > 95% +- 数据写入失败 +- 应用无法保存数据 + +#### 应急处理 +1. **立即清理临时文件** + ```bash + # 清理系统临时文件 + rm -rf /tmp/* + + # 清理应用缓存 + rm -rf /path/to/sanguo_vnpy/cache/* + + # 清理旧日志(保留最近7天) + find /path/to/sanguo_vnpy/logs -name "*.log" -mtime +7 -delete + ``` + +2. **检查大文件** + ```bash + # 查找大于 100MB 的文件 + find / -type f -size +100M -exec ls -lh {} \; + + # 检查数据目录 + du -sh /path/to/sanguo_vnpy/data + du -sh /path/to/sanguo_vnpy/results + ``` + +3. **扩容磁盘(如果需要)** + - 阿里云控制台 → 云盘 → 扩容 + - 或挂载新的数据盘 + +4. **验证恢复** + ```bash + df -h + # 确认使用率下降到安全范围(< 80%) + ``` + +--- + +### 场景 4: 数据库连接失败(P1) + +#### 现象 +- 应用报错无法连接数据库 +- 策略无法获取数据 +- 回测无法执行 + +#### 应急处理 +1. **检查数据库状态** + ```bash + # 如果使用 RDS + # 阿里云控制台检查 RDS 状态 + + # 如果使用本地 SQLite + ls -lh /path/to/sanguo_vnpy/data/*.db + # 检查文件权限和完整性 + ``` + +2. **网络连接测试** + ```bash + # 测试数据库连接 + telnet + nc -zv + + # 检查安全组 + # 确认应用服务器 IP 在数据库白名单中 + ``` + +3. **快速恢复方案** + ```bash + # 方案 A: 切换到本地缓存数据 + # 修改配置使用 akshare 直接获取 + + # 方案 B: 从备份恢复数据库 + # 恢复最近的数据库备份 + + # 方案 C: 重启数据库服务 + # 如果是自建数据库,重启服务 + ``` + +--- + +## 📋 应急响应检查清单 + +### 响应前检查 +- [ ] 确认告警级别和影响范围 +- [ ] 通知相关人员(诸葛亮/主公) +- [ ] 保存当前系统状态和日志 +- [ ] 准备回滚方案 + +### 响应中检查 +- [ ] 执行诊断步骤,确定根因 +- [ ] 执行最小必要的修复操作 +- [ ] 持续监控系统状态 +- [ ] 记录所有操作和时间点 + +### 响应后检查 +- [ ] 验证核心功能恢复正常 +- [ ] 验证监控数据正常 +- [ ] 验证业务数据完整性 +- [ ] 总结故障原因和改进措施 + +--- + +## 📞 联络清单 + +### 紧急联络 +- **总军师(诸葛亮)**:负责决策和协调 +- **主公**:最终决策和资源协调 +- **赵云**:数据库和数据相关问题 +- **关羽**:回测引擎和风控问题 +- **张飞**:API 兼容层问题 +- **司马懿**:安全和合规问题 + +### 阿里云支持 +- **阿里云控制台**:https://home.console.aliyun.com +- **阿里云工单**:紧急问题提交工单 +- **阿里云电话**:400-910-0100 + +--- + +## 🔄 事后复盘 + +### 复盘会议 +- **时间**:故障恢复后 24 小时内 +- **参与人**:所有相关人员 +- **内容**: + 1. 故障回顾和时间线 + 2. 根因分析 + 3. 响应流程评估 + 4. 改进措施讨论 + +### 改进措施 +- 技术改进:防止同类故障再次发生 +- 流程改进:优化响应流程 +- 监控改进:完善告警和监控 +- 文档改进:更新应急方案 + +--- + +**本应急方案会持续更新,确保生产环境安全稳定运行!** 🚛 diff --git a/platform/research/04-运维方案/monitoring/deploy_monitoring.sh b/platform/research/04-运维方案/monitoring/deploy_monitoring.sh new file mode 100755 index 000000000..83a51afb6 --- /dev/null +++ b/platform/research/04-运维方案/monitoring/deploy_monitoring.sh @@ -0,0 +1,211 @@ +#!/usr/bin/env bash +# ============================================================================= +# sanguo_vnpy 阿里云生产环境监控系统部署脚本 +# 版本: v1.0 +# 作者: 姜维(后勤总督) +# ============================================================================= + +set -e + +echo "========================================================================" +echo "🚀 sanguo_vnpy 监控系统部署" +echo "========================================================================" + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +print_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +print_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# 步骤 1: 安装系统依赖 +print_info "步骤 1: 安装系统依赖..." + +if command -v apt-get &> /dev/null; then + sudo apt-get update + sudo apt-get install -y prometheus node_exporter grafana nginx +elif command -v yum &> /dev/null; then + sudo yum install -y epel-release + sudo yum install -y prometheus node_exporter grafana nginx +else + print_error "不支持的操作系统" + exit 1 +fi + +print_success "系统依赖安装完成" + +# 步骤 2: 配置 Prometheus +print_info "步骤 2: 配置 Prometheus..." + +sudo mkdir -p /etc/prometheus +sudo cat > /etc/prometheus/prometheus.yml << 'EOF' +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'node_exporter' + static_configs: + - targets: ['localhost:9100'] + + - job_name: 'sanguo_vnpy' + static_configs: + - targets: ['localhost:8080'] + metrics_path: '/metrics' + +alerting: + alertmanagers: + - static_configs: + - targets: ['localhost:9093'] + +rule_files: + - "/etc/prometheus/alerts.yml" +EOF + +# 步骤 3: 配置告警规则 +print_info "步骤 3: 配置告警规则..." + +sudo cat > /etc/prometheus/alerts.yml << 'EOF' +groups: + - name: sanguo_vnpy_alerts + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "实例 {{ $labels.instance }} 已宕机" + description: "{{ $labels.job }} 实例 {{ $labels.instance }} 已宕机超过 5 分钟" + + - alert: HighCPUUsage + expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "实例 {{ $labels.instance }} CPU 使用率过高" + description: "{{ $labels.instance }} CPU 使用率超过 80%" + + - alert: HighMemoryUsage + expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "实例 {{ $labels.instance }} 内存使用率过高" + description: "{{ $labels.instance }} 内存使用率超过 80%" + + - alert: DiskSpaceLow + expr: (1 - (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"})) * 100 > 90 + for: 5m + labels: + severity: critical + annotations: + summary: "实例 {{ $labels.instance }} 磁盘空间不足" + description: "{{ $labels.instance }} 磁盘使用率超过 90%" + + - alert: VnpyDown + expr: up{job="sanguo_vnpy"} == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "sanguo_vnpy 服务已宕机" + description: "sanguo_vnpy 服务已宕机超过 2 分钟,请立即检查!" +EOF + +# 步骤 4: 配置 Grafana +print_info "步骤 4: 配置 Grafana..." + +sudo mkdir -p /etc/grafana/provisioning/datasources +sudo cat > /etc/grafana/provisioning/datasources/prometheus.yml << 'EOF' +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://localhost:9090 + isDefault: true + editable: true +EOF + +# 步骤 5: 启动服务 +print_info "步骤 5: 启动监控服务..." + +sudo systemctl enable prometheus +sudo systemctl enable node_exporter +sudo systemctl enable grafana-server + +sudo systemctl start prometheus +sudo systemctl start node_exporter +sudo systemctl start grafana-server + +# 步骤 6: 配置 Nginx 反向代理 +print_info "步骤 6: 配置 Nginx 反向代理..." + +sudo cat > /etc/nginx/sites-available/sanguo-monitoring << 'EOF' +server { + listen 80; + server_name _; + + location /grafana/ { + proxy_pass http://localhost:3000/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /prometheus/ { + proxy_pass http://localhost:9090/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + location /node-exporter/ { + proxy_pass http://localhost:9100/; + proxy_set_header Host $host; + } +} +EOF + +sudo ln -sf /etc/nginx/sites-available/sanguo-monitoring /etc/nginx/sites-enabled/ +sudo nginx -t +sudo systemctl reload nginx + +# 步骤 7: 显示访问信息 +print_success "监控系统部署完成!" + +echo "" +echo "========================================================================" +echo "📊 监控系统访问信息" +echo "========================================================================" +echo "Grafana: http://$(hostname -I | awk '{print $1}'):3000" +echo "Prometheus: http://$(hostname -I | awk '{print $1}'):9090" +echo "Node Exporter: http://$(hostname -I | awk '{print $1}'):9100" +echo "" +echo "默认 Grafana 账号: admin / admin" +echo "========================================================================" +echo "" diff --git a/technical-strategy/02-algorithms/__pycache__/high_frequency_signal.cpython-314.pyc b/technical-strategy/02-algorithms/__pycache__/high_frequency_signal.cpython-314.pyc new file mode 100644 index 000000000..fc001650b Binary files /dev/null and b/technical-strategy/02-algorithms/__pycache__/high_frequency_signal.cpython-314.pyc differ diff --git a/technical-strategy/02-algorithms/benchmark_test.py b/technical-strategy/02-algorithms/benchmark_test.py new file mode 100644 index 000000000..1bc5d16bb --- /dev/null +++ b/technical-strategy/02-algorithms/benchmark_test.py @@ -0,0 +1,166 @@ +""" +高频算法性能基准测试 +测试不同实现方式的性能 +""" + +import time +import numpy as np +import pandas as pd +from datetime import datetime +from high_frequency_signal import ( + TechnicalFactorCalculator, + PerformanceMonitor, + BarData, +) + + +def generate_test_data(n: int = 10000) -> pd.DataFrame: + """生成随机测试数据""" + np.random.seed(42) + + # 生成随机游走价格 + returns = np.random.normal(0.0001, 0.01, n) + price = 100 * np.exp(np.cumsum(returns)) + + # 生成OHLCV + high = price * (1 + np.random.uniform(0, 0.01, n)) + low = price * (1 - np.random.uniform(0, 0.01, n)) + open_ = price * (1 + np.random.normal(0, 0.005, n)) + volume = np.random.randint(100000, 10000000, n) + + df = pd.DataFrame({ + 'open': open_, + 'high': high, + 'low': low, + 'close': price, + 'volume': volume, + }) + + return df + + +def benchmark_technical_calculator(): + """测试技术因子计算器性能""" + print("=" * 60) + print("技术因子计算器性能基准测试") + print("=" * 60) + + calc = TechnicalFactorCalculator() + monitor = PerformanceMonitor() + + # 测试不同数据大小 + for size in [100, 1000, 10000]: + df = generate_test_data(size) + close = df['close'].values + high = df['high'].values + low = df['low'].values + volume = df['volume'].values + + n_runs = 1000 if size <= 1000 else 100 + + # 测试MA + start_time = time.time() + for _ in range(n_runs): + _ = calc.calculate_ma(close, 20) + elapsed = (time.time() - start_time) * 1000 + print(f"MA {size} points x{n_runs} runs: {elapsed:.2f} ms " + f"({elapsed/n_runs:.3f} ms/run)") + + # 测试MACD + start_time = time.time() + for _ in range(n_runs): + _ = calc.calculate_macd(close, 12, 26, 9) + elapsed = (time.time() - start_time) * 1000 + print(f"MACD {size} points x{n_runs} runs: {elapsed:.2f} ms " + f"({elapsed/n_runs:.3f} ms/run)") + + # 测试RSI + start_time = time.time() + for _ in range(n_runs): + _ = calc.calculate_rsi(close, 14) + elapsed = (time.time() - start_time) * 1000 + print(f"RSI {size} points x{n_runs} runs: {elapsed:.2f} ms " + f"({elapsed/n_runs:.3f} ms/run)") + + # 测试布林带 + start_time = time.time() + for _ in range(n_runs): + _ = calc.calculate_bollinger_bands(close, 20, 2.0) + elapsed = (time.time() - start_time) * 1000 + print(f"Bollinger {size} points x{n_runs} runs: {elapsed:.2f} ms " + f"({elapsed/n_runs:.3f} ms/run)") + + # 测试ATR + start_time = time.time() + for _ in range(n_runs): + _ = calc.calculate_atr(high, low, close, 14) + elapsed = (time.time() - start_time) * 1000 + print(f"ATR {size} points x{n_runs} runs: {elapsed:.2f} ms " + f"({elapsed/n_runs:.3f} ms/run)") + + print() + + +def benchmark_full_pipeline(): + """测试完整信号生成流水线""" + print("=" * 60) + print("完整信号生成流水线性能基准测试") + print("=" * 60) + + from high_frequency_signal import HighFrequencySignalGenerator + + calc = TechnicalFactorCalculator() + generator = HighFrequencySignalGenerator() + monitor = PerformanceMonitor() + + # 生成测试数据 + n_days = 252 * 5 + df = generate_test_data(n_days) + + start_time = time.time() + + # 模拟逐日处理 + signals = [] + for i in range(20, len(df)): + data = df.iloc[:i+1] + bar = BarData( + code="TEST", + datetime=datetime.now(), + open=data['open'].iloc[-1], + high=data['high'].iloc[-1], + low=data['low'].iloc[-1], + close=data['close'].iloc[-1], + volume=data['volume'].iloc[-1], + amount=data['volume'].iloc[-1] * data['close'].iloc[-1] + ) + monitor.on_process_start() + signal = generator.generate_signal_from_bar(bar, data) + monitor.on_process_end() + if signal: + signals.append(signal) + + elapsed = (time.time() - start_time) * 1000 + print(f"完整回测 {n_days} 根K线,生成 {len(signals)} 个信号") + print(f"总耗时: {elapsed:.2f} ms") + print(f"平均每根K线: {elapsed/n_days:.3f} ms") + print() + + monitor.print_stats() + + stats = generator.get_performance_stats() + print(f"\n信号统计: {stats}") + + +def main(): + """主函数""" + benchmark_technical_calculator() + print() + benchmark_full_pipeline() + print() + print("=" * 60) + print("基准测试完成") + print("=" * 60) + + +if __name__ == '__main__': + main() diff --git a/technical-strategy/02-algorithms/high_frequency_signal.py b/technical-strategy/02-algorithms/high_frequency_signal.py new file mode 100644 index 000000000..4581a2f8c --- /dev/null +++ b/technical-strategy/02-algorithms/high_frequency_signal.py @@ -0,0 +1,366 @@ +""" +高频交易信号生成器 +支持tick级和分钟级数据处理 +低延迟信号生成 +支持向量化计算 +""" + +from typing import List, Dict, Tuple, Optional, Union +import numpy as np +import pandas as pd +from dataclasses import dataclass +from datetime import datetime, timedelta +import logging + +logger = logging.getLogger(__name__) + + +@dataclass +class TickData: + """tick数据结构""" + code: str + datetime: datetime + price: float + volume: int + amount: float + direction: int = 0 # 1买-1卖0平 + + +@dataclass +class BarData: + """K线数据结构""" + code: str + datetime: datetime + open: float + high: float + low: float + close: float + volume: float + amount: float + + +@dataclass +class Signal: + """交易信号结构""" + code: str + datetime: datetime + direction: int # 1多-1空0无 + strength: float # 信号强度 0-1 + price: float # 信号价格 + expected_price: float # 预期目标价 + + +class HighFrequencyDataPipeline: + """高频数据流处理管道""" + + def __init__(self, window_size: int = 100): + self.window_size = window_size + self._buffer: Dict[str, List[TickData]] = {} + self._last_bar: Dict[str, Optional[BarData]] = {} + self._tick_count = 0 + + def on_tick(self, tick: TickData) -> Optional[Signal]: + """处理新tick,可能生成信号""" + if tick.code not in self._buffer: + self._buffer[tick.code] = [] + + buffer = self._buffer[tick.code] + buffer.append(tick) + + # 保持窗口大小 + if len(buffer) > self.window_size: + buffer.pop(0) + + self._tick_count += 1 + + # 这里可以触发滚动计算 + return None + + def get_tick_buffer(self, code: str) -> List[TickData]: + """获取tick缓冲""" + return self._buffer.get(code, []) + + def statistics(self) -> Dict: + """获取统计信息""" + return { + 'total_ticks': self._tick_count, + 'total_codes': len(self._buffer), + 'buffer_size': sum(len(b) for b in self._buffer.values()) + } + + +class TechnicalFactorCalculator: + """技术因子计算器 - 向量化实现""" + + def __init__(self): + self.cache: Dict[str, pd.DataFrame] = {} + + def calculate_ma(self, prices: np.ndarray, period: int) -> np.ndarray: + """计算移动平均线""" + return pd.Series(prices).rolling(period).mean().values + + def calculate_ema(self, prices: np.ndarray, period: int) -> np.ndarray: + """计算指数移动平均线""" + return pd.Series(prices).ewm(span=period, adjust=False).mean().values + + def calculate_macd(self, prices: np.ndarray, + fast_period: int = 12, + slow_period: int = 26, + signal_period: int = 9) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """计算MACD""" + ema_fast = self.calculate_ema(prices, fast_period) + ema_slow = self.calculate_ema(prices, slow_period) + dif = ema_fast - ema_slow + dea = self.calculate_ema(dif, signal_period) + macd = 2 * (dif - dea) + return dif, dea, macd + + def calculate_bollinger_bands(self, prices: np.ndarray, + period: int = 20, + num_std: float = 2.0) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """计算布林带""" + middle = pd.Series(prices).rolling(period).mean().values + std = pd.Series(prices).rolling(period).std().values + upper = middle + num_std * std + lower = middle - num_std * std + return upper, middle, lower + + def calculate_rsi(self, prices: np.ndarray, period: int = 14) -> np.ndarray: + """计算RSI相对强弱指数""" + delta = np.diff(prices) + gain = np.where(delta > 0, delta, 0) + loss = np.where(delta < 0, -delta, 0) + + avg_gain = np.zeros_like(prices) + avg_loss = np.zeros_like(prices) + + # 第一个period没有数据 + avg_gain[period] = np.mean(gain[:period]) + avg_loss[period] = np.mean(loss[:period]) + + # Wilder's smoothing + for i in range(period + 1, len(prices)): + avg_gain[i] = (avg_gain[i-1] * (period - 1) + gain[i-1]) / period + avg_loss[i] = (avg_loss[i-1] * (period - 1) + loss[i-1]) / period + + rs = avg_gain / (avg_loss + 1e-10) + rsi = 100 - (100 / (1 + rs)) + return rsi + + def calculate_atr(self, high: np.ndarray, low: np.ndarray, + close: np.ndarray, period: int = 14) -> np.ndarray: + """计算ATR平均真实波幅""" + tr = np.zeros_like(high) + tr[0] = high[0] - low[0] + + for i in range(1, len(high)): + tr1 = high[i] - low[i] + tr2 = abs(high[i] - close[i-1]) + tr3 = abs(low[i] - close[i-1]) + tr[i] = max(tr1, tr2, tr3) + + atr = pd.Series(tr).rolling(period).mean().values + return atr + + def calculate_volume_ma(self, volumes: np.ndarray, period: int) -> np.ndarray: + """计算成交量移动平均""" + return pd.Series(volumes).rolling(period).mean().values + + def calculate_obv(self, close: np.ndarray, volume: np.ndarray) -> np.ndarray: + """计算OBV能量潮""" + obv = np.zeros_like(close) + obv[0] = volume[0] + + for i in range(1, len(close)): + if close[i] > close[i-1]: + obv[i] = obv[i-1] + volume[i] + elif close[i] < close[i-1]: + obv[i] = obv[i-1] - volume[i] + else: + obv[i] = obv[i-1] + + return obv + + +class HighFrequencySignalGenerator: + """高频交易信号生成器 + 支持多种技术因子组合生成交易信号 + """ + + def __init__(self): + self.factor_calculator = TechnicalFactorCalculator() + self.data_pipeline = HighFrequencyDataPipeline() + self.signal_history: List[Signal] = [] + self.performance_stats = { + 'total_signals': 0, + 'long_signals': 0, + 'short_signals': 0, + } + + def generate_signal_from_bar(self, bar: BarData, + data: pd.DataFrame) -> Optional[Signal]: + """从K线数据生成信号""" + # 计算各种技术因子 + close = data['close'].values + high = data['high'].values + low = data['low'].values + volume = data['volume'].values + + # MACD + dif, dea, macd = self.factor_calculator.calculate_macd(close) + + # RSI + rsi = self.factor_calculator.calculate_rsi(close) + + # 布林带 + bb_upper, bb_mid, bb_lower = self.factor_calculator.calculate_bollinger_bands(close) + + # ATR + atr = self.factor_calculator.calculate_atr(high, low, close, 14) + + # 生成信号逻辑 + current_rsi = rsi[-1] + current_macd = macd[-1] + current_dif = dif[-1] + current_dea = dea[-1] + current_price = close[-1] + + # MACD金叉买入 + if current_dif > current_dea and (dif[-2] <= dea[-2]) and current_rsi < 70: + signal = Signal( + code=bar.code, + datetime=bar.datetime, + direction=1, + strength=min((70 - current_rsi) / 70 + (current_dif - current_dea) / 1, 1.0), + price=current_price, + expected_price=current_price + 2 * atr[-1] + ) + self._record_signal(signal) + return signal + + # MACD死叉卖出 + elif current_dif < current_dea and (dif[-2] >= dea[-2]) and current_rsi > 30: + signal = Signal( + code=bar.code, + datetime=bar.datetime, + direction=-1, + strength=min((current_rsi - 30) / 30 + (current_dea - current_dif) / 1, 1.0), + price=current_price, + expected_price=current_price - 2 * atr[-1] + ) + self._record_signal(signal) + return signal + + # 布林带突破 + elif current_price < bb_lower[-1] and current_rsi < 30: + signal = Signal( + code=bar.code, + datetime=bar.datetime, + direction=1, + strength=(bb_lower[-1] - current_price) / (bb_lower[-1] * 0.05), + price=current_price, + expected_price=bb_mid[-1] + ) + self._record_signal(signal) + return signal + + elif current_price > bb_upper[-1] and current_rsi > 70: + signal = Signal( + code=bar.code, + datetime=bar.datetime, + direction=-1, + strength=(current_price - bb_upper[-1]) / (bb_upper[-1] * 0.05), + price=current_price, + expected_price=bb_mid[-1] + ) + self._record_signal(signal) + return signal + + return None + + def _record_signal(self, signal: Signal): + """记录信号""" + self.signal_history.append(signal) + self.performance_stats['total_signals'] += 1 + if signal.direction == 1: + self.performance_stats['long_signals'] += 1 + elif signal.direction == -1: + self.performance_stats['short_signals'] += 1 + + def get_performance_stats(self) -> Dict: + """获取性能统计""" + return self.performance_stats.copy() + + +class PerformanceMonitor: + """性能监控器 - 监控高频算法性能""" + + def __init__(self, window_size: int = 1000): + self.latencies: List[float] = [] + self.window_size = window_size + self.processed_ticks = 0 + self.start_time = datetime.now() + + def on_process_start(self): + """开始处理计时""" + self._start = datetime.now() + + def on_process_end(self): + """结束处理计时""" + end = datetime.now() + latency = (end - self._start).total_seconds() * 1000 # ms + self.latencies.append(latency) + self.processed_ticks += 1 + + # 保持窗口大小 + if len(self.latencies) > self.window_size: + self.latencies.pop(0) + + def get_stats(self) -> Dict: + """获取性能统计""" + if not self.latencies: + return { + 'latency_ms_avg': 0, + 'latency_ms_p50': 0, + 'latency_ms_p99': 0, + 'total_processed': 0, + 'tps': 0 + } + + latencies_np = np.array(self.latencies) + elapsed = (datetime.now() - self.start_time).total_seconds() + + return { + 'latency_ms_avg': float(latencies_np.mean()), + 'latency_ms_p50': float(np.percentile(latencies_np, 50)), + 'latency_ms_p99': float(np.percentile(latencies_np, 99)), + 'latency_ms_max': float(latencies_np.max()), + 'total_processed': self.processed_ticks, + 'tps': self.processed_ticks / elapsed if elapsed > 0 else 0 + } + + def print_stats(self): + """打印性能统计""" + stats = self.get_stats() + print("=" * 60) + print("高频算法性能监控") + print("=" * 60) + print(f"平均延迟: {stats['latency_ms_avg']:.2f} ms") + print(f"P50延迟: {stats['latency_ms_p50']:.2f} ms") + print(f"P99延迟: {stats['latency_ms_p99']:.2f} ms") + print(f"最大延迟: {stats['latency_ms_max']:.2f} ms") + print(f"总处理tick数: {stats['total_processed']}") + print(f"每秒处理: {stats['tps']:.2f} ticks") + print("=" * 60) + + +# 导出 +__all__ = [ + 'TickData', + 'BarData', + 'Signal', + 'HighFrequencyDataPipeline', + 'TechnicalFactorCalculator', + 'HighFrequencySignalGenerator', + 'PerformanceMonitor', +]