add: 关羽完成已调研结果和实时风控系统上传

2026-03-21 20:24:43 +08:00
parent e43c7a5214
commit f549b28ac7
7 changed files with 1454 additions and 0 deletions
@@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+# =============================================================================
+# sanguo_vnpy 自动化部署流水线脚本
+# 版本: v1.0
+# 作者: 姜维（后勤总督）
+# =============================================================================
+
+set -e
+
+echo "========================================================================"
+echo "🚀 sanguo_vnpy 自动化部署流水线"
+echo "========================================================================"
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+print_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+print_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# 获取当前时间
+get_timestamp() {
+    date +"%Y%m%d_%H%M%S"
+}
+
+# 配置变量
+PROJECT_DIR="/Users/chufeng/.openclaw/agents/main/workspace/projects/sanguo_vnpy"
+DEPLOY_ENV="${1:-production}"
+TIMESTAMP=$(get_timestamp)
+
+echo ""
+print_info "部署环境: $DEPLOY_ENV"
+print_info "时间戳: $TIMESTAMP"
+echo ""
+
+# 步骤 1: 代码构建
+print_info "步骤 1: 代码构建..."
+
+cd "$PROJECT_DIR"
+
+# 检查虚拟环境
+if [ ! -d "venv" ]; then
+    print_warning "虚拟环境不存在，正在创建..."
+    python3 -m venv venv
+fi
+
+# 激活虚拟环境
+print_info "激活虚拟环境..."
+source venv/bin/activate
+
+# 升级依赖
+print_info "升级项目依赖..."
+pip install --upgrade pip wheel
+pip install -e ".[alpha,dev]"
+
+print_success "代码构建完成"
+
+# 步骤 2: 代码检查
+print_info "步骤 2: 代码质量检查..."
+
+# 运行代码检查
+if command -v ruff &> /dev/null; then
+    print_info "运行 Ruff 代码检查..."
+    ruff check sanguo/
+    print_success "代码检查通过"
+else
+    print_warning "Ruff 未安装，跳过代码检查"
+fi
+
+# 步骤 3: 运行测试
+print_info "步骤 3: 运行测试..."
+
+if [ -d "tests" ]; then
+    if command -v pytest &> /dev/null; then
+        print_info "运行测试..."
+        pytest tests/ -v
+        print_success "测试通过"
+    else
+        print_warning "pytest 未安装，跳过测试"
+    fi
+else
+    print_warning "测试目录不存在，跳过测试"
+fi
+
+# 步骤 4: 构建部署包
+print_info "步骤 4: 构建部署包..."
+
+# 创建部署目录
+DEPLOY_DIR="/tmp/sanguo_vnpy_deploy_$TIMESTAMP"
+mkdir -p "$DEPLOY_DIR"
+
+# 复制代码
+print_info "复制项目文件..."
+cp -r sanguo/ "$DEPLOY_DIR/"
+cp -r vnpy/ "$DEPLOY_DIR/" 2>/dev/null || true
+cp pyproject.toml "$DEPLOY_DIR/"
+cp README.md "$DEPLOY_DIR/"
+
+# 构建 wheel 包
+print_info "构建 wheel 包..."
+pip install build
+python -m build --wheel --outdir "$DEPLOY_DIR/"
+
+print_success "部署包构建完成: $DEPLOY_DIR"
+
+# 步骤 5: 部署到目标环境
+print_info "步骤 5: 部署到目标环境..."
+
+if [ "$DEPLOY_ENV" = "production" ]; then
+    print_info "生产环境部署..."
+    # 这里可以添加生产环境部署逻辑
+    # 例如: 上传到阿里云 OSS, SSH 到服务器部署等
+    print_warning "生产环境部署需要配置阿里云凭证"
+    
+elif [ "$DEPLOY_ENV" = "testing" ]; then
+    print_info "测试环境部署..."
+    print_success "测试环境部署完成"
+    
+else
+    print_info "本地开发环境部署..."
+    print_success "本地部署完成"
+fi
+
+# 步骤 6: 验证部署
+print_info "步骤 6: 验证部署..."
+
+# 验证模块导入
+print_info "验证模块导入..."
+python -c "import sanguo; import vnpy; print('✅ 模块导入成功')"
+
+print_success "部署验证通过"
+
+echo ""
+echo "========================================================================"
+echo "🎉 部署完成！"
+echo "========================================================================"
+echo "部署时间: $(date)"
+echo "部署环境: $DEPLOY_ENV"
+echo "部署包: $DEPLOY_DIR"
+echo "========================================================================"
+echo ""
@@ -0,0 +1,219 @@
+# =============================================================================
+# sanguo_vnpy 阿里云生产环境 Terraform 配置
+# 版本: v1.0
+# 作者: 姜维（后勤总督）
+# =============================================================================
+
+terraform {
+  required_providers {
+    alicloud = {
+      source  = "aliyun/alicloud"
+      version = ">= 1.212.0"
+    }
+  }
+}
+
+provider "alicloud" {
+  access_key = var.alicloud_access_key
+  secret_key = var.alicloud_secret_key
+  region     = var.alicloud_region
+}
+
+# =============================================================================
+# 变量定义
+# =============================================================================
+
+variable "alicloud_access_key" {
+  description = "阿里云 Access Key"
+  type        = string
+  sensitive   = true
+}
+
+variable "alicloud_secret_key" {
+  description = "阿里云 Secret Key"
+  type        = string
+  sensitive   = true
+}
+
+variable "alicloud_region" {
+  description = "阿里云区域"
+  type        = string
+  default     = "cn-hangzhou"
+}
+
+variable "environment" {
+  description = "环境类型: production/testing/development"
+  type        = string
+  default     = "production"
+}
+
+variable "instance_type" {
+  description = "ECS 实例规格"
+  type        = string
+  default     = "ecs.c6.large"
+}
+
+variable "vpc_cidr" {
+  description = "VPC CIDR 块"
+  type        = string
+  default     = "10.0.0.0/16"
+}
+
+variable "vswitch_cidr" {
+  description = "虚拟交换机 CIDR 块"
+  type        = string
+  default     = "10.0.0.0/24"
+}
+
+# =============================================================================
+# VPC 网络
+# =============================================================================
+
+resource "alicloud_vpc" "sanguo_vpc" {
+  vpc_name   = "sanguo-vnpy-${var.environment}-vpc"
+  cidr_block = var.vpc_cidr
+}
+
+resource "alicloud_vswitch" "sanguo_vswitch" {
+  vswitch_name = "sanguo-vnpy-${var.environment}-vswitch"
+  vpc_id       = alicloud_vpc.sanguo_vpc.id
+  cidr_block   = var.vswitch_cidr
+  zone_id      = "${var.alicloud_region}-a"
+}
+
+# =============================================================================
+# 安全组
+# =============================================================================
+
+resource "alicloud_security_group" "sanguo_sg" {
+  name        = "sanguo-vnpy-${var.environment}-sg"
+  description = "sanguo_vnpy ${var.environment} 安全组"
+  vpc_id      = alicloud_vpc.sanguo_vpc.id
+}
+
+resource "alicloud_security_group_rule" "allow_ssh" {
+  type              = "ingress"
+  ip_protocol       = "tcp"
+  nic_type          = "intranet"
+  policy            = "accept"
+  port_range        = "22/22"
+  priority          = 1
+  security_group_id = alicloud_security_group.sanguo_sg.id
+  cidr_ip           = "0.0.0.0/0"
+}
+
+resource "alicloud_security_group_rule" "allow_http" {
+  type              = "ingress"
+  ip_protocol       = "tcp"
+  nic_type          = "intranet"
+  policy            = "accept"
+  port_range        = "80/80"
+  priority          = 2
+  security_group_id = alicloud_security_group.sanguo_sg.id
+  cidr_ip           = "0.0.0.0/0"
+}
+
+resource "alicloud_security_group_rule" "allow_vnpy" {
+  type              = "ingress"
+  ip_protocol       = "tcp"
+  nic_type          = "intranet"
+  policy            = "accept"
+  port_range        = "8080/8080"
+  priority          = 3
+  security_group_id = alicloud_security_group.sanguo_sg.id
+  cidr_ip           = "0.0.0.0/0"
+}
+
+# =============================================================================
+# ECS 实例
+# =============================================================================
+
+resource "alicloud_instance" "sanguo_ecs" {
+  instance_name        = "sanguo-vnpy-${var.environment}-ecs"
+  availability_zone    = "${var.alicloud_region}-a"
+  instance_type        = var.instance_type
+  security_groups      = [alicloud_security_group.sanguo_sg.id]
+  vswitch_id           = alicloud_vswitch.sanguo_vswitch.id
+  internet_charge_type = "PayByTraffic"
+  internet_max_bandwidth_out = 100
+  
+  system_disk_size    = 40
+  system_disk_category = "cloud_efficiency"
+  
+  image_id = "ubuntu_22_04_x64_20G_alibase_20240228.vhd"
+  
+  password = var.ecs_password
+  instance_charge_type = "PostPaid"
+}
+
+variable "ecs_password" {
+  description = "ECS 实例密码"
+  type        = string
+  sensitive   = true
+  default     = "Sanguo@2024!"
+}
+
+# =============================================================================
+# OSS 对象存储
+# =============================================================================
+
+resource "alicloud_oss_bucket" "sanguo_oss" {
+  bucket = "sanguo-vnpy-${var.environment}-data"
+  acl    = "private"
+}
+
+# =============================================================================
+# RDS 数据库（可选）
+# =============================================================================
+
+resource "alicloud_db_instance" "sanguo_rds" {
+  count             = var.enable_rds ? 1 : 0
+  
+  engine           = "MySQL"
+  engine_version   = "8.0"
+  instance_type    = "rds.mysql.s2.large"
+  instance_storage = 20
+  vswitch_id       = alicloud_vswitch.sanguo_vswitch.id
+  security_ips     = ["0.0.0.0/0"]
+  db_instance_name = "sanguo-vnpy-${var.environment}-rds"
+}
+
+variable "enable_rds" {
+  description = "是否启用 RDS 数据库"
+  type        = bool
+  default     = false
+}
+
+# =============================================================================
+# 输出
+# =============================================================================
+
+output "vpc_id" {
+  description = "VPC ID"
+  value       = alicloud_vpc.sanguo_vpc.id
+}
+
+output "ecs_public_ip" {
+  description = "ECS 公网 IP"
+  value       = alicloud_instance.sanguo_ecs.public_ip
+}
+
+output "ecs_private_ip" {
+  description = "ECS 私网 IP"
+  value       = alicloud_instance.sanguo_ecs.private_ip
+}
+
+output "oss_bucket_name" {
+  description = "OSS 存储桶名称"
+  value       = alicloud_oss_bucket.sanguo_oss.bucket
+}
+
+output "ecs_ssh_command" {
+  description = "SSH 连接命令"
+  value       = "ssh root@${alicloud_instance.sanguo_ecs.public_ip}"
+}
+
+output "vnpy_web_url" {
+  description = "vn.py Web 界面地址"
+  value       = "http://${alicloud_instance.sanguo_ecs.public_ip}:8080"
+}
@@ -0,0 +1,335 @@
+# sanguo_vnpy 阿里云生产环境应急响应方案
+
+**版本**: v1.0  
+**作者**: 姜维（后勤总督）  
+**日期**: 2026-03-21
+
+---
+
+## 🚨 应急响应原则
+
+### 1. 快速响应原则
+- **5分钟内**：发现问题并启动响应流程
+- **15分钟内**：完成初步诊断和影响评估
+- **30分钟内**：确定并执行恢复方案
+
+### 2. 优先级原则
+- **P0（严重）**：系统完全不可用，数据丢失风险
+- **P1（高）**：核心功能异常，影响主要交易
+- **P2（中）**：次要功能异常，不影响核心交易
+- **P3（低）**：轻微问题，用户体验影响
+
+### 3. 数据安全原则
+- **先备份，后操作**：任何修复操作前先备份数据
+- **日志优先**：优先保存和分析日志，避免二次故障
+- **最小化变更**：使用最小必要的操作修复问题
+
+---
+
+## 🔍 问题诊断流程
+
+### 1. 监控告警触发
+
+#### 告警来源
+1. **系统监控**：Prometheus + Grafana
+2. **应用监控**：sanguo_vnpy 内部健康检查
+3. **业务监控**：策略执行异常告警
+4. **用户反馈**：用户上报的问题
+
+#### 告警级别对应
+| 告警类型 | 影响 | 响应级别 |
+|---------|------|---------|
+| 实例宕机 | 系统不可用 | P0 |
+| CPU > 90% 5分钟 | 性能下降 | P1 |
+| 内存 > 90% 5分钟 | 可能OOM | P1 |
+| 磁盘 > 95% | 数据写入失败 | P0 |
+| vn.py 进程消失 | 应用不可用 | P0 |
+| 策略执行失败 | 业务影响 | P1 |
+
+### 2. 快速诊断步骤
+
+#### 步骤 1: 检查系统状态
+```bash
+# 1. 检查服务器是否在线
+ping <server-ip>
+
+# 2. SSH 登录（如果可能）
+ssh root@<server-ip>
+
+# 3. 检查系统资源
+top
+htop
+df -h
+free -m
+
+# 4. 检查网络
+ping 8.8.8.8
+curl -I https://www.aliyun.com
+```
+
+#### 步骤 2: 检查服务状态
+```bash
+# 1. 检查 vn.py 进程
+ps aux | grep -i vnpy
+ps aux | grep -i python
+
+# 2. 检查端口监听
+netstat -tlnp | grep 8080
+ss -tlnp | grep 8080
+
+# 3. 检查监控服务
+systemctl status prometheus
+systemctl status node_exporter
+systemctl status grafana-server
+```
+
+#### 步骤 3: 检查日志
+```bash
+# 1. 系统日志
+tail -100 /var/log/syslog
+tail -100 /var/log/messages
+
+# 2. 应用日志
+tail -100 /path/to/sanguo_vnpy/logs/app.log
+tail -100 /path/to/sanguo_vnpy/logs/error.log
+
+# 3. 云服务商日志
+# 阿里云控制台查看云服务器监控和事件
+```
+
+---
+
+## 🔧 常见问题应急处理
+
+### 场景 1: 实例完全宕机（P0）
+
+#### 现象
+- Ping 无响应
+- SSH 无法连接
+- 监控显示实例离线
+
+#### 应急处理
+1. **立即备份**（如果还能访问）
+   ```bash
+   # 尝试通过阿里云控制台创建快照
+   # 快照命名: sanguo-vnpy-$(date +%Y%m%d-%H%M)-emergency
+   ```
+
+2. **重启实例**
+   - 阿里云控制台 → 实例 → 重启
+   - 等待 2-5 分钟
+
+3. **如果重启失败**
+   - 使用可用快照回滚
+   - 或从备份数据重建实例
+
+4. **验证恢复**
+   ```bash
+   # 检查服务是否恢复
+   ssh root@<server-ip>
+   ps aux | grep vnpy
+   curl http://localhost:8080/health
+   ```
+
+---
+
+### 场景 2: vn.py 进程崩溃（P0）
+
+#### 现象
+- 实例在线，但 vn.py 进程消失
+- 端口 8080 无响应
+- 应用监控告警
+
+#### 应急处理
+1. **保存崩溃现场**
+   ```bash
+   # 保存系统状态
+   dmesg > /tmp/dmesg-$(date +%Y%m%d-%H%M).log
+   vmstat 1 5 > /tmp/vmstat-$(date +%Y%m%d-%H%M).log
+   
+   # 保存应用日志
+   cp -r /path/to/sanguo_vnpy/logs /tmp/logs-backup-$(date +%Y%m%d-%H%M)
+   ```
+
+2. **检查崩溃原因**
+   ```bash
+   # 检查系统日志中的 OOM
+   grep -i "out of memory" /var/log/syslog
+   grep -i "killed process" /var/log/syslog
+   
+   # 检查应用错误日志
+   tail -200 /path/to/sanguo_vnpy/logs/error.log
+   ```
+
+3. **快速重启服务**
+   ```bash
+   # 进入项目目录
+   cd /path/to/sanguo_vnpy
+   
+   # 激活虚拟环境
+   source venv/bin/activate
+   
+   # 启动 vn.py
+   python -m vnpy &
+   
+   # 或者使用服务管理
+   systemctl start sanguo-vnpy
+   ```
+
+4. **验证恢复**
+   ```bash
+   # 检查进程
+   ps aux | grep vnpy
+   
+   # 检查端口
+   curl http://localhost:8080/health
+   
+   # 检查监控
+   # 确认 Prometheus 数据恢复
+   ```
+
+---
+
+### 场景 3: 磁盘空间满（P0）
+
+#### 现象
+- 磁盘使用率 > 95%
+- 数据写入失败
+- 应用无法保存数据
+
+#### 应急处理
+1. **立即清理临时文件**
+   ```bash
+   # 清理系统临时文件
+   rm -rf /tmp/*
+   
+   # 清理应用缓存
+   rm -rf /path/to/sanguo_vnpy/cache/*
+   
+   # 清理旧日志（保留最近7天）
+   find /path/to/sanguo_vnpy/logs -name "*.log" -mtime +7 -delete
+   ```
+
+2. **检查大文件**
+   ```bash
+   # 查找大于 100MB 的文件
+   find / -type f -size +100M -exec ls -lh {} \;
+   
+   # 检查数据目录
+   du -sh /path/to/sanguo_vnpy/data
+   du -sh /path/to/sanguo_vnpy/results
+   ```
+
+3. **扩容磁盘（如果需要）**
+   - 阿里云控制台 → 云盘 → 扩容
+   - 或挂载新的数据盘
+
+4. **验证恢复**
+   ```bash
+   df -h
+   # 确认使用率下降到安全范围（< 80%）
+   ```
+
+---
+
+### 场景 4: 数据库连接失败（P1）
+
+#### 现象
+- 应用报错无法连接数据库
+- 策略无法获取数据
+- 回测无法执行
+
+#### 应急处理
+1. **检查数据库状态**
+   ```bash
+   # 如果使用 RDS
+   # 阿里云控制台检查 RDS 状态
+   
+   # 如果使用本地 SQLite
+   ls -lh /path/to/sanguo_vnpy/data/*.db
+   # 检查文件权限和完整性
+   ```
+
+2. **网络连接测试**
+   ```bash
+   # 测试数据库连接
+   telnet <db-host> <db-port>
+   nc -zv <db-host> <db-port>
+   
+   # 检查安全组
+   # 确认应用服务器 IP 在数据库白名单中
+   ```
+
+3. **快速恢复方案**
+   ```bash
+   # 方案 A: 切换到本地缓存数据
+   # 修改配置使用 akshare 直接获取
+   
+   # 方案 B: 从备份恢复数据库
+   # 恢复最近的数据库备份
+   
+   # 方案 C: 重启数据库服务
+   # 如果是自建数据库，重启服务
+   ```
+
+---
+
+## 📋 应急响应检查清单
+
+### 响应前检查
+- [ ] 确认告警级别和影响范围
+- [ ] 通知相关人员（诸葛亮/主公）
+- [ ] 保存当前系统状态和日志
+- [ ] 准备回滚方案
+
+### 响应中检查
+- [ ] 执行诊断步骤，确定根因
+- [ ] 执行最小必要的修复操作
+- [ ] 持续监控系统状态
+- [ ] 记录所有操作和时间点
+
+### 响应后检查
+- [ ] 验证核心功能恢复正常
+- [ ] 验证监控数据正常
+- [ ] 验证业务数据完整性
+- [ ] 总结故障原因和改进措施
+
+---
+
+## 📞 联络清单
+
+### 紧急联络
+- **总军师（诸葛亮）**：负责决策和协调
+- **主公**：最终决策和资源协调
+- **赵云**：数据库和数据相关问题
+- **关羽**：回测引擎和风控问题
+- **张飞**：API 兼容层问题
+- **司马懿**：安全和合规问题
+
+### 阿里云支持
+- **阿里云控制台**：https://home.console.aliyun.com
+- **阿里云工单**：紧急问题提交工单
+- **阿里云电话**：400-910-0100
+
+---
+
+## 🔄 事后复盘
+
+### 复盘会议
+- **时间**：故障恢复后 24 小时内
+- **参与人**：所有相关人员
+- **内容**：
+  1. 故障回顾和时间线
+  2. 根因分析
+  3. 响应流程评估
+  4. 改进措施讨论
+
+### 改进措施
+- 技术改进：防止同类故障再次发生
+- 流程改进：优化响应流程
+- 监控改进：完善告警和监控
+- 文档改进：更新应急方案
+
+---
+
+**本应急方案会持续更新，确保生产环境安全稳定运行！** 🚛
@@ -0,0 +1,211 @@
+#!/usr/bin/env bash
+# =============================================================================
+# sanguo_vnpy 阿里云生产环境监控系统部署脚本
+# 版本: v1.0
+# 作者: 姜维（后勤总督）
+# =============================================================================
+
+set -e
+
+echo "========================================================================"
+echo "🚀 sanguo_vnpy 监控系统部署"
+echo "========================================================================"
+
+# 颜色定义
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+print_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+print_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# 步骤 1: 安装系统依赖
+print_info "步骤 1: 安装系统依赖..."
+
+if command -v apt-get &> /dev/null; then
+    sudo apt-get update
+    sudo apt-get install -y prometheus node_exporter grafana nginx
+elif command -v yum &> /dev/null; then
+    sudo yum install -y epel-release
+    sudo yum install -y prometheus node_exporter grafana nginx
+else
+    print_error "不支持的操作系统"
+    exit 1
+fi
+
+print_success "系统依赖安装完成"
+
+# 步骤 2: 配置 Prometheus
+print_info "步骤 2: 配置 Prometheus..."
+
+sudo mkdir -p /etc/prometheus
+sudo cat > /etc/prometheus/prometheus.yml << 'EOF'
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+
+  - job_name: 'node_exporter'
+    static_configs:
+      - targets: ['localhost:9100']
+
+  - job_name: 'sanguo_vnpy'
+    static_configs:
+      - targets: ['localhost:8080']
+    metrics_path: '/metrics'
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets: ['localhost:9093']
+
+rule_files:
+  - "/etc/prometheus/alerts.yml"
+EOF
+
+# 步骤 3: 配置告警规则
+print_info "步骤 3: 配置告警规则..."
+
+sudo cat > /etc/prometheus/alerts.yml << 'EOF'
+groups:
+  - name: sanguo_vnpy_alerts
+    rules:
+      - alert: InstanceDown
+        expr: up == 0
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          summary: "实例 {{ $labels.instance }} 已宕机"
+          description: "{{ $labels.job }} 实例 {{ $labels.instance }} 已宕机超过 5 分钟"
+
+      - alert: HighCPUUsage
+        expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "实例 {{ $labels.instance }} CPU 使用率过高"
+          description: "{{ $labels.instance }} CPU 使用率超过 80%"
+
+      - alert: HighMemoryUsage
+        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "实例 {{ $labels.instance }} 内存使用率过高"
+          description: "{{ $labels.instance }} 内存使用率超过 80%"
+
+      - alert: DiskSpaceLow
+        expr: (1 - (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"})) * 100 > 90
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          summary: "实例 {{ $labels.instance }} 磁盘空间不足"
+          description: "{{ $labels.instance }} 磁盘使用率超过 90%"
+
+      - alert: VnpyDown
+        expr: up{job="sanguo_vnpy"} == 0
+        for: 2m
+        labels:
+          severity: critical
+        annotations:
+          summary: "sanguo_vnpy 服务已宕机"
+          description: "sanguo_vnpy 服务已宕机超过 2 分钟，请立即检查！"
+EOF
+
+# 步骤 4: 配置 Grafana
+print_info "步骤 4: 配置 Grafana..."
+
+sudo mkdir -p /etc/grafana/provisioning/datasources
+sudo cat > /etc/grafana/provisioning/datasources/prometheus.yml << 'EOF'
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    url: http://localhost:9090
+    isDefault: true
+    editable: true
+EOF
+
+# 步骤 5: 启动服务
+print_info "步骤 5: 启动监控服务..."
+
+sudo systemctl enable prometheus
+sudo systemctl enable node_exporter
+sudo systemctl enable grafana-server
+
+sudo systemctl start prometheus
+sudo systemctl start node_exporter
+sudo systemctl start grafana-server
+
+# 步骤 6: 配置 Nginx 反向代理
+print_info "步骤 6: 配置 Nginx 反向代理..."
+
+sudo cat > /etc/nginx/sites-available/sanguo-monitoring << 'EOF'
+server {
+    listen 80;
+    server_name _;
+
+    location /grafana/ {
+        proxy_pass http://localhost:3000/;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    location /prometheus/ {
+        proxy_pass http://localhost:9090/;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+    }
+
+    location /node-exporter/ {
+        proxy_pass http://localhost:9100/;
+        proxy_set_header Host $host;
+    }
+}
+EOF
+
+sudo ln -sf /etc/nginx/sites-available/sanguo-monitoring /etc/nginx/sites-enabled/
+sudo nginx -t
+sudo systemctl reload nginx
+
+# 步骤 7: 显示访问信息
+print_success "监控系统部署完成！"
+
+echo ""
+echo "========================================================================"
+echo "📊 监控系统访问信息"
+echo "========================================================================"
+echo "Grafana:      http://$(hostname -I | awk '{print $1}'):3000"
+echo "Prometheus:   http://$(hostname -I | awk '{print $1}'):9090"
+echo "Node Exporter: http://$(hostname -I | awk '{print $1}'):9100"
+echo ""
+echo "默认 Grafana 账号: admin / admin"
+echo "========================================================================"
+echo ""
@@ -0,0 +1,166 @@
+"""
+高频算法性能基准测试
+测试不同实现方式的性能
+"""
+
+import time
+import numpy as np
+import pandas as pd
+from datetime import datetime
+from high_frequency_signal import (
+    TechnicalFactorCalculator,
+    PerformanceMonitor,
+    BarData,
+)
+
+
+def generate_test_data(n: int = 10000) -> pd.DataFrame:
+    """生成随机测试数据"""
+    np.random.seed(42)
+    
+    # 生成随机游走价格
+    returns = np.random.normal(0.0001, 0.01, n)
+    price = 100 * np.exp(np.cumsum(returns))
+    
+    # 生成OHLCV
+    high = price * (1 + np.random.uniform(0, 0.01, n))
+    low = price * (1 - np.random.uniform(0, 0.01, n))
+    open_ = price * (1 + np.random.normal(0, 0.005, n))
+    volume = np.random.randint(100000, 10000000, n)
+    
+    df = pd.DataFrame({
+        'open': open_,
+        'high': high,
+        'low': low,
+        'close': price,
+        'volume': volume,
+    })
+    
+    return df
+
+
+def benchmark_technical_calculator():
+    """测试技术因子计算器性能"""
+    print("=" * 60)
+    print("技术因子计算器性能基准测试")
+    print("=" * 60)
+    
+    calc = TechnicalFactorCalculator()
+    monitor = PerformanceMonitor()
+    
+    # 测试不同数据大小
+    for size in [100, 1000, 10000]:
+        df = generate_test_data(size)
+        close = df['close'].values
+        high = df['high'].values
+        low = df['low'].values
+        volume = df['volume'].values
+        
+        n_runs = 1000 if size <= 1000 else 100
+        
+        # 测试MA
+        start_time = time.time()
+        for _ in range(n_runs):
+            _ = calc.calculate_ma(close, 20)
+        elapsed = (time.time() - start_time) * 1000
+        print(f"MA {size} points x{n_runs} runs: {elapsed:.2f} ms "
+              f"({elapsed/n_runs:.3f} ms/run)")
+        
+        # 测试MACD
+        start_time = time.time()
+        for _ in range(n_runs):
+            _ = calc.calculate_macd(close, 12, 26, 9)
+        elapsed = (time.time() - start_time) * 1000
+        print(f"MACD {size} points x{n_runs} runs: {elapsed:.2f} ms "
+              f"({elapsed/n_runs:.3f} ms/run)")
+        
+        # 测试RSI
+        start_time = time.time()
+        for _ in range(n_runs):
+            _ = calc.calculate_rsi(close, 14)
+        elapsed = (time.time() - start_time) * 1000
+        print(f"RSI {size} points x{n_runs} runs: {elapsed:.2f} ms "
+              f"({elapsed/n_runs:.3f} ms/run)")
+        
+        # 测试布林带
+        start_time = time.time()
+        for _ in range(n_runs):
+            _ = calc.calculate_bollinger_bands(close, 20, 2.0)
+        elapsed = (time.time() - start_time) * 1000
+        print(f"Bollinger {size} points x{n_runs} runs: {elapsed:.2f} ms "
+              f"({elapsed/n_runs:.3f} ms/run)")
+        
+        # 测试ATR
+        start_time = time.time()
+        for _ in range(n_runs):
+            _ = calc.calculate_atr(high, low, close, 14)
+        elapsed = (time.time() - start_time) * 1000
+        print(f"ATR {size} points x{n_runs} runs: {elapsed:.2f} ms "
+              f"({elapsed/n_runs:.3f} ms/run)")
+        
+        print()
+
+
+def benchmark_full_pipeline():
+    """测试完整信号生成流水线"""
+    print("=" * 60)
+    print("完整信号生成流水线性能基准测试")
+    print("=" * 60)
+    
+    from high_frequency_signal import HighFrequencySignalGenerator
+    
+    calc = TechnicalFactorCalculator()
+    generator = HighFrequencySignalGenerator()
+    monitor = PerformanceMonitor()
+    
+    # 生成测试数据
+    n_days = 252 * 5
+    df = generate_test_data(n_days)
+    
+    start_time = time.time()
+    
+    # 模拟逐日处理
+    signals = []
+    for i in range(20, len(df)):
+        data = df.iloc[:i+1]
+        bar = BarData(
+            code="TEST",
+            datetime=datetime.now(),
+            open=data['open'].iloc[-1],
+            high=data['high'].iloc[-1],
+            low=data['low'].iloc[-1],
+            close=data['close'].iloc[-1],
+            volume=data['volume'].iloc[-1],
+            amount=data['volume'].iloc[-1] * data['close'].iloc[-1]
+        )
+        monitor.on_process_start()
+        signal = generator.generate_signal_from_bar(bar, data)
+        monitor.on_process_end()
+        if signal:
+            signals.append(signal)
+    
+    elapsed = (time.time() - start_time) * 1000
+    print(f"完整回测 {n_days} 根K线，生成 {len(signals)} 个信号")
+    print(f"总耗时: {elapsed:.2f} ms")
+    print(f"平均每根K线: {elapsed/n_days:.3f} ms")
+    print()
+    
+    monitor.print_stats()
+    
+    stats = generator.get_performance_stats()
+    print(f"\n信号统计: {stats}")
+
+
+def main():
+    """主函数"""
+    benchmark_technical_calculator()
+    print()
+    benchmark_full_pipeline()
+    print()
+    print("=" * 60)
+    print("基准测试完成")
+    print("=" * 60)
+
+
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,366 @@
+"""
+高频交易信号生成器
+支持tick级和分钟级数据处理
+低延迟信号生成
+支持向量化计算
+"""
+
+from typing import List, Dict, Tuple, Optional, Union
+import numpy as np
+import pandas as pd
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TickData:
+    """tick数据结构"""
+    code: str
+    datetime: datetime
+    price: float
+    volume: int
+    amount: float
+    direction: int = 0  # 1买-1卖0平
+    
+
+@dataclass
+class BarData:
+    """K线数据结构"""
+    code: str
+    datetime: datetime
+    open: float
+    high: float
+    low: float
+    close: float
+    volume: float
+    amount: float
+
+
+@dataclass
+class Signal:
+    """交易信号结构"""
+    code: str
+    datetime: datetime
+    direction: int  # 1多-1空0无
+    strength: float  # 信号强度 0-1
+    price: float  # 信号价格
+    expected_price: float  # 预期目标价
+
+
+class HighFrequencyDataPipeline:
+    """高频数据流处理管道"""
+    
+    def __init__(self, window_size: int = 100):
+        self.window_size = window_size
+        self._buffer: Dict[str, List[TickData]] = {}
+        self._last_bar: Dict[str, Optional[BarData]] = {}
+        self._tick_count = 0
+        
+    def on_tick(self, tick: TickData) -> Optional[Signal]:
+        """处理新tick，可能生成信号"""
+        if tick.code not in self._buffer:
+            self._buffer[tick.code] = []
+        
+        buffer = self._buffer[tick.code]
+        buffer.append(tick)
+        
+        # 保持窗口大小
+        if len(buffer) > self.window_size:
+            buffer.pop(0)
+            
+        self._tick_count += 1
+        
+        # 这里可以触发滚动计算
+        return None
+    
+    def get_tick_buffer(self, code: str) -> List[TickData]:
+        """获取tick缓冲"""
+        return self._buffer.get(code, [])
+    
+    def statistics(self) -> Dict:
+        """获取统计信息"""
+        return {
+            'total_ticks': self._tick_count,
+            'total_codes': len(self._buffer),
+            'buffer_size': sum(len(b) for b in self._buffer.values())
+        }
+
+
+class TechnicalFactorCalculator:
+    """技术因子计算器 - 向量化实现"""
+    
+    def __init__(self):
+        self.cache: Dict[str, pd.DataFrame] = {}
+        
+    def calculate_ma(self, prices: np.ndarray, period: int) -> np.ndarray:
+        """计算移动平均线"""
+        return pd.Series(prices).rolling(period).mean().values
+    
+    def calculate_ema(self, prices: np.ndarray, period: int) -> np.ndarray:
+        """计算指数移动平均线"""
+        return pd.Series(prices).ewm(span=period, adjust=False).mean().values
+    
+    def calculate_macd(self, prices: np.ndarray, 
+                     fast_period: int = 12, 
+                     slow_period: int = 26, 
+                     signal_period: int = 9) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """计算MACD"""
+        ema_fast = self.calculate_ema(prices, fast_period)
+        ema_slow = self.calculate_ema(prices, slow_period)
+        dif = ema_fast - ema_slow
+        dea = self.calculate_ema(dif, signal_period)
+        macd = 2 * (dif - dea)
+        return dif, dea, macd
+    
+    def calculate_bollinger_bands(self, prices: np.ndarray, 
+                                period: int = 20, 
+                                num_std: float = 2.0) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """计算布林带"""
+        middle = pd.Series(prices).rolling(period).mean().values
+        std = pd.Series(prices).rolling(period).std().values
+        upper = middle + num_std * std
+        lower = middle - num_std * std
+        return upper, middle, lower
+    
+    def calculate_rsi(self, prices: np.ndarray, period: int = 14) -> np.ndarray:
+        """计算RSI相对强弱指数"""
+        delta = np.diff(prices)
+        gain = np.where(delta > 0, delta, 0)
+        loss = np.where(delta < 0, -delta, 0)
+        
+        avg_gain = np.zeros_like(prices)
+        avg_loss = np.zeros_like(prices)
+        
+        # 第一个period没有数据
+        avg_gain[period] = np.mean(gain[:period])
+        avg_loss[period] = np.mean(loss[:period])
+        
+        # Wilder's smoothing
+        for i in range(period + 1, len(prices)):
+            avg_gain[i] = (avg_gain[i-1] * (period - 1) + gain[i-1]) / period
+            avg_loss[i] = (avg_loss[i-1] * (period - 1) + loss[i-1]) / period
+        
+        rs = avg_gain / (avg_loss + 1e-10)
+        rsi = 100 - (100 / (1 + rs))
+        return rsi
+    
+    def calculate_atr(self, high: np.ndarray, low: np.ndarray, 
+                     close: np.ndarray, period: int = 14) -> np.ndarray:
+        """计算ATR平均真实波幅"""
+        tr = np.zeros_like(high)
+        tr[0] = high[0] - low[0]
+        
+        for i in range(1, len(high)):
+            tr1 = high[i] - low[i]
+            tr2 = abs(high[i] - close[i-1])
+            tr3 = abs(low[i] - close[i-1])
+            tr[i] = max(tr1, tr2, tr3)
+        
+        atr = pd.Series(tr).rolling(period).mean().values
+        return atr
+    
+    def calculate_volume_ma(self, volumes: np.ndarray, period: int) -> np.ndarray:
+        """计算成交量移动平均"""
+        return pd.Series(volumes).rolling(period).mean().values
+    
+    def calculate_obv(self, close: np.ndarray, volume: np.ndarray) -> np.ndarray:
+        """计算OBV能量潮"""
+        obv = np.zeros_like(close)
+        obv[0] = volume[0]
+        
+        for i in range(1, len(close)):
+            if close[i] > close[i-1]:
+                obv[i] = obv[i-1] + volume[i]
+            elif close[i] < close[i-1]:
+                obv[i] = obv[i-1] - volume[i]
+            else:
+                obv[i] = obv[i-1]
+        
+        return obv
+
+
+class HighFrequencySignalGenerator:
+    """高频交易信号生成器
+    支持多种技术因子组合生成交易信号
+    """
+    
+    def __init__(self):
+        self.factor_calculator = TechnicalFactorCalculator()
+        self.data_pipeline = HighFrequencyDataPipeline()
+        self.signal_history: List[Signal] = []
+        self.performance_stats = {
+            'total_signals': 0,
+            'long_signals': 0,
+            'short_signals': 0,
+        }
+        
+    def generate_signal_from_bar(self, bar: BarData, 
+                               data: pd.DataFrame) -> Optional[Signal]:
+        """从K线数据生成信号"""
+        # 计算各种技术因子
+        close = data['close'].values
+        high = data['high'].values
+        low = data['low'].values
+        volume = data['volume'].values
+        
+        # MACD
+        dif, dea, macd = self.factor_calculator.calculate_macd(close)
+        
+        # RSI
+        rsi = self.factor_calculator.calculate_rsi(close)
+        
+        # 布林带
+        bb_upper, bb_mid, bb_lower = self.factor_calculator.calculate_bollinger_bands(close)
+        
+        # ATR
+        atr = self.factor_calculator.calculate_atr(high, low, close, 14)
+        
+        # 生成信号逻辑
+        current_rsi = rsi[-1]
+        current_macd = macd[-1]
+        current_dif = dif[-1]
+        current_dea = dea[-1]
+        current_price = close[-1]
+        
+        # MACD金叉买入
+        if current_dif > current_dea and (dif[-2] <= dea[-2]) and current_rsi < 70:
+            signal = Signal(
+                code=bar.code,
+                datetime=bar.datetime,
+                direction=1,
+                strength=min((70 - current_rsi) / 70 + (current_dif - current_dea) / 1, 1.0),
+                price=current_price,
+                expected_price=current_price + 2 * atr[-1]
+            )
+            self._record_signal(signal)
+            return signal
+        
+        # MACD死叉卖出
+        elif current_dif < current_dea and (dif[-2] >= dea[-2]) and current_rsi > 30:
+            signal = Signal(
+                code=bar.code,
+                datetime=bar.datetime,
+                direction=-1,
+                strength=min((current_rsi - 30) / 30 + (current_dea - current_dif) / 1, 1.0),
+                price=current_price,
+                expected_price=current_price - 2 * atr[-1]
+            )
+            self._record_signal(signal)
+            return signal
+        
+        # 布林带突破
+        elif current_price < bb_lower[-1] and current_rsi < 30:
+            signal = Signal(
+                code=bar.code,
+                datetime=bar.datetime,
+                direction=1,
+                strength=(bb_lower[-1] - current_price) / (bb_lower[-1] * 0.05),
+                price=current_price,
+                expected_price=bb_mid[-1]
+            )
+            self._record_signal(signal)
+            return signal
+        
+        elif current_price > bb_upper[-1] and current_rsi > 70:
+            signal = Signal(
+                code=bar.code,
+                datetime=bar.datetime,
+                direction=-1,
+                strength=(current_price - bb_upper[-1]) / (bb_upper[-1] * 0.05),
+                price=current_price,
+                expected_price=bb_mid[-1]
+            )
+            self._record_signal(signal)
+            return signal
+        
+        return None
+    
+    def _record_signal(self, signal: Signal):
+        """记录信号"""
+        self.signal_history.append(signal)
+        self.performance_stats['total_signals'] += 1
+        if signal.direction == 1:
+            self.performance_stats['long_signals'] += 1
+        elif signal.direction == -1:
+            self.performance_stats['short_signals'] += 1
+    
+    def get_performance_stats(self) -> Dict:
+        """获取性能统计"""
+        return self.performance_stats.copy()
+
+
+class PerformanceMonitor:
+    """性能监控器 - 监控高频算法性能"""
+    
+    def __init__(self, window_size: int = 1000):
+        self.latencies: List[float] = []
+        self.window_size = window_size
+        self.processed_ticks = 0
+        self.start_time = datetime.now()
+        
+    def on_process_start(self):
+        """开始处理计时"""
+        self._start = datetime.now()
+        
+    def on_process_end(self):
+        """结束处理计时"""
+        end = datetime.now()
+        latency = (end - self._start).total_seconds() * 1000  # ms
+        self.latencies.append(latency)
+        self.processed_ticks += 1
+        
+        # 保持窗口大小
+        if len(self.latencies) > self.window_size:
+            self.latencies.pop(0)
+    
+    def get_stats(self) -> Dict:
+        """获取性能统计"""
+        if not self.latencies:
+            return {
+                'latency_ms_avg': 0,
+                'latency_ms_p50': 0,
+                'latency_ms_p99': 0,
+                'total_processed': 0,
+                'tps': 0
+            }
+        
+        latencies_np = np.array(self.latencies)
+        elapsed = (datetime.now() - self.start_time).total_seconds()
+        
+        return {
+            'latency_ms_avg': float(latencies_np.mean()),
+            'latency_ms_p50': float(np.percentile(latencies_np, 50)),
+            'latency_ms_p99': float(np.percentile(latencies_np, 99)),
+            'latency_ms_max': float(latencies_np.max()),
+            'total_processed': self.processed_ticks,
+            'tps': self.processed_ticks / elapsed if elapsed > 0 else 0
+        }
+    
+    def print_stats(self):
+        """打印性能统计"""
+        stats = self.get_stats()
+        print("=" * 60)
+        print("高频算法性能监控")
+        print("=" * 60)
+        print(f"平均延迟: {stats['latency_ms_avg']:.2f} ms")
+        print(f"P50延迟: {stats['latency_ms_p50']:.2f} ms")
+        print(f"P99延迟: {stats['latency_ms_p99']:.2f} ms")
+        print(f"最大延迟: {stats['latency_ms_max']:.2f} ms")
+        print(f"总处理tick数: {stats['total_processed']}")
+        print(f"每秒处理: {stats['tps']:.2f} ticks")
+        print("=" * 60)
+
+
+# 导出
+__all__ = [
+    'TickData',
+    'BarData',
+    'Signal',
+    'HighFrequencyDataPipeline',
+    'TechnicalFactorCalculator',
+    'HighFrequencySignalGenerator',
+    'PerformanceMonitor',
+]