From 73454c07877cce78874a556567e15fc9e2c48f9e Mon Sep 17 00:00:00 2001 From: cfdaily Date: Fri, 12 Jun 2026 13:45:31 +0800 Subject: [PATCH] refactor(auto-deploy): YAML config + post_deploy list + deploy failure mail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New config/deploy-targets.yaml: centralized deploy target config - Rewrite auto-deploy in _handle_pr_closed to use YAML config - Add _send_deploy_failure_mail helper (reuses deploy_failure template) - Support post_deploy command list (not just pm2 restart) - Docs-only changes skip post_deploy - Add pyyaml to pyproject.toml dependencies - Update design doc §23 with new architecture --- config/deploy-targets.yaml | 16 +++++ docs/design/23-toolchain-pr-lifecycle.md | 40 ++++++++---- pyproject.toml | 3 + src/api/toolchain_routes.py | 83 +++++++++++++++++------- 4 files changed, 104 insertions(+), 38 deletions(-) create mode 100644 config/deploy-targets.yaml diff --git a/config/deploy-targets.yaml b/config/deploy-targets.yaml new file mode 100644 index 0000000..f307116 --- /dev/null +++ b/config/deploy-targets.yaml @@ -0,0 +1,16 @@ +# 部署目标配置 — PR 合并后自动部署(集中管理) +# 平台级能力,所有仓库的部署配置统一在这里维护 + +targets: + sanguo/sanguo_moziplus_v2: + dev_dir: ~/.openclaw/sanguo_projects/sanguo_moziplus_v2 + install_dir: ~/.sanguo_projects/sanguo_moziplus_v2 + pm2_name: sanguo-moziplus-v2 + rsync_exclude: + - .git/ + - node_modules/ + - __pycache__/ + - data/ + health_check: http://localhost:8083/health + post_deploy: + - pm2 restart sanguo-moziplus-v2 diff --git a/docs/design/23-toolchain-pr-lifecycle.md b/docs/design/23-toolchain-pr-lifecycle.md index bcae021..913bbba 100644 --- a/docs/design/23-toolchain-pr-lifecycle.md +++ b/docs/design/23-toolchain-pr-lifecycle.md @@ -141,21 +141,35 @@ async def _handle_pull_request(payload: Dict[str, Any]) -> None: **触发**:`_handle_pr_closed` 合并事件处理完成后 **逻辑**: -1. 仓库白名单检查(仅 `sanguo/sanguo_moziplus_v2`) -2. `git pull origin main`(开发目录 `~/.openclaw/sanguo_projects/sanguo_moziplus_v2/`) -3. `rsync` 同步到安装目录(排除 `.git`/`node_modules`/`__pycache__`) -4. 获取 PR 变更文件列表(复用 `_fetch_pr_files`) -5. 判断是否需要重启:文件路径包含 `src/`、`templates/`、`frontend/` 或 `*.py` 后缀 → 重启 -6. 纯 `docs/` 变更 → 只 pull + rsync 不重启 -7. rsync 或 pm2 restart 失败 → 通知 `jiangwei-infra` -8. 部署失败仅 log + Mail 通知,不影响合并通知 +1. 读取 `config/deploy-targets.yaml`,查找 `repo` 对应的部署目标 +2. 不在配置中 → 跳过(未来新项目加一条配置即可) +3. `git pull origin main`(开发目录) +4. `rsync` 同步到安装目录(排除项由配置指定) +5. 判断是否需要执行 post_deploy:文件路径包含 `src/`、`templates/`、`frontend/` 或 `*.py` 后缀 +6. 纯 `docs/` 变更 → 只 pull + rsync,不执行 post_deploy +7. 部署失败复用 `deploy_failure.md` 模板通知 `jiangwei-infra` + `pangtong-fujunshi` + +**配置文件**:`config/deploy-targets.yaml`(集中管理所有仓库的部署目标) + +```yaml +targets: + sanguo/sanguo_moziplus_v2: + dev_dir: ~/.openclaw/sanguo_projects/sanguo_moziplus_v2 + install_dir: ~/.sanguo_projects/sanguo_moziplus_v2 + pm2_name: sanguo-moziplus-v2 + rsync_exclude: [.git/, node_modules/, __pycache__/, data/] + health_check: http://localhost:8083/health + post_deploy: [pm2 restart sanguo-moziplus-v2] +``` **设计决策**: -- **git pull 在开发目录**(有 `.git`),rsync 到安装目录:安装目录无 `.git`,直接 git pull 必然失败 -- **全异步**:所有子进程调用使用 `asyncio.create_subprocess_exec`,不阻塞 event loop -- **仓库白名单**:只对 `sanguo/sanguo_moziplus_v2` 触发自动部署,其他仓库忽略 -- **部署失败通知**:rsync 或 pm2 restart 失败时发 Mail 给 `jiangwei-infra`(S1) -- 不做优雅等待(sentinel file 方案):daemon 正在执行任务时重启,已 spawn 的子进程独立运行不受影响,最坏情况是当前 tick 中断、下一轮 PM2 拉起后继续 +- **集中式 YAML 配置**(姜维建议):部署是平台级能力,非仓库级。一个文件管所有仓库,新增项目零代码改动 +- **YAML 而非 JSON**:支持注释,方便临时禁用某个仓库或排除项 +- **post_deploy 列表**:支持任意 shell 命令,不只是 pm2 restart。未来可扩展(如 pip install -e .) +- **health_check 字段**:预留,后续可用于部署后健康检查 +- **失败通知复用**:CD 失败和 CI 失败用同一套通知机制(deploy_failure.md 模板 + _send_mail) +- git pull 在开发目录(有 `.git`),rsync 到安装目录 +- 全异步调用(asyncio.create_subprocess_exec) ### 不做的事 diff --git a/pyproject.toml b/pyproject.toml index 34e4063..889b6a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,9 @@ name = "sanguo-moziplus-v2" version = "3.0.0" description = "AI Native DevOps Platform v2 - Blackboard Architecture" requires-python = ">=3.9" +dependencies = [ + "pyyaml", +] [tool.pytest.ini_options] asyncio_mode = "auto" diff --git a/src/api/toolchain_routes.py b/src/api/toolchain_routes.py index dc2bc1b..1c266c8 100644 --- a/src/api/toolchain_routes.py +++ b/src/api/toolchain_routes.py @@ -450,6 +450,18 @@ async def _handle_pr_synchronize(payload: Dict[str, Any]) -> None: _send_mail(reviewer, title, text) +def _send_deploy_failure_mail(repo: str, pr_number: int, pr_title: str, reason: str) -> None: + """CD 部署失败通知,复用 deploy_failure 模板""" + text = render_template("deploy_failure", { + "repo": repo, + "commit_sha": f"PR #{pr_number}", + }) + title = f"部署失败: {repo} (auto-deploy, PR #{pr_number})" + full_text = f"{text}\n\n失败原因: {reason}" + for agent_id in ("jiangwei-infra", "pangtong-fujunshi"): + _send_mail(agent_id, title, full_text) + + async def _handle_pr_closed(payload: Dict[str, Any]) -> None: """PR closed → 如果 merged,通知 PR 作者。""" pr = payload.get("pull_request") @@ -481,14 +493,26 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None: title = f"PR 已合并: {pr_title} ({repo}#{pr_number})" _send_mail(pr_author, title, text) - # 自动部署:git pull + rsync + 按需 pm2 restart(仅 sanguo/sanguo_moziplus_v2) + # 自动部署:git pull + rsync + 按需 post_deploy try: - if repo != "sanguo/sanguo_moziplus_v2": + import yaml + + # 加载部署配置 + config_path = Path(__file__).parent.parent.parent / "config" / "deploy-targets.yaml" + if not config_path.exists(): return - dev_dir = os.path.expanduser("~/.openclaw/sanguo_projects/sanguo_moziplus_v2") - install_dir = os.environ.get("SANGUO_PROJECTS_DIR", os.path.expanduser("~/.sanguo_projects")) - install_repo_dir = os.path.join(install_dir, "sanguo_moziplus_v2") + with open(config_path, "r", encoding="utf-8") as f: + deploy_config = yaml.safe_load(f) or {} + + targets = deploy_config.get("targets", {}) + target = targets.get(repo) + if not target: + return # 该仓库不在部署配置中,跳过 + + dev_dir = os.path.expanduser(target["dev_dir"]) + install_dir = os.path.expanduser(target.get("install_dir", target["dev_dir"])) + rsync_excludes = target.get("rsync_exclude", []) # Step 1: git pull in dev dir proc = await asyncio.create_subprocess_exec( @@ -500,15 +524,19 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None: stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30) if proc.returncode != 0: - logger.warning("Auto-deploy: git pull failed: %s", stderr.decode()) + logger.warning("Auto-deploy: git pull failed for %s: %s", repo, stderr.decode()) return logger.info("Auto-deploy: git pull success for %s", repo) # Step 2: rsync to install dir + rsync_args = ["rsync", "-a"] + for exc in rsync_excludes: + rsync_args.extend(["--exclude", exc]) + rsync_args.extend([f"{dev_dir}/", f"{install_dir}/"]) + rsync_proc = await asyncio.create_subprocess_exec( - "rsync", "-a", "--exclude=.git", "--exclude=node_modules", "--exclude=__pycache__", - f"{dev_dir}/", f"{install_repo_dir}/", + *rsync_args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) @@ -516,35 +544,40 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None: if rsync_proc.returncode != 0: logger.error("Auto-deploy: rsync failed: %s", rsync_err.decode()) - _send_mail("jiangwei-infra", f"[Auto-Deploy] rsync 失败 ({repo}#{pr_number})", - f"PR {pr_title} 合并后自动部署 rsync 失败。\n\nstderr: {rsync_err.decode()}") + _send_deploy_failure_mail(repo, pr_number, pr_title, f"rsync 失败: {rsync_err.decode()}") return - # Step 3: 判断是否需要重启 + # Step 3: 判断是否需要执行 post_deploy files = await _fetch_pr_files(repo, pr_number) + file_list = files[0] needs_restart = any( f.startswith("src/") or f.startswith("templates/") or f.startswith("frontend/") or f.endswith(".py") - for f in files[0] + for f in file_list ) if needs_restart: - restart_proc = await asyncio.create_subprocess_exec( - "pm2", "restart", "sanguo-moziplus-v2", - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - _, restart_err = await asyncio.wait_for(restart_proc.communicate(), timeout=15) + post_deploy_cmds = target.get("post_deploy", []) + for cmd in post_deploy_cmds: + logger.info("Auto-deploy: executing post_deploy: %s", cmd) + deploy_proc = await asyncio.create_subprocess_exec( + "sh", "-c", cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, deploy_err = await asyncio.wait_for(deploy_proc.communicate(), timeout=30) - if restart_proc.returncode == 0: - logger.info("Auto-deploy: pm2 restart triggered (files: %s)", ", ".join(files[0][:5])) + if deploy_proc.returncode != 0: + logger.error("Auto-deploy: post_deploy failed: %s", deploy_err.decode()) + _send_deploy_failure_mail(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}") + break else: - logger.error("Auto-deploy: pm2 restart failed: %s", restart_err.decode()) - _send_mail("jiangwei-infra", f"[Auto-Deploy] pm2 restart 失败 ({repo}#{pr_number})", - f"PR {pr_title} 合并后 pm2 restart 失败。\n\nstderr: {restart_err.decode()}") + logger.info("Auto-deploy: all post_deploy commands succeeded (files: %s)", ", ".join(file_list[:5])) else: - logger.info("Auto-deploy: docs-only change, skip restart") + logger.info("Auto-deploy: docs-only change for %s, skip post_deploy", repo) + except asyncio.TimeoutError: - logger.error("Auto-deploy: timeout") + logger.error("Auto-deploy: timeout for %s", repo) + _send_deploy_failure_mail(repo, pr_number, pr_title, "部署超时") except Exception as e: logger.error("Auto-deploy: unexpected error: %s", e)