From c89863a2889d4604dac0b634bf2d116bb386f5e2 Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sat, 13 Jun 2026 23:36:44 +0800 Subject: [PATCH 01/10] =?UTF-8?q?feat:=20=C2=A717=20ToolchainHandler=20?= =?UTF-8?q?=E5=BC=BA=E7=BA=A6=E6=9D=9F=E5=AE=9E=E7=8E=B0=EF=BC=88Step=201-?= =?UTF-8?q?4=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 1: 基础设施 - prompt_composer.py: PromptContext 新增 action_type + action_steps 字段 - spawner.py: handler 路径提取 action_type/action_steps 传入 PromptContext - db.py: comments CHECK 约束加入 action_report Step 2: ToolchainHandler 强化 - ToolchainContextSection: 加 steps 渲染 + action_hint(按 action_type) - ToolchainApiSection: 改为 action_report 提交指引 + Gitea 协作指引 - ToolchainConstraintsSection: 5 条强约束 + Red Flags 防self-rationalization - verify_completion: action_report → output → comment 三层 fallback - review_merged 始终通过(纯通知) - infrastructure_failure 始终通过(防递归) - 修复 LENGTH(content) → LENGTH(body) bug - on_failure 三分路: 业务→Gitea PR comment / 系统→Gitea Issue / 基础设施→toolchain task Step 3: toolchain_routes 改造 - 新增 _toolchain_db_path() + _send_toolchain_task() - 所有 8 个 handler 改为 _send_toolchain_task - _send_mail 保留但不再被 toolchain handler 调用 - _send_deploy_failure_mail → _send_deploy_failure_task Step 4: 测试 - 29 个单元测试全部通过 - 全量 456 passed, 3 skipped, 0 failures --- src/api/toolchain_routes.py | 289 ++++++++++++- src/blackboard/db.py | 2 +- src/daemon/prompt_composer.py | 2 + src/daemon/spawner.py | 6 + src/daemon/toolchain_handler.py | 453 ++++++++++++++++----- tests/unit/test_toolchain_handler_v2.py | 513 ++++++++++++++++++++++++ 6 files changed, 1140 insertions(+), 125 deletions(-) create mode 100644 tests/unit/test_toolchain_handler_v2.py diff --git a/src/api/toolchain_routes.py b/src/api/toolchain_routes.py index 36ee3b3..20cc655 100644 --- a/src/api/toolchain_routes.py +++ b/src/api/toolchain_routes.py @@ -189,6 +189,7 @@ def _calc_risk_level(changed_files: List[str]) -> str: MAIL_PROJECT_ID = "_mail" 
+TOOLCHAIN_PROJECT_ID = "_toolchain" def _mail_db_path() -> Path: @@ -200,6 +201,73 @@ def _mail_db_path() -> Path: return db +def _toolchain_db_path() -> Path: + """获取 Toolchain 数据库路径,确保目录和表存在。""" + root = get_data_root() + db = root / TOOLCHAIN_PROJECT_ID / "blackboard.db" + db.parent.mkdir(parents=True, exist_ok=True) + init_db(db) + return db + + +def _send_toolchain_task( + to_agent: str, + title: str, + description: str, + event_type: str, + action_type: str, + steps: list, + context_data: dict | None = None, + source: str = "webhook", +) -> str: + """创建 Toolchain Task 并写入 _toolchain DB。 + + Args: + to_agent: 收件人 Agent ID + title: 任务标题 + description: 任务描述(模板渲染后的事件信息) + event_type: 事件类型(review_result / ci_failure / ...) + action_type: 动作分类(用于步骤选择和日志统计) + steps: 结构化编号步骤列表 + context_data: 事件上下文数据(PR 号、仓库名等) + source: 来源标识 + + Returns: + 创建的 Task ID + """ + if to_agent not in AGENT_IDS: + logger.warning("Unknown agent: %s, skipping toolchain task", to_agent) + return "" + + task_id = f"tc-{int(datetime.now().timestamp() * 1000)}" + must_hives = json.dumps({ + "event_type": event_type, + "action_type": action_type, + "steps": steps, + "context": context_data or {}, + "from": "system", + "source": source, + }, ensure_ascii=False) + + task = Task( + id=task_id, + title=title, + description=description, + assignee=to_agent, + assigned_by="system", + must_haves=must_hives, + task_type="toolchain", + status="pending", + ) + bb = Blackboard(_toolchain_db_path()) + bb.create_task(task) + logger.info( + "Toolchain task sent: %s → %s [%s] action_type=%s", + title[:40], to_agent, task_id, action_type, + ) + return task_id + + def _send_mail( to_agent: str, title: str, @@ -327,7 +395,25 @@ async def _send_mention_mails( }) title = f"@mention ({intent_hint}): {source_type} {number_str} ({repo})" - _send_mail(agent_id, title, text) + _send_toolchain_task( + to_agent=agent_id, + title=title, + description=text, + event_type="mention", + action_type="mention", + steps=[ + "按上方 
mention 模板中的 response_guidance 执行", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "source_type": source_type, + "source_url": source_url, + "commenter": commenter, + "content_snippet": content[:500], + "repo": repo, + "issue_number": issue_number, + }, + ) # --------------------------------------------------------------------------- @@ -379,7 +465,27 @@ async def _handle_pr_opened(payload: Dict[str, Any]) -> None: }) title = f"Review 请求: {pr_title} ({repo}#{pr_number})" - _send_mail("simayi-challenger", title, text) + _send_toolchain_task( + to_agent="simayi-challenger", + title=title, + description=text, + event_type="review_request", + action_type="review_request", + steps=[ + f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", + "按审查清单审查(参考 code-review Skill)", + f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)— APPROVE 或 REQUEST_CHANGES", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "pr_author": pr_author, + "branch": branch, + "risk_level": risk_level, + }, + ) # S3: PR body @mention 通知 pr_body = pr.get("body", "") or "" @@ -488,7 +594,25 @@ async def _handle_pull_request_review(payload: Dict[str, Any]) -> None: }) title = f"Review 评论: {pr_title} ({repo}#{pr_number})" - _send_mail(pr_author, title, text) + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="review_comment", + action_type="review_comment", + steps=[ + f"查看评论(Gitea API: GET /repos/{repo}/issues/{pr_number}/comments)", + "根据评论内容响应(修改代码或在 PR 上回复 comment)", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + 
"reviewer": reviewer, + "comment_body": review_body, + }, + ) # S5: Review body @mention 通知(COMMENTED 路径) await _send_review_mentions(review_body, reviewer, pr_author, pr, repo, pr_number) @@ -510,7 +634,34 @@ async def _handle_pull_request_review(payload: Dict[str, Any]) -> None: }) title = f"Review {result}: {pr_title} ({repo}#{pr_number})" - _send_mail(pr_author, title, text) + if state == "APPROVED": + tc_steps = [ + f"合并 PR(Gitea API: POST /repos/{repo}/pulls/{pr_number}/merge)", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ] + else: # REQUEST_CHANGES + tc_steps = [ + "按审查意见逐条修改代码", + "push 到原分支 → CI 自动跑", + "CI 通过后等重新 Review", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ] + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="review_result", + action_type="review_result", + steps=tc_steps, + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "result": result, + "reviewer": reviewer, + "review_body": review_body, + }, + ) # S5: Review body @mention 通知(非 COMMENTED 路径) await _send_review_mentions(review_body, reviewer, pr_author, pr, repo, pr_number) @@ -579,11 +730,31 @@ async def _handle_pr_synchronize(payload: Dict[str, Any]) -> None: }) title = f"PR 更新: {pr_title} ({repo}#{pr_number})" - _send_mail(reviewer, title, text) + _send_toolchain_task( + to_agent=reviewer, + title=title, + description=text, + event_type="review_updated", + action_type="review_updated", + steps=[ + f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", + "重点检查上次 Review 意见的修改部分", + f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, 
+ "pr_author": pr_author, + "new_sha": new_sha, + "reviewer": reviewer, + }, + ) -def _send_deploy_failure_mail(repo: str, pr_number: int, pr_title: str, reason: str) -> None: - """CD 部署失败通知,复用 deploy_failure 模板""" +def _send_deploy_failure_task(repo: str, pr_number: int, pr_title: str, reason: str) -> None: + """CD 部署失败通知,走 ToolchainHandler。""" text = render_template("deploy_failure", { "repo": repo, "commit_sha": f"PR #{pr_number}", @@ -591,7 +762,25 @@ def _send_deploy_failure_mail(repo: str, pr_number: int, pr_title: str, reason: title = f"部署失败: {repo} (auto-deploy, PR #{pr_number})" full_text = f"{text}\n\n失败原因: {reason}" for agent_id in ("jiangwei-infra", "pangtong-fujunshi"): - _send_mail(agent_id, title, full_text) + _send_toolchain_task( + to_agent=agent_id, + title=title, + description=full_text, + event_type="deploy_failure", + action_type="deploy_failure", + steps=[ + "检查 deploy 日志", + "排查失败原因", + "修复并重新部署", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "repo": repo, + "pr_number": pr_number, + "pr_title": pr_title, + "reason": reason, + }, + ) async def _handle_pr_closed(payload: Dict[str, Any]) -> None: @@ -623,7 +812,21 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None: }) title = f"PR 已合并: {pr_title} ({repo}#{pr_number})" - _send_mail(pr_author, title, text) + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="review_merged", + action_type="review_merged", + steps=[], # 纯通知,无步骤 + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "pr_author": pr_author, + "merged_by": merged_by, + }, + ) # 自动部署:git pull + rsync + 按需 post_deploy try: @@ -676,7 +879,7 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None: if rsync_proc.returncode != 0: logger.error("Auto-deploy: rsync failed: %s", rsync_err.decode()) - _send_deploy_failure_mail(repo, pr_number, pr_title, 
f"rsync 失败: {rsync_err.decode()}") + _send_deploy_failure_task(repo, pr_number, pr_title, f"rsync 失败: {rsync_err.decode()}") return # Step 3: 判断是否需要执行 post_deploy @@ -731,7 +934,7 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None: if deploy_proc.returncode != 0: logger.error("Auto-deploy: post_deploy failed: %s", deploy_err.decode()) - _send_deploy_failure_mail(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}") + _send_deploy_failure_task(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}") break else: logger.info("Auto-deploy: all post_deploy commands succeeded (files: %s)", ", ".join(file_list[:5])) @@ -740,7 +943,7 @@ async def _handle_pr_closed(payload: Dict[str, Any]) -> None: except asyncio.TimeoutError: logger.error("Auto-deploy: timeout for %s", repo) - _send_deploy_failure_mail(repo, pr_number, pr_title, "部署超时") + _send_deploy_failure_task(repo, pr_number, pr_title, "部署超时") except Exception as e: logger.error("Auto-deploy: unexpected error: %s", e) @@ -787,7 +990,29 @@ async def _handle_issues(payload: Dict[str, Any]) -> None: }) title = f"Issue 指派: {issue_title} ({repo}#{issue_number})" - _send_mail(assignee, title, text) + _send_toolchain_task( + to_agent=assignee, + title=title, + description=text, + event_type="issue_assigned", + action_type="issue_assigned", + steps=[ + f"创建分支 fix/{issue_number}-{brief}", + "编码 + 写 UT", + "push → 等 CI", + f"CI 通过后创建 PR(Gitea API: POST /repos/{repo}/pulls)", + "等 Review", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "issue_number": issue_number, + "repo": repo, + "issue_title": issue_title, + "labels": labels, + "issue_body": issue_body or "(无描述)", + "brief": brief, + }, + ) elif action == "opened": if "部署失败" in issue_title: @@ -802,7 +1027,23 @@ async def _handle_issues(payload: Dict[str, Any]) -> None: title = f"部署失败: {repo}" for agent_id in 
("jiangwei-infra", "pangtong-fujunshi"): - _send_mail(agent_id, title, text) + _send_toolchain_task( + to_agent=agent_id, + title=title, + description=text, + event_type="deploy_failure", + action_type="deploy_failure", + steps=[ + "检查 deploy 日志", + "排查失败原因", + "修复并重新部署", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "repo": repo, + "commit_sha": commit_sha or "(未知)", + }, + ) # Issue body @mention(opened 时检查) issue_body = issue.get("body", "") or "" @@ -869,7 +1110,25 @@ async def _handle_issue_comment(payload: Dict[str, Any]) -> None: }) title = f"CI 失败: {repo}#{issue_number}" - _send_mail(pr_author, title, text) + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="ci_failure", + action_type="ci_failure", + steps=[ + "查看完整 CI 日志(PR 页面或 Gitea Actions 页面)", + "修复失败的测试", + "push → CI 自动重跑", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": issue_number, + "repo": repo, + "branch": branch, + "error_summary": error_summary, + }, + ) # CI 处理完不 return,继续检查 @mention # === 路径 2:@mention 通知(新增,独立路径) === diff --git a/src/blackboard/db.py b/src/blackboard/db.py index 4a0c2a5..b56d8be 100644 --- a/src/blackboard/db.py +++ b/src/blackboard/db.py @@ -293,7 +293,7 @@ _SCHEMA_STATEMENTS = [ id INTEGER PRIMARY KEY AUTOINCREMENT, task_id TEXT NOT NULL REFERENCES tasks(id), author TEXT NOT NULL, - comment_type TEXT NOT NULL DEFAULT 'general' CHECK (comment_type IN ('general','handoff','observation','review','rebuttal','rebuttal_response','debate_argument','debate_rebuttal','debate_judgment')), + comment_type TEXT NOT NULL DEFAULT 'general' CHECK (comment_type IN ('general','handoff','observation','review','rebuttal','rebuttal_response','debate_argument','debate_rebuttal','debate_judgment','action_report')), body TEXT NOT NULL, mentions TEXT, 
created_at TEXT NOT NULL DEFAULT (datetime('now')) diff --git a/src/daemon/prompt_composer.py b/src/daemon/prompt_composer.py index 2eb6fa4..bf7908d 100644 --- a/src/daemon/prompt_composer.py +++ b/src/daemon/prompt_composer.py @@ -65,6 +65,8 @@ class PromptContext: # toolchain 专用 event_type: str = "" # ci_failure / review_request / ... event_data: Dict = field(default_factory=dict) + action_type: str = "" # 动作分类(review_result / ci_failure / ...) + action_steps: list = field(default_factory=list) # 结构化编号步骤列表 # 前序产出 depends_on_outputs: Optional[List] = None diff --git a/src/daemon/spawner.py b/src/daemon/spawner.py index 91d2770..28451bb 100644 --- a/src/daemon/spawner.py +++ b/src/daemon/spawner.py @@ -286,10 +286,15 @@ class AgentSpawner: # 从 must_haves 解析 mail 元数据(from / performative) from_agent = "" mail_type = "" + action_type = "" + action_steps = [] try: meta = json.loads(must_haves) if must_haves else {} from_agent = meta.get("from", "") mail_type = meta.get("performative", meta.get("type", "")) + # toolchain 字段提取 + action_type = meta.get("action_type", "") + action_steps = meta.get("steps", []) except Exception: pass ctx = PromptContext( @@ -298,6 +303,7 @@ class AgentSpawner: agent_id=agent_id, role=spawn_type, spawn_type=spawn_type, from_agent=from_agent, mail_type=mail_type, + action_type=action_type, action_steps=action_steps, ) return handler.build_prompt(ctx) diff --git a/src/daemon/toolchain_handler.py b/src/daemon/toolchain_handler.py index 3ee37ce..4ecf503 100644 --- a/src/daemon/toolchain_handler.py +++ b/src/daemon/toolchain_handler.py @@ -1,6 +1,7 @@ -"""toolchain_handler.py — 工具链事件 handler。 +"""toolchain_handler.py - 工具链事件 handler。 -处理 Gitea Webhook 事件(CI 失败、Review 请求、Issue 指派等)。 +处理 Gitea Webhook 事件(CI 失败、Review 请求、Issue 指派等)。 +L2 引擎层强约束:输入(结构化步骤)+ 执行(Red Flags)+ 输出(action_report 验证)。 """ from __future__ import annotations @@ -8,7 +9,7 @@ import json import logging import urllib.request from pathlib import Path -from typing import Dict +from 
typing import Dict, List from src.daemon.base_task_handler import BaseTaskHandler, VerifyResult from src.daemon.prompt_composer import PromptComposer, PromptContext @@ -17,13 +18,37 @@ from src.blackboard.db import get_connection logger = logging.getLogger("moziplus-v2.handler.toolchain") +# --------------------------------------------------------------------------- +# Gitea API 配置 +# --------------------------------------------------------------------------- + +_GITEA_BASE = "http://192.168.2.154:3000/api/v1" +_GITEA_TOKEN = "a6d596b826f4bfeaf983ef4d25ac25dab95bbc4e" + +# 业务失败连续次数阈值,超过则升级为系统失败 +_BUSINESS_FAIL_THRESHOLD = 3 + +# action_type → action_hint 映射 +_ACTION_HINTS: Dict[str, str] = { + "review_result": "你收到一个 Review 结果通知,这是一个需要你执行动作的事件(不是纯通知)。", + "review_request": "你收到一个 Review 请求,这是一个需要你审查并提交 Review 的事件。", + "review_updated": "你收到一个 PR 更新通知,这是一个需要你重新审查修改部分的事件。", + "review_comment": "你收到一个 Review 评论,这是一个需要你查看并响应的事件。", + "ci_failure": "你收到一个 CI 失败通知,这是一个需要你修复失败测试的事件。", + "issue_assigned": "你收到一个 Issue 指派,这是一个需要你编码实现的事件。", + "deploy_failure": "你收到一个部署失败通知,这是一个需要你排查并修复的事件。", + "mention": "你收到一个 @mention 通知,这是一个需要你按指引响应的事件。", + "review_merged": "你收到一个 PR 合并通知。这是一条纯通知,阅读即可。", + "infrastructure_failure": "你收到一个基础设施问题报告,请排查并修复。", +} + # --------------------------------------------------------------------------- # Toolchain PromptSections # --------------------------------------------------------------------------- class ToolchainContextSection: - """事件类型 + 事件详情(priority=10)""" + """事件类型 + 事件详情 + 结构化步骤 + action_hint(priority=10)""" name: str = "toolchain_context" priority: int = 10 @@ -32,27 +57,44 @@ class ToolchainContextSection: event_type = context.event_type event_data: Dict = context.event_data or {} + # Part 1: 事件信息(现有模板引擎) if event_type in _TEMPLATE_MAP: - # 使用模板引擎渲染已知事件 variables = {k: str(v) for k, v in event_data.items()} - return render_template(event_type, variables) + event_text = render_template(event_type, variables) + else: + lines = ["## 工具链事件", 
""] + lines.append(f"- **事件类型**: {event_type or '未知'}") + if event_data: + lines.append("- **事件详情**:") + for key, value in event_data.items(): + lines.append(f" - {key}: {value}") + lines.append("") + event_text = "\n".join(lines) - # fallback:通用事件描述 - lines = ["## 工具链事件", ""] - lines.append(f"- **事件类型**: {event_type or '未知'}") - if event_data: - lines.append("- **事件详情**:") - for key, value in event_data.items(): - lines.append(f" - {key}: {value}") - lines.append("") - return "\n".join(lines) + # Part 2: 结构化编号步骤(新增,从 action_steps 渲染) + steps: List[str] = context.action_steps or [] + if steps: + step_lines = ["", "### 必须执行的步骤", ""] + for i, step in enumerate(steps, 1): + step_lines.append(f"{i}. {step}") + steps_text = "\n".join(step_lines) + else: + steps_text = "" + + # Part 3: action 指引(新增,按 action_type 选择) + action_hint = _ACTION_HINTS.get( + context.action_type, + "你收到一个工具链事件,这是一个需要你执行动作的事件。", + ) + + return f"{action_hint}\n\n{event_text}{steps_text}" def should_include(self, context: PromptContext) -> bool: return True class ToolchainApiSection: - """API 操作指令(priority=40),success_status=done""" + """API 操作指令(priority=40)-- action_report 提交指引""" name: str = "toolchain_api" priority: int = 40 @@ -60,28 +102,48 @@ class ToolchainApiSection: API_HOST = "localhost:8083" def render(self, context: PromptContext) -> str: + task_id = context.task_id + project_id = context.project_id + agent_id = context.agent_id + lines = [ "## API 操作指令", "", - f"项目 ID: `{context.project_id}`", - f"任务 ID: `{context.task_id}`", + f"项目 ID: `{project_id}`", + f"任务 ID: `{task_id}`", "", - "### 完成后必须更新任务状态", - "完成后务必通过以下命令将任务标记为 **done**:", + "### 完成后必须提交 action report", + "", + "执行完所有步骤后,必须提交 action report:", "```bash", - f'curl -s -X POST "http://{self.API_HOST}/api/projects/{context.project_id}/tasks/{context.task_id}/status" \\', + f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/comments" \\', ' -H "Content-Type: application/json" \\', - ' -d 
\'{"status": "done"}\'', + f' -d \'{{"author": "{agent_id}", "comment_type": "action_report", "body": "简要描述你执行了什么操作及结果"}}\'', "```", "", + "⚠️ 不提交 action report 的任务会被标记为 failed。", + "", "### 提交产出", - "如有产出(如 review 结果、修复方案),提交到任务 outputs:", + "", + "如有产出(如 review 结果、修复方案),提交到任务 outputs:", "```bash", - f'curl -s -X POST "http://{self.API_HOST}/api/projects/{context.project_id}/tasks/{context.task_id}/outputs" \\', + f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/outputs" \\', ' -H "Content-Type: application/json" \\', ' -d \'{"content": "<你的产出内容>", "type": "text"}\'', "```", "", + "### 需要其他角色支持时", + "", + "如果在执行过程中需要其他角色协助(如缺数据、需要审批等),在关联的 PR/Issue 上创建 comment @对方:", + "```bash", + f'curl -s -X POST "{_GITEA_BASE}/repos/{{repo}}/issues/{{pr_number}}/comments" \\', + f' -H "Authorization: token " \\', + ' -H "Content-Type: application/json" \\', + ' -d \'{"body": "@{agent-id} 需要你的支持:{描述问题}"}\'', + "```", + "", + "⚠️ 不要使用 Mail API(飞鸽传书)。所有协作通过 Gitea 留痕。", + "", ] return "\n".join(lines) @@ -90,20 +152,50 @@ class ToolchainApiSection: class ToolchainConstraintsSection: - """硬约束(priority=50)""" + """硬约束 + Red Flags(priority=50)""" name: str = "toolchain_constraints" priority: int = 50 def render(self, context: PromptContext) -> str: lines = [ - "## 硬约束", + "## 硬约束(必须遵守)", "", - "1. **必须标 done**:处理完成后必须通过 API 将任务状态更新为 `done`,否则视为未完成", - "2. **产出不能为空**:必须提交有意义的产出(output 或 comment),不能只改状态", - "3. **单一职责**:只处理本次事件相关的操作,不要越界执行无关任务", - "4. **出错即报告**:如果无法处理(如权限不足、资源不存在),在 comment 中说明原因并标 done", - "5. **不要创建新任务**:工具链事件只处理当前事件,不衍生新任务", + "⚠️ 以下是强制要求,不是建议或参考。违反任何一条都会导致任务失败。", + "", + "### 1. 必须按步骤执行", + '- 检查上方“必须执行的步骤”列表', + '- 逐条执行每个步骤,不可跳过', + '- 不要只读不做——这不是纯通知', + "", + "### 2. 必须提交 action report", + '- 执行完所有步骤后,必须提交 action report', + "- 提交方式:POST comment(comment_type='action_report')", + '- 报告内容:简要描述你执行了什么操作、结果如何', + '- ⚠️ 不提交 action report 的任务会被标记为 failed', + "", + "### 3. 
不要执行任何状态转换命令", + '- 不要手动标 working/done/review/failed,系统会自动处理', + "", + "### 4. 不需要回复", + '- action report 就是你的完成凭证', + '- 不要发送 Mail(飞鸽传书),你的所有操作在 toolchain 流程内完成', + "", + "### 5. 所有协作通过 Gitea 完成", + '- 如果遇到问题需要其他角色支持,在关联的 PR/Issue 上创建 comment @对方', + '- 不要使用 Mail API(飞鸽传书)发送消息', + '- 你的所有操作都在 toolchain 流程内,通过 Gitea 留痕', + "", + "### Red Flags(如果脑海中出现以下想法,说明你错了)", + "", + '| Agent 想法 | Red Flag 驳回 |', + '|------------|--------------|', + '| “这个通知看看就行了” | ❌ 错!这是 action 指令,必须执行步骤列表中的每一项 |', + '| “我不需要做任何事” | ❌ 错!检查“必须执行的步骤”列表,每一步都要执行 |', + '| “先放着等会处理” | ❌ 错!立即执行,不要推迟 |', + '| “我已经知道了” | ❌ 知道不等于执行。执行步骤 + 提交 action report 才算完成 |', + '| “步骤太多了,选几个做就行” | ❌ 错!必须逐条执行,不可跳过 |', + '| “这个步骤不适用于当前情况” | ❌ 如果确实不适用,在 action report 中说明原因,但其他步骤必须执行 |', "", ] return "\n".join(lines) @@ -127,7 +219,7 @@ class ToolchainHandler(BaseTaskHandler): return "done" def pre_spawn(self, task_id: str, db_path: Path) -> bool: - """auto_working:pending → working""" + """auto_working:pending → working""" return self._auto_mark_working(task_id, db_path) def get_sections(self) -> list: @@ -145,27 +237,55 @@ class ToolchainHandler(BaseTaskHandler): return composer.compose(context) def verify_completion(self, task_id: str, db_path: Path) -> VerifyResult: - """检查行动输出(output 或 comment 有实质内容)""" + """检查 action report(精确验证)+ 三层 fallback""" try: conn = get_connection(db_path) try: - # 检查 output + # 特殊处理:infrastructure_failure 始终通过(防递归) + row = conn.execute( + "SELECT must_haves FROM tasks WHERE id=?", (task_id,) + ).fetchone() + if row and row["must_haves"]: + try: + meta = json.loads(row["must_haves"]) + except Exception: + meta = {} + if meta.get("action_type") == "infrastructure_failure": + return VerifyResult(True, "infrastructure_passthrough", + "infrastructure_failure auto-pass") + + # 特殊处理:review_merged 始终通过(纯通知) + if meta.get("action_type") == "review_merged": + return VerifyResult(True, "merged_passthrough", + "review_merged auto-pass") + + # 1. 
优先检查 action_report comment + report_row = conn.execute( + "SELECT id FROM comments WHERE task_id=? " + "AND comment_type='action_report' LIMIT 1", + (task_id,) + ).fetchone() + if report_row: + return VerifyResult(True, "has_action_report", "action_report found") + + # 2. fallback:检查 output(向后兼容) output_count = conn.execute( "SELECT COUNT(*) FROM outputs WHERE task_id=?", (task_id,) ).fetchone()[0] if output_count > 0: return VerifyResult(True, "has_output", f"output_count={output_count}") - # 检查 comment(非系统、有实质内容) + # 3. fallback:检查有实质内容的 comment(向后兼容) comment_count = conn.execute( "SELECT COUNT(*) FROM comments WHERE task_id=? " - "AND author != 'system' AND LENGTH(content) >= 20", + "AND author != 'system' AND LENGTH(body) >= 20", (task_id,) ).fetchone()[0] if comment_count > 0: return VerifyResult(True, "has_comment", f"comment_count={comment_count}") - return VerifyResult(False, "no_action", "output=0, comment=0") + return VerifyResult(False, "no_action", + "no action_report, no output, no valid comment") finally: conn.close() except Exception as e: @@ -174,13 +294,13 @@ class ToolchainHandler(BaseTaskHandler): def on_failure(self, task_id: str, agent_id: str, db_path: Path, verify: VerifyResult) -> None: - """验证失败 → 标 failed + Mail API 通知主公""" + """验证失败 → 三分路处理(业务/系统/基础设施)""" self._mark_task_status(db_path, task_id, "failed") - logger.info("Toolchain %s: verify failed (%s), marked failed", task_id, verify.reason) + logger.info("Toolchain %s: verify failed (%s), marked failed", + task_id, verify.reason) - # 从 db 读取事件上下文 - event_type = "" - event_data: Dict = {} + # 读取 must_haves 获取事件上下文 + meta = {} try: conn = get_connection(db_path) row = conn.execute( @@ -188,18 +308,192 @@ class ToolchainHandler(BaseTaskHandler): ).fetchone() if row and row["must_haves"]: meta = json.loads(row["must_haves"]) - event_type = meta.get("event_type", "") - raw = meta.get("event_data", "{}") - event_data = json.loads(raw) if isinstance(raw, str) else raw + # 统计该 task 的业务失败次数 + 
fail_count = conn.execute( + "SELECT COUNT(*) FROM events WHERE task_id=? " + "AND event_type='status_change' AND payload LIKE '%failed%'", + (task_id,) + ).fetchone()[0] conn.close() except Exception: - pass + fail_count = 0 - self._notify_via_mail_api( - task_id, verify.reason, verify.evidence, - event_type, event_data, + action_type = meta.get("action_type", "") + context_data = meta.get("context", {}) + assignee = meta.get("assignee", "") or meta.get("from", "") + + # 三分路决策 + route = self._classify_failure(verify, fail_count) + + if route == "business": + self._handle_business_failure( + task_id, agent_id, verify, action_type, context_data, assignee, db_path) + elif route == "system": + self._handle_system_failure( + task_id, agent_id, verify, action_type, context_data, db_path) + else: # infrastructure + self._handle_infrastructure_failure( + task_id, agent_id, verify, db_path) + + def _classify_failure(self, verify: VerifyResult, fail_count: int) -> str: + """分类失败类型:business / system / infrastructure""" + # verify_error 或 DB 不可用 → 基础设施失败 + if verify.reason == "verify_error": + return "infrastructure" + # 连续业务失败超过阈值 → 升级为系统失败 + if fail_count >= _BUSINESS_FAIL_THRESHOLD: + return "system" + # 默认:业务失败 + return "business" + + def _handle_business_failure( + self, task_id: str, agent_id: str, verify: VerifyResult, + action_type: str, context_data: dict, assignee: str, + db_path: Path, + ) -> None: + """业务失败 → 在关联 PR/Issue 上创建 comment @原始 assignee""" + repo = context_data.get("repo", "") + pr_number = context_data.get("pr_number") or context_data.get("issue_number", "") + + if repo and pr_number: + comment_body = ( + f"@{assignee or agent_id} 工具链任务执行失败\n\n" + f"任务 ID: {task_id}\n" + f"失败原因: {verify.reason}\n" + f"证据: {verify.evidence}\n\n" + f"请检查黑板任务并处理。" + ) + success = self._create_gitea_comment(repo, pr_number, comment_body) + if success: + logger.info("Toolchain %s: business failure → Gitea comment on %s#%s", + task_id, repo, pr_number) + return + # Gitea API 
failed → escalate to system failure + logger.warning( + "Toolchain %s: Gitea comment failed, escalating to system failure", + task_id) + self._handle_system_failure( + task_id, agent_id, verify, action_type, context_data, db_path) + else: + # 没有 PR/Issue 关联 → fallback 到系统失败 + logger.warning( + "Toolchain %s: no PR/Issue context for business failure, " + "escalating to system failure", task_id) + self._handle_system_failure( + task_id, agent_id, verify, action_type, context_data, db_path) + + def _handle_system_failure( + self, task_id: str, agent_id: str, verify: VerifyResult, + action_type: str, context_data: dict, db_path: Path, + ) -> None: + """系统失败 → 创建 Gitea Issue @pangtong-fujunshi""" + repo = context_data.get("repo", "sanguo/sanguo_moziplus_v2") + title = f"[toolchain-handler] 工具链事件处理失败: {task_id}" + body = ( + f"任务 {task_id} 验证失败\n\n" + f"事件类型: {action_type or '未知'}\n" + f"失败原因: {verify.reason}\n" + f"证据: {verify.evidence}\n\n" + f"@pangtong-fujunshi 请检查黑板任务并手动处理。" ) + # 尝试在 Gitea 创建 Issue + created = self._create_gitea_issue(repo, title, body, ["pangtong-fujunshi"]) + if created: + logger.info("Toolchain %s: system failure → Gitea Issue created on %s", + task_id, repo) + else: + # Gitea API 不可用 → 基础设施失败 + logger.error( + "Toolchain %s: Gitea API unavailable, escalating to infrastructure failure", + task_id) + self._handle_infrastructure_failure( + task_id, agent_id, verify, db_path) + + def _handle_infrastructure_failure( + self, task_id: str, agent_id: str, + verify: VerifyResult, db_path: Path, + ) -> None: + """基础设施失败 → _send_toolchain_task @jiangwei-infra(防递归)""" + # 直接在 _toolchain DB 创建 task(不走 Gitea webhook) + try: + from src.api.toolchain_routes import _send_toolchain_task + _send_toolchain_task( + to_agent="jiangwei-infra", + title=f"[基础设施] Gitea API 不可用 - {task_id}", + description=( + f"Gitea API 不可用,原任务 {task_id} 无法通过正常路径处理。\n" + f"请检查 Gitea 服务状态和网络连通性。" + ), + event_type="infrastructure_failure", + action_type="infrastructure_failure", + 
steps=[ + "检查 Gitea 服务状态(http://192.168.2.154:3000)", + "检查网络连通性", + "恢复后提交 action report", + ], + context_data={"original_task_id": task_id, "verify_reason": verify.reason}, + source="toolchain_handler", + ) + logger.info("Toolchain %s: infrastructure failure → task created for jiangwei-infra", + task_id) + except Exception as e: + logger.error( + "Toolchain %s: failed to create infrastructure_failure task: %s", + task_id, e) + + # ----------------------------------------------------------------------- + # Gitea API 辅助 + # ----------------------------------------------------------------------- + + def _create_gitea_comment( + self, repo: str, pr_number: int, body: str, + ) -> bool: + """在 PR/Issue 上创建 comment。返回是否成功。""" + payload = json.dumps({"body": body}, ensure_ascii=False).encode("utf-8") + try: + req = urllib.request.Request( + f"{_GITEA_BASE}/repos/{repo}/issues/{pr_number}/comments", + data=payload, + headers={ + "Authorization": f"token {_GITEA_TOKEN}", + "Content-Type": "application/json", + }, + ) + urllib.request.urlopen(req, timeout=5) + return True + except Exception as e: + logger.warning("Gitea comment failed on %s#%s: %s", repo, pr_number, e) + return False + + def _create_gitea_issue( + self, repo: str, title: str, body: str, + assignees: list = None, + ) -> bool: + """创建 Gitea Issue。返回是否成功。""" + data = {"title": title, "body": body} + if assignees: + data["assignees"] = assignees + payload = json.dumps(data, ensure_ascii=False).encode("utf-8") + try: + req = urllib.request.Request( + f"{_GITEA_BASE}/repos/{repo}/issues", + data=payload, + headers={ + "Authorization": f"token {_GITEA_TOKEN}", + "Content-Type": "application/json", + }, + ) + urllib.request.urlopen(req, timeout=5) + return True + except Exception as e: + logger.warning("Gitea create issue failed on %s: %s", repo, e) + return False + + # ----------------------------------------------------------------------- + # 兼容:保留旧方法签名(但不再被 on_failure 调用) + # 
----------------------------------------------------------------------- + def _build_gitea_links(self, event_type: str, event_data: dict) -> str: """根据事件类型构建 Gitea 链接。""" links = [] @@ -215,63 +509,4 @@ class ToolchainHandler(BaseTaskHandler): if "branch" in event_data and "commit" not in event_data: links.append(f"分支: {event_data['branch']}") - return "\n".join(links) if links else "(无法提取链接,请检查黑板任务详情)" - - def _notify_via_mail_api( - self, - task_id: str, - reason: str, - evidence: str, - event_type: str, - event_data: Dict, - ) -> None: - """通过 Mail API 发送丰富的失败通知给主公。""" - # 构建行动指引 - action_hint = "请检查黑板任务并手动处理。" - et_lower = event_type.lower() - if "ci" in et_lower or "deploy" in et_lower: - action_hint = "建议创建任务派给 jiangwei-infra 检查 CI/部署问题。" - elif "review" in et_lower: - action_hint = "建议查看 PR review 状态,必要时通知相关开发者。" - elif "issue" in et_lower: - action_hint = "建议创建任务派给对应开发者处理 Issue。" - - # 构建事件详情 - event_details = "" - if event_data: - event_details = "\n".join( - f" - {k}: {v}" for k, v in event_data.items() - ) - - # 构建 Gitea 链接 - gitea_links = self._build_gitea_links(event_type, event_data) - - title = f"[toolchain-handler] 工具链事件处理失败: {task_id}" - text = ( - f"任务 {task_id} 验证失败\n\n" - f"事件类型: {event_type or '未知'}\n" - f"事件详情:\n{event_details or ' (无)'}\n\n" - f"失败原因: {reason}\n" - f"证据: {evidence}\n\n" - f"{gitea_links}\n\n" - f"行动指引: {action_hint}" - ) - - payload = json.dumps({ - "from": "daemon", - "to": "pangtong-fujunshi", - "title": title, - "text": text, - "type": "inform", - }, ensure_ascii=False).encode("utf-8") - - try: - req = urllib.request.Request( - "http://localhost:8083/api/mail", - data=payload, - headers={"Content-Type": "application/json"}, - ) - urllib.request.urlopen(req, timeout=5) - logger.info("Toolchain %s: sent failure notification via Mail API", task_id) - except Exception as e: - logger.warning("Toolchain %s: failed to notify via Mail API: %s", task_id, e) + return "\n".join(links) if links else "(无法提取链接,请检查黑板任务详情)" diff --git 
a/tests/unit/test_toolchain_handler_v2.py b/tests/unit/test_toolchain_handler_v2.py new file mode 100644 index 0000000..3bcd311 --- /dev/null +++ b/tests/unit/test_toolchain_handler_v2.py @@ -0,0 +1,513 @@ +"""Unit tests for §17 ToolchainHandler 强约束实现.""" +import json +import os +import sys +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +# Add project root to path +PROJECT_ROOT = Path(__file__).parent.parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) + +from src.daemon.prompt_composer import PromptContext, PromptComposer +from src.daemon.toolchain_handler import ( + ToolchainHandler, + ToolchainContextSection, + ToolchainApiSection, + ToolchainConstraintsSection, + _ACTION_HINTS, +) +from src.daemon.base_task_handler import VerifyResult +from src.blackboard.db import init_db, get_connection + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def tmp_db(): + """Create a temporary _toolchain DB for testing.""" + with tempfile.TemporaryDirectory() as d: + db_path = Path(d) / "blackboard.db" + init_db(db_path) + yield db_path + + +@pytest.fixture +def handler(): + return ToolchainHandler() + + +def _insert_task(db_path, task_id, must_haves_json, status="working"): + """Insert a task into DB for testing.""" + conn = get_connection(db_path) + conn.execute( + "INSERT INTO tasks (id, title, description, assignee, assigned_by, " + "must_haves, task_type, status) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (task_id, "test", "test desc", "zhangfei-dev", "system", + must_haves_json, "toolchain", status) + ) + conn.commit() + conn.close() + + +def _insert_comment(db_path, task_id, author, body, comment_type="general"): + """Insert a comment into DB.""" + conn = get_connection(db_path) + conn.execute( + "INSERT INTO comments (task_id, author, comment_type, body) VALUES (?, ?, ?, 
?)", + (task_id, author, comment_type, body) + ) + conn.commit() + conn.close() + + +def _insert_output(db_path, task_id, content="test output"): + """Insert an output into DB.""" + conn = get_connection(db_path) + conn.execute( + "INSERT INTO outputs (task_id, agent, output_type, title, summary) " + "VALUES (?, ?, ?, ?, ?)", + (task_id, "zhangfei-dev", "document", "test", content) + ) + conn.commit() + conn.close() + + +# --------------------------------------------------------------------------- +# Step 1a: PromptContext new fields +# --------------------------------------------------------------------------- + +class TestPromptContextFields: + def test_action_type_default(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + ) + assert ctx.action_type == "" + + def test_action_steps_default(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + ) + assert ctx.action_steps == [] + + def test_action_type_set(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + action_type="review_result", + ) + assert ctx.action_type == "review_result" + + def test_action_steps_set(self): + steps = ["step 1", "step 2"] + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + action_steps=steps, + ) + assert ctx.action_steps == steps + + +# --------------------------------------------------------------------------- +# Step 2a: ToolchainContextSection steps rendering + action_hint +# --------------------------------------------------------------------------- + +class TestToolchainContextSection: + def test_renders_steps(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + 
event_type="review_result", + event_data={"pr_number": "42", "repo": "sanguo/test"}, + action_type="review_result", + action_steps=["合并 PR", "提交 action report"], + ) + section = ToolchainContextSection() + result = section.render(ctx) + assert "必须执行的步骤" in result + assert "1. 合并 PR" in result + assert "2. 提交 action report" in result + + def test_renders_action_hint(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + event_type="ci_failure", + action_type="ci_failure", + action_steps=[], + ) + section = ToolchainContextSection() + result = section.render(ctx) + assert "CI 失败" in result + assert "需要你修复" in result + + def test_renders_default_hint_for_unknown_action_type(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + event_type="unknown", + action_type="unknown_type", + action_steps=[], + ) + section = ToolchainContextSection() + result = section.render(ctx) + assert "需要你执行动作的事件" in result + + def test_no_steps_no_steps_section(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + event_type="review_merged", + action_type="review_merged", + action_steps=[], + ) + section = ToolchainContextSection() + result = section.render(ctx) + assert "必须执行的步骤" not in result + + +# --------------------------------------------------------------------------- +# Step 2b: ToolchainApiSection action_report guidance +# --------------------------------------------------------------------------- + +class TestToolchainApiSection: + def test_has_action_report_instruction(self): + ctx = PromptContext( + task_id="tc-123", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="zhangfei-dev", + ) + section = ToolchainApiSection() + result = section.render(ctx) + assert "action_report" in result + assert "comment_type" 
in result + assert "tc-123" in result + + def test_no_manual_done_instruction(self): + ctx = PromptContext( + task_id="tc-123", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="zhangfei-dev", + ) + section = ToolchainApiSection() + result = section.render(ctx) + # Should NOT contain the old "标记为 done" instruction + assert "标记为 **done**" not in result + assert '"status": "done"' not in result + + def test_has_outputs_instruction(self): + ctx = PromptContext( + task_id="tc-123", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="zhangfei-dev", + ) + section = ToolchainApiSection() + result = section.render(ctx) + assert "outputs" in result + + def test_has_gitea_collaboration_instruction(self): + ctx = PromptContext( + task_id="tc-123", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="zhangfei-dev", + ) + section = ToolchainApiSection() + result = section.render(ctx) + assert "Gitea" in result + assert "Mail API" in result + + +# --------------------------------------------------------------------------- +# Step 2c: ToolchainConstraintsSection Red Flags +# --------------------------------------------------------------------------- + +class TestToolchainConstraintsSection: + def test_has_red_flags_table(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + ) + section = ToolchainConstraintsSection() + result = section.render(ctx) + assert "Red Flags" in result + assert "❌" in result + + def test_has_all_5_constraints(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + ) + section = ToolchainConstraintsSection() + result = section.render(ctx) + assert "必须按步骤执行" in result + assert "必须提交 action report" in result + assert "不要执行任何状态转换命令" in result + assert "不需要回复" in result + assert "所有协作通过 Gitea 完成" in result + 
+ def test_has_strong_language(self): + ctx = PromptContext( + task_id="t1", title="test", description="d", + must_haves="", project_id="_toolchain", agent_id="a1", + ) + section = ToolchainConstraintsSection() + result = section.render(ctx) + assert "强制要求" in result + assert "不是建议" in result + + +# --------------------------------------------------------------------------- +# Step 2d: verify_completion tests +# --------------------------------------------------------------------------- + +class TestVerifyCompletion: + def test_action_report_passes(self, handler, tmp_db): + """action_report comment → pass""" + must_haves = json.dumps({"action_type": "review_result"}) + _insert_task(tmp_db, "t1", must_haves) + _insert_comment(tmp_db, "t1", "zhangfei-dev", + "已修复 CI", comment_type="action_report") + + result = handler.verify_completion("t1", tmp_db) + assert result.passed is True + assert result.reason == "has_action_report" + + def test_no_action_report_fallback_output(self, handler, tmp_db): + """No action_report but has output → pass (fallback)""" + must_haves = json.dumps({"action_type": "review_result"}) + _insert_task(tmp_db, "t2", must_haves) + _insert_output(tmp_db, "t2", "review result content") + + result = handler.verify_completion("t2", tmp_db) + assert result.passed is True + assert result.reason == "has_output" + + def test_no_action_report_fallback_comment(self, handler, tmp_db): + """No action_report but has substantial comment → pass (fallback)""" + must_haves = json.dumps({"action_type": "review_result"}) + _insert_task(tmp_db, "t3", must_haves) + _insert_comment(tmp_db, "t3", "zhangfei-dev", + "This is a sufficiently long comment about the task.") + + result = handler.verify_completion("t3", tmp_db) + assert result.passed is True + assert result.reason == "has_comment" + + def test_nothing_passes(self, handler, tmp_db): + """No action_report, no output, no comment → fail""" + must_haves = json.dumps({"action_type": "review_result"}) + 
_insert_task(tmp_db, "t4", must_haves) + + result = handler.verify_completion("t4", tmp_db) + assert result.passed is False + assert result.reason == "no_action" + + def test_short_comment_fails(self, handler, tmp_db): + """Comment < 20 chars → fail""" + must_haves = json.dumps({"action_type": "review_result"}) + _insert_task(tmp_db, "t5", must_haves) + _insert_comment(tmp_db, "t5", "zhangfei-dev", "ok") + + result = handler.verify_completion("t5", tmp_db) + assert result.passed is False + + def test_review_merged_auto_passes(self, handler, tmp_db): + """review_merged → always pass""" + must_haves = json.dumps({"action_type": "review_merged"}) + _insert_task(tmp_db, "t6", must_haves) + + result = handler.verify_completion("t6", tmp_db) + assert result.passed is True + assert result.reason == "merged_passthrough" + + def test_infrastructure_failure_auto_passes(self, handler, tmp_db): + """infrastructure_failure → always pass (anti-recursion)""" + must_haves = json.dumps({"action_type": "infrastructure_failure"}) + _insert_task(tmp_db, "t7", must_haves) + + result = handler.verify_completion("t7", tmp_db) + assert result.passed is True + assert result.reason == "infrastructure_passthrough" + + +# --------------------------------------------------------------------------- +# Step 3a: _send_toolchain_task tests +# --------------------------------------------------------------------------- + +class TestSendToolchainTask: + def test_creates_task_in_toolchain_db(self): + """_send_toolchain_task creates a task in _toolchain DB.""" + from src.api.toolchain_routes import _send_toolchain_task, _toolchain_db_path + + with patch("src.api.toolchain_routes.get_data_root") as mock_root: + with tempfile.TemporaryDirectory() as d: + mock_root.return_value = Path(d) + + task_id = _send_toolchain_task( + to_agent="zhangfei-dev", + title="Test Task", + description="Test description", + event_type="ci_failure", + action_type="ci_failure", + steps=["Fix test", "Submit report"], + 
context_data={"pr_number": 42}, + ) + + assert task_id.startswith("tc-") + + # Verify task was written to _toolchain DB + db_path = _toolchain_db_path() + conn = get_connection(db_path) + row = conn.execute( + "SELECT * FROM tasks WHERE id=?", (task_id,) + ).fetchone() + assert row is not None + assert row["task_type"] == "toolchain" + assert row["assignee"] == "zhangfei-dev" + + # Verify must_haves JSON + meta = json.loads(row["must_haves"]) + assert meta["event_type"] == "ci_failure" + assert meta["action_type"] == "ci_failure" + assert meta["steps"] == ["Fix test", "Submit report"] + assert meta["context"]["pr_number"] == 42 + conn.close() + + def test_unknown_agent_returns_empty(self): + """_send_toolchain_task with unknown agent returns empty string.""" + from src.api.toolchain_routes import _send_toolchain_task + + task_id = _send_toolchain_task( + to_agent="unknown-agent", + title="Test", + description="desc", + event_type="test", + action_type="test", + steps=[], + ) + assert task_id == "" + + +# --------------------------------------------------------------------------- +# Step 2e: on_failure three-way routing tests +# --------------------------------------------------------------------------- + +class TestOnFailureRouting: + def test_business_failure_creates_gitea_comment(self, handler, tmp_db): + """Business failure → Gitea PR comment""" + must_haves = json.dumps({ + "action_type": "review_result", + "context": {"repo": "sanguo/test", "pr_number": 42}, + "assignee": "zhangfei-dev", + }) + _insert_task(tmp_db, "t-fail", must_haves) + + with patch.object(handler, "_create_gitea_comment") as mock_comment: + mock_comment.return_value = True + verify = VerifyResult(False, "no_action", "no action_report") + handler.on_failure("t-fail", "zhangfei-dev", tmp_db, verify) + mock_comment.assert_called_once() + call_args = mock_comment.call_args + assert call_args[0][0] == "sanguo/test" + assert call_args[0][1] == 42 + + def 
test_infrastructure_failure_creates_task(self, handler, tmp_db): + """Infrastructure failure → _send_toolchain_task for jiangwei-infra""" + must_haves = json.dumps({ + "action_type": "review_result", + "context": {"repo": "sanguo/test", "pr_number": 42}, + }) + _insert_task(tmp_db, "t-infra", must_haves) + + with patch.object(handler, "_create_gitea_comment") as mock_comment: + mock_comment.return_value = False # Gitea API down + with patch.object(handler, "_create_gitea_issue") as mock_issue: + mock_issue.return_value = False # Gitea API still down + with patch("src.api.toolchain_routes._send_toolchain_task") as mock_send: + mock_send.return_value = "tc-infra" + verify = VerifyResult(False, "no_action", "no action_report") + handler.on_failure("t-infra", "zhangfei-dev", tmp_db, verify) + # Should eventually try to create infrastructure_failure task + mock_send.assert_called() + call_kwargs = mock_send.call_args + assert call_kwargs[1]["action_type"] == "infrastructure_failure" + assert call_kwargs[1]["to_agent"] == "jiangwei-infra" + + +# --------------------------------------------------------------------------- +# Regression: _mail path unaffected +# --------------------------------------------------------------------------- + +class TestMailRegression: + def test_send_mail_still_exists(self): + """_send_mail function is preserved.""" + from src.api.toolchain_routes import _send_mail + assert callable(_send_mail) + + def test_send_mail_not_called_by_handlers(self): + """No toolchain handler calls _send_mail.""" + import inspect + from src.api import toolchain_routes + + # Get source of handler functions + source = inspect.getsource(toolchain_routes) + # _send_mail should appear only in its own definition, not in handler bodies + lines = source.split("\n") + in_handler = False + handler_send_mail_calls = [] + for i, line in enumerate(lines): + if line.strip().startswith("async def _handle_") or line.strip().startswith("async def _send_mention_mails"): + 
in_handler = True + elif line.strip().startswith("async def ") or line.strip().startswith("def _"): + if not line.strip().startswith("async def _handle_") and not line.strip().startswith("async def _send_mention_mails"): + in_handler = False + if in_handler and "_send_mail(" in line and not line.strip().startswith("#"): + handler_send_mail_calls.append((i, line.strip())) + + assert len(handler_send_mail_calls) == 0, \ + f"_send_mail still called in handlers: {handler_send_mail_calls}" + + +# --------------------------------------------------------------------------- +# Integration: full prompt build +# --------------------------------------------------------------------------- + +class TestFullPromptBuild: + def test_prompt_contains_all_sections(self, handler): + """Full prompt has context, API, and constraints sections.""" + ctx = PromptContext( + task_id="tc-test", + title="CI 失败修复", + description="Fix CI failure", + must_haves=json.dumps({ + "event_type": "ci_failure", + "action_type": "ci_failure", + "steps": ["Fix test", "Push", "Submit report"], + "context": {"pr_number": 42}, + }), + project_id="_toolchain", + agent_id="zhangfei-dev", + event_type="ci_failure", + event_data={"pr_number": "42", "repo": "sanguo/test"}, + action_type="ci_failure", + action_steps=["Fix test", "Push", "Submit report"], + ) + + prompt = handler.build_prompt(ctx) + + # Must have action hint + assert "CI 失败" in prompt + assert "需要你修复" in prompt + # Must have steps + assert "必须执行的步骤" in prompt + assert "1. 
Fix test" in prompt + # Must have API section with action_report + assert "action_report" in prompt + assert "tc-test" in prompt + # Must have constraints with Red Flags + assert "Red Flags" in prompt + assert "强制要求" in prompt -- 2.45.4 From 3b9ad834050eac89cf035665fd62438bac3cefd1 Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sat, 13 Jun 2026 23:40:56 +0800 Subject: [PATCH 02/10] =?UTF-8?q?fix(lint):=20F541=20f-string=20=E6=97=A0?= =?UTF-8?q?=E5=8D=A0=E4=BD=8D=E7=AC=A6=E5=8E=BB=E6=8E=89=20f=20=E5=89=8D?= =?UTF-8?q?=E7=BC=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api/toolchain_routes.py | 20 +- src/daemon/toolchain_handler.py | 2 +- .../src/api/mention_utils.py | 169 ++ .../src/api/toolchain_routes.py | 1246 ++++++++++ .../src/daemon/prompt_composer.py | 129 + .../sanguo_moziplus_v2/src/daemon/spawner.py | 2088 +++++++++++++++++ .../src/daemon/toolchain_handler.py | 512 ++++ .../src/daemon/toolchain_templates.py | 89 + .../templates/toolchain/mention.md | 16 + .../tests/unit/test_mention_utils.py | 129 + 10 files changed, 4389 insertions(+), 11 deletions(-) create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md create mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py diff --git a/src/api/toolchain_routes.py b/src/api/toolchain_routes.py index 20cc655..e8c1e92 100644 --- a/src/api/toolchain_routes.py +++ b/src/api/toolchain_routes.py @@ 
-403,7 +403,7 @@ async def _send_mention_mails( action_type="mention", steps=[ "按上方 mention 模板中的 response_guidance 执行", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "source_type": source_type, @@ -475,7 +475,7 @@ async def _handle_pr_opened(payload: Dict[str, Any]) -> None: f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", "按审查清单审查(参考 code-review Skill)", f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)— APPROVE 或 REQUEST_CHANGES", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": pr_number, @@ -603,7 +603,7 @@ async def _handle_pull_request_review(payload: Dict[str, Any]) -> None: steps=[ f"查看评论(Gitea API: GET /repos/{repo}/issues/{pr_number}/comments)", "根据评论内容响应(修改代码或在 PR 上回复 comment)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": pr_number, @@ -637,14 +637,14 @@ async def _handle_pull_request_review(payload: Dict[str, Any]) -> None: if state == "APPROVED": tc_steps = [ f"合并 PR(Gitea API: POST /repos/{repo}/pulls/{pr_number}/merge)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ] else: # REQUEST_CHANGES tc_steps = [ "按审查意见逐条修改代码", "push 到原分支 → CI 自动跑", "CI 通过后等重新 Review", - f"提交 action report(POST 
http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ] _send_toolchain_task( to_agent=pr_author, @@ -740,7 +740,7 @@ async def _handle_pr_synchronize(payload: Dict[str, Any]) -> None: f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", "重点检查上次 Review 意见的修改部分", f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": pr_number, @@ -772,7 +772,7 @@ def _send_deploy_failure_task(repo: str, pr_number: int, pr_title: str, reason: "检查 deploy 日志", "排查失败原因", "修复并重新部署", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "repo": repo, @@ -1002,7 +1002,7 @@ async def _handle_issues(payload: Dict[str, Any]) -> None: "push → 等 CI", f"CI 通过后创建 PR(Gitea API: POST /repos/{repo}/pulls)", "等 Review", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "issue_number": issue_number, @@ -1037,7 +1037,7 @@ async def _handle_issues(payload: Dict[str, Any]) -> None: "检查 deploy 日志", "排查失败原因", "修复并重新部署", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ 
"repo": repo, @@ -1120,7 +1120,7 @@ async def _handle_issue_comment(payload: Dict[str, Any]) -> None: "查看完整 CI 日志(PR 页面或 Gitea Actions 页面)", "修复失败的测试", "push → CI 自动重跑", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": issue_number, diff --git a/src/daemon/toolchain_handler.py b/src/daemon/toolchain_handler.py index 4ecf503..65bf8a6 100644 --- a/src/daemon/toolchain_handler.py +++ b/src/daemon/toolchain_handler.py @@ -137,7 +137,7 @@ class ToolchainApiSection: "如果在执行过程中需要其他角色协助(如缺数据、需要审批等),在关联的 PR/Issue 上创建 comment @对方:", "```bash", f'curl -s -X POST "{_GITEA_BASE}/repos/{{repo}}/issues/{{pr_number}}/comments" \\', - f' -H "Authorization: token " \\', + ' -H "Authorization: token " \\', ' -H "Content-Type: application/json" \\', ' -d \'{"body": "@{agent-id} 需要你的支持:{描述问题}"}\'', "```", diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py new file mode 100644 index 0000000..5922ff0 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py @@ -0,0 +1,169 @@ +"""@mention 解析工具模块。供所有 toolchain handler 复用。""" + +import re +import logging +from typing import List, Set + +from src.config.agents import AGENT_IDS + +logger = logging.getLogger(__name__) + +# Gitea API 基地址常量(避免硬编码) +GITEA_API_BASE = "http://192.168.2.154:3000/api/v1" +GITEA_WEB_BASE = "http://192.168.2.154:3000" + +# Agent 别名映射 +# 规则: +# 1. 中文名(如"张飞")→ 完整 Agent ID +# 2. 英文短名(如"zhangfei")→ 完整 Agent ID +# 3. 
前缀模糊匹配需唯一匹配(见 extract_mentions 假设 A2) +AGENT_ALIAS: dict[str, str] = { + # 中文名 + "张飞": "zhangfei-dev", + "关羽": "guanyu-dev", + "赵云": "zhaoyun-data", + "姜维": "jiangwei-infra", + "司马懿": "simayi-challenger", + "庞统": "pangtong-fujunshi", + # 字+号(常见写法) + "翼德": "zhangfei-dev", + "云长": "guanyu-dev", + "子龙": "zhaoyun-data", + "伯约": "jiangwei-infra", + "仲达": "simayi-challenger", + "士元": "pangtong-fujunshi", + # 英文短名 + "zhangfei": "zhangfei-dev", + "guanyu": "guanyu-dev", + "zhaoyun": "zhaoyun-data", + "jiangwei": "jiangwei-infra", + "simayi": "simayi-challenger", + "pangtong": "pangtong-fujunshi", +} + +# 正则:匹配 @后面跟着的合法 Agent 名(英文字母/中文/数字/连字符) +_MENTION_PATTERN = re.compile(r"@([a-zA-Z\u4e00-\u9fa5][a-zA-Z0-9\u4e00-\u9fff-]*)") + + +def extract_mentions(body: str, sender: str) -> list[str]: + """从文本中提取 @mention 的 Agent ID 列表。 + + Args: + body: 评论文本 + sender: 评论者 Gitea 用户名(用于排除自己 @自己) + + Returns: + 去重后的 Agent ID 列表 + + 匹配优先级:精确 > 别名 > 前缀模糊(需唯一匹配,多候选则跳过) + """ + candidates = _MENTION_PATTERN.findall(body) + result: Set[str] = set() + + for c in candidates: + # 1. 精确匹配(@zhangfei-dev) + if c in AGENT_IDS: + result.add(c) + # 2. 别名匹配(@张飞、@zhangfei) + elif c in AGENT_ALIAS: + result.add(AGENT_ALIAS[c]) + else: + # 3. 
前缀模糊匹配(@zhangf → zhangfei-dev) + # 假设 A2:多个候选时不匹配,只 log warning + matches = [aid for aid in AGENT_IDS if aid.startswith(c)] + if len(matches) == 1: + result.add(matches[0]) + elif len(matches) > 1: + logger.warning( + "Prefix '%s' matched %d agents (%s), skipping ambiguous mention", + c, len(matches), matches) + + # 排除自己 @自己(假设 A1:Gitea login = Agent ID) + result.discard(sender) + return list(result) + + +def should_suppress_mention( + mentioned_agent: str, + auto_notify_targets: List[str], +) -> bool: + """判断 @mention 通知是否应被抑制(因为自动流转已通知同一人)。 + + Args: + mentioned_agent: 被 @的 Agent ID + auto_notify_targets: 本次事件自动流转已通知的目标列表 + + Returns: + True 表示应抑制(不发 @mention Mail) + """ + return mentioned_agent in auto_notify_targets + + +def infer_intent(body: str) -> str: + """从 @mention 内容推断意图。 + + Returns: + "help" | "notify" | "collaborate" | "assign" + """ + # 分配子任务关键词 + assign_keywords = ["交给", "分配", "负责", "认领", "做一下", "帮忙做", "implement"] + if any(kw in body for kw in assign_keywords): + return "assign" + + # 求助关键词(注意:"帮忙"已由 assign_keywords 的"帮忙做"覆盖,"请帮忙"由 collab_keywords 覆盖) + help_keywords = ["怎么", "如何", "?", "?", "什么", "哪个", "能否"] + if any(kw in body for kw in help_keywords): + return "help" + + # 协作请求关键词 + collab_keywords = ["请帮忙", "请协助", "请澄清", "请review", "请审查", "评估"] + if any(kw in body for kw in collab_keywords): + return "collaborate" + + # 默认为通知关注 + return "notify" + + +def _build_response_guidance( + intent: str, + gitea_api: str, + repo: str, + issue_number: int, + commenter: str, +) -> str: + """根据意图类型生成响应指引文本。""" + if intent == "help": + return ( + f"这是一条求助,请到 Gitea 评论回复:\n" + f"1. 获取评论上下文(上方 API)\n" + f"2. 组织回答\n" + f"3. 
在 Gitea 评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments\n" + f' Body: {{"body": "你的回答内容"}}' + ) + elif intent == "notify": + return ( + f"这是一条通知,请查看并知晓。如有意见,可到 Gitea 评论:\n" + f"- 查看 Issue/PR 详情(上方 API)\n" + f"- 如有意见,评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments" + ) + elif intent == "collaborate": + return ( + f"这是一条协作请求,请评估后回复(评论或 Mail):\n" + f"1. 获取详情(上方 API)\n" + f"2. 评估可行性\n" + f"3a. 评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments\n" + f' Body: {{"body": "你的回复"}}\n' + f"3b. 或通过 Mail 回复评论者: {commenter}" + ) + elif intent == "assign": + return ( + f"这是一条任务分配,请认领并执行:\n" + f"1. 获取 Issue 详情(上方 API)\n" + f"2. 评估可行性\n" + f"3. 认领 Issue: POST {gitea_api}/repos/{repo}/issues/{issue_number}/assignees\n" + f' Body: {{"assignees": ["{{your_agent_id}}"]}}\n' + f"4. 执行任务\n" + f"5. 完成后更新 Issue 状态: PATCH {gitea_api}/repos/{repo}/issues/{issue_number}\n" + f' Body: {{"state": "closed"}}' + ) + return "请查看详情(上方 API)并按需回复。" diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py new file mode 100644 index 0000000..20cc655 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py @@ -0,0 +1,1246 @@ +"""API 路由 — 工具链事件中枢(Toolchain Event Hub) + +接收 Gitea Webhook,翻译成 Mail 通知推送给 Agent。 + +端点: POST /webhook/gitea +支持事件: pull_request, pull_request_review, issues, issue_comment +""" + +from __future__ import annotations + +import asyncio +import hashlib +import hmac +import json +import logging +import os +import re +import time +from datetime import datetime +from pathlib import Path, PurePath +from typing import Any, Dict, List, Optional, Set, Tuple + +import httpx +from fastapi import APIRouter, Header, Request, Response + +from src.blackboard.db import init_db +from src.blackboard.models import Task +from src.blackboard.operations import Blackboard +from src.config.agents import AGENT_IDS +from 
src.api.mention_utils import ( + extract_mentions, + should_suppress_mention, + infer_intent, + _build_response_guidance, + GITEA_API_BASE, +) +from src.daemon.toolchain_templates import render_template +from src.utils import get_data_root + +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["toolchain"]) + +# --------------------------------------------------------------------------- +# 幂等检查:内存 set,保留最近 7 天 +# --------------------------------------------------------------------------- +# 使用内存 set 而非 SQLite(设计文档原计划 SQLite,简化实现:daemon 重启不频繁, +# 重启后丢失可接受,Webhook 重试窗口内不会重复) + +_delivery_cache: Set[str] = set() +_delivery_timestamps: List[Tuple[float, str]] = [] +_TTL_SECONDS = 7 * 24 * 3600 +_idempotency_lock = asyncio.Lock() + + +def _is_duplicate(event: str, delivery: str, + payload: Optional[Dict[str, Any]] = None) -> bool: + """检查 Webhook 是否重复投递,自动清理过期条目。 + + 双重去重策略: + 1. delivery UUID 去重(标准幂等) + 2. payload 内容去重(应对 Gitea v1.23.4 的 webhookNotifier + actionsNotifier + 对同一 review 生成不同 UUID 的双投递问题) + """ + now = time.time() + # 清理过期条目 + while _delivery_timestamps and ( + now - _delivery_timestamps[0][0]) > _TTL_SECONDS: + _, key = _delivery_timestamps.pop(0) + _delivery_cache.discard(key) + + # 检查 delivery UUID 去重 + key = f"{event}-{delivery}" + if key in _delivery_cache: + return True + + # 检查 payload 内容去重(review 事件:同一 PR + 同一用户 + 同一内容) + # 注意:Gitea webhookNotifier 用 review.body,actionsNotifier 用 review.content + # 所以去重 key 需要同时取两个字段,确保两种格式生成相同 key + if payload and "review" in event: + pr_num = payload.get("pull_request", {}).get("number") + sender = payload.get("sender", {}).get("login") + review = payload.get("review", {}) + # 取 body 或 content,优先 body(webhookNotifier 格式) + content = review.get("body", "") or review.get("content", "") + content_hash = hashlib.sha256(content.encode()).hexdigest()[:16] + review_id = review.get("id", "") + content_key = f"content:{event}:{pr_num}:{sender}:{review_id}:{content_hash}" + if content_key in _delivery_cache: + 
logger.info( + "Content-based duplicate detected: %s PR#%s by %s", + event, + pr_num, + sender) + return True + _delivery_cache.add(content_key) + _delivery_timestamps.append((now, content_key)) + + _delivery_cache.add(key) + _delivery_timestamps.append((now, key)) + return False + + +# --------------------------------------------------------------------------- +# 签名验证 +# --------------------------------------------------------------------------- + +_WEBHOOK_SECRET: Optional[str] = os.environ.get("GITEA_WEBHOOK_SECRET") + + +def _verify_signature(body: bytes, signature: Optional[str]) -> bool: + """验证 HMAC-SHA256 签名。secret 为空时跳过验签。""" + if not _WEBHOOK_SECRET: + return True + if not signature: + return False + expected = hmac.new( + _WEBHOOK_SECRET.encode(), body, hashlib.sha256 + ).hexdigest() + return hmac.compare_digest(expected, signature) + + +# --------------------------------------------------------------------------- +# Gitea API 调用 +# --------------------------------------------------------------------------- + +_GITEA_TOKEN: str = os.environ.get("GITEA_TOKEN", "") +_GITEA_BASE = "http://192.168.2.154:3000/api/v1" + + +async def _fetch_pr_files(repo: str, pr_number: int) -> Tuple[List[str], str]: + """获取 PR 文件列表,含重试机制。 + + Returns: + (文件列表, 错误信息) — 成功时错误信息为空字符串 + """ + if not _GITEA_TOKEN: + return [], "GITEA_TOKEN 未配置" + + url = f"{_GITEA_BASE}/repos/{repo}/pulls/{pr_number}/files" + headers = {"Authorization": f"token {_GITEA_TOKEN}"} + last_error = "" + for attempt in range(3): + try: + async with httpx.AsyncClient(timeout=5.0) as client: + resp = await client.get(url, headers=headers) + resp.raise_for_status() + files: List[Dict[str, Any]] = resp.json() + return [f.get("filename", "") for f in files], "" + except Exception as e: + last_error = str(e) + if attempt < 2: + await asyncio.sleep(0.5 * (attempt + 1)) + logger.warning( + "Retry %d/3 fetching PR files: %s/pulls/%d", + attempt + 1, + repo, + pr_number) + logger.warning( + "Failed to fetch PR 
files after 3 retries: %s/pulls/%d - %s", + repo, + pr_number, + last_error) + return [], f"获取文件列表失败(重试3次): {last_error}" + + +# --------------------------------------------------------------------------- +# 风险级别判定 +# --------------------------------------------------------------------------- + +_HIGH_PATTERNS = [ + "**/spawner*", "**/ticker*", "**/dispatcher*", + "**/router*", "**/guardrails*", "**/strategy*", "**/risk*", +] + + +def _calc_risk_level(changed_files: List[str]) -> str: + """根据改动文件列表判定风险级别。""" + for filepath in changed_files: + for pattern in _HIGH_PATTERNS: + if PurePath(filepath).match(pattern): + return "high" + return "standard" + + +# --------------------------------------------------------------------------- +# Mail 创建 +# --------------------------------------------------------------------------- + + +MAIL_PROJECT_ID = "_mail" +TOOLCHAIN_PROJECT_ID = "_toolchain" + + +def _mail_db_path() -> Path: + """获取 Mail 数据库路径,确保目录存在。""" + root = get_data_root() + db = root / MAIL_PROJECT_ID / "blackboard.db" + db.parent.mkdir(parents=True, exist_ok=True) + init_db(db) + return db + + +def _toolchain_db_path() -> Path: + """获取 Toolchain 数据库路径,确保目录和表存在。""" + root = get_data_root() + db = root / TOOLCHAIN_PROJECT_ID / "blackboard.db" + db.parent.mkdir(parents=True, exist_ok=True) + init_db(db) + return db + + +def _send_toolchain_task( + to_agent: str, + title: str, + description: str, + event_type: str, + action_type: str, + steps: list, + context_data: dict | None = None, + source: str = "webhook", +) -> str: + """创建 Toolchain Task 并写入 _toolchain DB。 + + Args: + to_agent: 收件人 Agent ID + title: 任务标题 + description: 任务描述(模板渲染后的事件信息) + event_type: 事件类型(review_result / ci_failure / ...) 
+ action_type: 动作分类(用于步骤选择和日志统计) + steps: 结构化编号步骤列表 + context_data: 事件上下文数据(PR 号、仓库名等) + source: 来源标识 + + Returns: + 创建的 Task ID + """ + if to_agent not in AGENT_IDS: + logger.warning("Unknown agent: %s, skipping toolchain task", to_agent) + return "" + + task_id = f"tc-{int(datetime.now().timestamp() * 1000)}" + must_hives = json.dumps({ + "event_type": event_type, + "action_type": action_type, + "steps": steps, + "context": context_data or {}, + "from": "system", + "source": source, + }, ensure_ascii=False) + + task = Task( + id=task_id, + title=title, + description=description, + assignee=to_agent, + assigned_by="system", + must_haves=must_hives, + task_type="toolchain", + status="pending", + ) + bb = Blackboard(_toolchain_db_path()) + bb.create_task(task) + logger.info( + "Toolchain task sent: %s → %s [%s] action_type=%s", + title[:40], to_agent, task_id, action_type, + ) + return task_id + + +def _send_mail( + to_agent: str, + title: str, + description: str, + source: str = "webhook", +) -> str: + """创建 Mail Task 并写入数据库。 + + Args: + to_agent: 收件人 Agent ID + title: 邮件标题 + description: 邮件正文 + source: 来源标识 + + Returns: + 创建的 Mail ID + + Raises: + Exception: 数据库写入失败 + """ + if to_agent not in AGENT_IDS: + logger.warning("Unknown agent: %s, skipping mail", to_agent) + return "" + + mail_id = f"mail-{int(datetime.now().timestamp() * 1000)}" + notify_meta = { + "type": "inform", + "performative": "inform", + "is_read": False, + "conversation_id": f"conv-{mail_id}", + "from": "system", + "source": source, + } + task = Task( + id=mail_id, + title=title, + description=description, + assignee=to_agent, + assigned_by="system", + must_haves=json.dumps(notify_meta, ensure_ascii=False), + task_type="mail", + status="pending", + ) + bb = Blackboard(_mail_db_path()) + bb.create_task(task) + logger.info("Mail sent: %s → %s [%s]", title[:40], to_agent, mail_id) + return mail_id + + +# --------------------------------------------------------------------------- +# 辅助:从 payload 
提取仓库全名 +# --------------------------------------------------------------------------- + + +def _repo_fullname(payload: Dict[str, Any]) -> str: + """从 Webhook payload 提取仓库全名(owner/repo)。""" + repo = payload.get("repository") or {} + return repo.get("full_name", "") + + +# --------------------------------------------------------------------------- +# @mention 通用发送函数 +# --------------------------------------------------------------------------- + + +async def _send_mention_mails( + mentions: list[str], + auto_targets: list[str], + source_type: str, + mention_type: str, + source_url: str, + commenter: str, + content: str, + repo: str, + issue_number: int, + is_pr: bool, +) -> None: + """通用 @mention Mail 发送函数。 + + 自动抑制已在 auto_targets 中的 Agent,避免双重通知。 + 根据内容推断意图,生成不同的响应指引。 + """ + # 确定 API 路径 + if is_pr: + detail_api = f"pulls/{issue_number}" + comments_api = f"issues/{issue_number}/comments" + else: + detail_api = f"issues/{issue_number}" + comments_api = f"issues/{issue_number}/comments" + + for agent_id in mentions: + if should_suppress_mention(agent_id, auto_targets): + logger.info( + "Mention suppressed for %s (already notified by auto flow)", + agent_id) + continue + + # 从 api_path 提取编号用于标题,如 "issues/32" → "#32" + number_str = f"#{issue_number}" if issue_number else "" + intent = infer_intent(content) + intent_hint = {"help": "求助", "notify": "通知关注", + "collaborate": "协作请求", "assign": "分配子任务"}[intent] + + # 生成响应指引 + guidance = _build_response_guidance( + intent=intent, + gitea_api=GITEA_API_BASE, + repo=repo, + issue_number=issue_number, + commenter=commenter, + ) + + text = render_template("mention", { + "mention_type": mention_type, + "source_type": source_type, + "source_url": source_url, + "commenter": commenter, + "intent_hint": intent_hint, + "content_snippet": content[:500], + "gitea_api": GITEA_API_BASE, + "repo": repo, + "source_detail_api_path": detail_api, + "source_comments_api_path": comments_api, + "response_guidance": guidance, + }) + + title = 
f"@mention ({intent_hint}): {source_type} {number_str} ({repo})" + _send_toolchain_task( + to_agent=agent_id, + title=title, + description=text, + event_type="mention", + action_type="mention", + steps=[ + "按上方 mention 模板中的 response_guidance 执行", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "source_type": source_type, + "source_url": source_url, + "commenter": commenter, + "content_snippet": content[:500], + "repo": repo, + "issue_number": issue_number, + }, + ) + + +# --------------------------------------------------------------------------- +# 事件处理函数 +# --------------------------------------------------------------------------- + + +async def _handle_pull_request(payload: Dict[str, Any]) -> None: + """处理 pull_request 事件:opened → 通知 reviewer;closed → merge 通知。""" + action = payload.get("action", "") + if action == "opened": + await _handle_pr_opened(payload) + elif action == "closed": + await _handle_pr_closed(payload) + elif action == "synchronize": + await _handle_pr_synchronize(payload) + + +async def _handle_pr_opened(payload: Dict[str, Any]) -> None: + """PR opened → 通知 simayi-challenger。""" + pr = payload.get("pull_request") + if not pr or not isinstance(pr, dict): + logger.warning( + "pull_request event missing pull_request field, skipping") + return + repo = _repo_fullname(payload) + pr_number = pr.get("number", 0) + pr_title = pr.get("title", "") + pr_author = pr.get("user", {}).get("login", "unknown") + branch = pr.get("head", {}).get("ref", "unknown") + + # 获取改动文件列表 + changed_files, fetch_error = await _fetch_pr_files(repo, pr_number) + risk_level = _calc_risk_level(changed_files) + if fetch_error: + file_list = f"⚠️ {fetch_error}" + else: + file_list = "\n".join( + f"- {f}" for f in changed_files) if changed_files else "(无文件变更)" + + text = render_template("review_request", { + "repo": repo, + "pr_number": str(pr_number), + "pr_title": pr_title, + 
"pr_author": pr_author, + "branch": branch, + "risk_level": risk_level, + "file_list": file_list, + }) + + title = f"Review 请求: {pr_title} ({repo}#{pr_number})" + _send_toolchain_task( + to_agent="simayi-challenger", + title=title, + description=text, + event_type="review_request", + action_type="review_request", + steps=[ + f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", + "按审查清单审查(参考 code-review Skill)", + f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)— APPROVE 或 REQUEST_CHANGES", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "pr_author": pr_author, + "branch": branch, + "risk_level": risk_level, + }, + ) + + # S3: PR body @mention 通知 + pr_body = pr.get("body", "") or "" + sender = pr.get("user", {}).get("login", "") + mentions = extract_mentions(pr_body, sender) + if mentions: + # 自动流转已通知 simayi-challenger(review_request) + auto_targets = ["simayi-challenger"] + await _send_mention_mails( + mentions=mentions, + auto_targets=auto_targets, + source_type="PR", + mention_type="PR @mention", + source_url=pr.get("html_url", ""), + commenter=sender, + content=pr_body, + repo=repo, + issue_number=pr_number, + is_pr=True, + ) + + +async def _send_review_mentions( + review_body: str, + reviewer: str, + pr_author: str, + pr: dict, + repo: str, + pr_number: int, +) -> None: + """提取并发送 Review body 中的 @mention 通知(COMMENTED / 非 COMMENTED 通用)。""" + mentions = extract_mentions(review_body, reviewer) + if mentions: + auto_targets = [pr_author] + await _send_mention_mails( + mentions=mentions, + auto_targets=auto_targets, + source_type="Review", + mention_type="Review @mention", + source_url=pr.get("html_url", ""), + commenter=reviewer, + content=review_body, + repo=repo, + issue_number=pr_number, + is_pr=True, + ) + + +async def _handle_pull_request_review(payload: Dict[str, 
Any]) -> None: + """处理 pull_request_review 事件:非 COMMENTED → 通知 PR 作者。 + + 支持两种 payload 格式: + - repo webhook: review.state = "APPROVED" / "REQUEST_CHANGES" + - org webhook (Gitea v1.23.4): review.type = "pull_request_review_approved" / "pull_request_review_rejected" + """ + review = payload.get("review") + if not review or not isinstance(review, dict): + logger.warning( + "pull_request_review event missing review field, skipping") + return + pr = payload.get("pull_request") + if not pr or not isinstance(pr, dict): + logger.warning( + "pull_request_review event missing pull_request field, skipping") + return + + # 兼容两种 payload 格式提取 state + state = review.get("state", "") + if not state: + # org webhook 格式:review.type = "pull_request_review_approved" + review_type = review.get("type", "") + type_map = { + "pull_request_review_approved": "APPROVED", + "pull_request_review_rejected": "REQUEST_CHANGES", + "pull_request_review_comment": "COMMENTED", + } + state = type_map.get(review_type, "") + + repo = _repo_fullname(payload) + pr_number = pr.get("number", 0) + pr_title = pr.get("title", "") + pr_author = pr.get("user", {}).get("login", "unknown") + # 兼容:org webhook 的 review 没有 user,从 sender 取 + reviewer = review.get( + "user", + {}).get( + "login", + "") or payload.get( + "sender", + {}).get( + "login", + "unknown") + review_body = review.get("body", "") or review.get("content", "(无评论)") + + if state == "COMMENTED": + # Review 评论 → 通知 PR 作者 + review_body = review.get("body", "") or review.get("content", "(无评论)") + reviewer = review.get("user", {}).get("login", "") or payload.get("sender", {}).get("login", "unknown") + + text = render_template("review_comment", { + "repo": repo, + "pr_number": str(pr_number), + "pr_title": pr_title, + "reviewer": reviewer, + "comment_body": review_body, + }) + + title = f"Review 评论: {pr_title} ({repo}#{pr_number})" + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="review_comment", + 
action_type="review_comment", + steps=[ + f"查看评论(Gitea API: GET /repos/{repo}/issues/{pr_number}/comments)", + "根据评论内容响应(修改代码或在 PR 上回复 comment)", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "reviewer": reviewer, + "comment_body": review_body, + }, + ) + + # S5: Review body @mention 通知(COMMENTED 路径) + await _send_review_mentions(review_body, reviewer, pr_author, pr, repo, pr_number) + + return + + result_map = {"APPROVED": "通过 ✓", "REQUEST_CHANGES": "驳回 ✗"} + if state not in result_map: + return + result = result_map[state] + + text = render_template("review_result", { + "repo": repo, + "pr_number": str(pr_number), + "pr_title": pr_title, + "reviewer": reviewer, + "result": result, + "review_body": review_body, + }) + + title = f"Review {result}: {pr_title} ({repo}#{pr_number})" + if state == "APPROVED": + tc_steps = [ + f"合并 PR(Gitea API: POST /repos/{repo}/pulls/{pr_number}/merge)", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ] + else: # REQUEST_CHANGES + tc_steps = [ + "按审查意见逐条修改代码", + "push 到原分支 → CI 自动跑", + "CI 通过后等重新 Review", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ] + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="review_result", + action_type="review_result", + steps=tc_steps, + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "result": result, + "reviewer": reviewer, + "review_body": review_body, + }, + ) + + # S5: Review body @mention 通知(非 COMMENTED 路径) + await _send_review_mentions(review_body, reviewer, pr_author, pr, repo, pr_number) + + +async def _fetch_latest_reviewer(repo: str, pr_number: int) -> str: + """查询 PR 最近一次非 PENDING review 的提交者。 + + Returns: + 
reviewer login 或空字符串 + """ + if not _GITEA_TOKEN: + return "" + + url = f"{_GITEA_BASE}/repos/{repo}/pulls/{pr_number}/reviews" + headers = {"Authorization": f"token {_GITEA_TOKEN}"} + + try: + async with httpx.AsyncClient(timeout=5.0) as client: + resp = await client.get(url, headers=headers) + resp.raise_for_status() + reviews = resp.json() + + # 取最后一个非 PENDING 的 review 的 user + for review in reversed(reviews): + state = review.get("state", "") + if state in ("APPROVED", "REQUEST_CHANGES", "COMMENTED"): + user = review.get("user", {}) + return user.get("login", "") + except Exception as e: + logger.warning("Failed to fetch reviews for %s#%d: %s", repo, pr_number, e) + + return "" + + +async def _handle_pr_synchronize(payload: Dict[str, Any]) -> None: + """PR 更新(新 push)→ 通知 reviewer 重新 review。 + + 查询最近一次 review 的提交者作为通知目标。 + 只在有 review 历史时才通知(避免和 opened 重复)。 + """ + pr = payload.get("pull_request") + if not pr or not isinstance(pr, dict): + return + + repo = _repo_fullname(payload) + pr_number = pr.get("number", 0) + pr_title = pr.get("title", "") + pr_author = pr.get("user", {}).get("login", "unknown") + new_sha = pr.get("head", {}).get("sha", "unknown")[:12] + + # 查询最近 review 的提交者 + reviewer = await _fetch_latest_reviewer(repo, pr_number) + if not reviewer: + # 没有已有 review 历史,fallback 到默认 reviewer + reviewer = "simayi-challenger" + logger.info("No review history for PR #%s, using default reviewer %s", pr_number, reviewer) + + text = render_template("review_updated", { + "repo": repo, + "pr_number": str(pr_number), + "pr_title": pr_title, + "pr_author": pr_author, + "new_sha": new_sha, + "reviewer": reviewer, + }) + + title = f"PR 更新: {pr_title} ({repo}#{pr_number})" + _send_toolchain_task( + to_agent=reviewer, + title=title, + description=text, + event_type="review_updated", + action_type="review_updated", + steps=[ + f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", + "重点检查上次 Review 意见的修改部分", + f"提交 Review(Gitea API: POST 
/repos/{repo}/pulls/{pr_number}/reviews)", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "pr_author": pr_author, + "new_sha": new_sha, + "reviewer": reviewer, + }, + ) + + +def _send_deploy_failure_task(repo: str, pr_number: int, pr_title: str, reason: str) -> None: + """CD 部署失败通知,走 ToolchainHandler。""" + text = render_template("deploy_failure", { + "repo": repo, + "commit_sha": f"PR #{pr_number}", + }) + title = f"部署失败: {repo} (auto-deploy, PR #{pr_number})" + full_text = f"{text}\n\n失败原因: {reason}" + for agent_id in ("jiangwei-infra", "pangtong-fujunshi"): + _send_toolchain_task( + to_agent=agent_id, + title=title, + description=full_text, + event_type="deploy_failure", + action_type="deploy_failure", + steps=[ + "检查 deploy 日志", + "排查失败原因", + "修复并重新部署", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "repo": repo, + "pr_number": pr_number, + "pr_title": pr_title, + "reason": reason, + }, + ) + + +async def _handle_pr_closed(payload: Dict[str, Any]) -> None: + """PR closed → 如果 merged,通知 PR 作者。""" + pr = payload.get("pull_request") + if not pr or not isinstance(pr, dict): + return + + # 只处理 merged 的 PR + if not pr.get("merged", False): + return + + repo = _repo_fullname(payload) + pr_number = pr.get("number", 0) + pr_title = pr.get("title", "") + pr_author = pr.get("user", {}).get("login", "unknown") + # merged_by 可能不在 payload 中,fallback 到 sender + merged_by = ( + pr.get("merged_by", {}).get("login", "") + or payload.get("sender", {}).get("login", "unknown") + ) + + text = render_template("review_merged", { + "repo": repo, + "pr_number": str(pr_number), + "pr_title": pr_title, + "pr_author": pr_author, + "merged_by": merged_by, + }) + + title = f"PR 已合并: {pr_title} ({repo}#{pr_number})" + _send_toolchain_task( + 
to_agent=pr_author, + title=title, + description=text, + event_type="review_merged", + action_type="review_merged", + steps=[], # 纯通知,无步骤 + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "pr_author": pr_author, + "merged_by": merged_by, + }, + ) + + # 自动部署:git pull + rsync + 按需 post_deploy + try: + import yaml + + # 加载部署配置 + config_path = Path(__file__).parent.parent.parent / "config" / "deploy-targets.yaml" + if not config_path.exists(): + return + + with open(config_path, "r", encoding="utf-8") as f: + deploy_config = yaml.safe_load(f) or {} + + targets = deploy_config.get("targets", {}) + target = targets.get(repo) + if not target: + return # 该仓库不在部署配置中,跳过 + + dev_dir = os.path.expanduser(target["dev_dir"]) + install_dir = os.path.expanduser(target.get("install_dir", target["dev_dir"])) + rsync_excludes = target.get("rsync_exclude", []) + + # Step 1: git pull in dev dir + proc = await asyncio.create_subprocess_exec( + "git", "pull", "origin", "main", + cwd=dev_dir, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30) + + if proc.returncode != 0: + logger.warning("Auto-deploy: git pull failed for %s: %s", repo, stderr.decode()) + return + + logger.info("Auto-deploy: git pull success for %s", repo) + + # Step 2: rsync to install dir + rsync_args = ["rsync", "-a"] + for exc in rsync_excludes: + rsync_args.extend(["--exclude", exc]) + rsync_args.extend([f"{dev_dir}/", f"{install_dir}/"]) + + rsync_proc = await asyncio.create_subprocess_exec( + *rsync_args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, rsync_err = await asyncio.wait_for(rsync_proc.communicate(), timeout=60) + + if rsync_proc.returncode != 0: + logger.error("Auto-deploy: rsync failed: %s", rsync_err.decode()) + _send_deploy_failure_task(repo, pr_number, pr_title, f"rsync 失败: {rsync_err.decode()}") + return + + # Step 3: 判断是否需要执行 post_deploy + 
files = await _fetch_pr_files(repo, pr_number) + file_list = files[0] + needs_restart = any( + f.startswith("src/") or f.startswith("templates/") or f.startswith("frontend/") or f.endswith(".py") + for f in file_list + ) + + if needs_restart: + post_deploy_cmds = target.get("post_deploy", []) + pm2_name = target.get("pm2_name", "") + for cmd in post_deploy_cmds: + logger.info("Auto-deploy: executing post_deploy: %s", cmd) + + # M2: 检测当前进程是否会被此命令杀掉(而非脆弱的字符串匹配) + # 通过 PM2 环境变量判断:pm2 启动的进程有 PM2_HOME + self_restart = False + if pm2_name and os.environ.get("PM2_HOME") and "pm2 restart" in cmd: + # 检查命令是否包含当前进程名 + if re.search(rf'pm2\s+restart\s+{re.escape(pm2_name)}', cmd): + self_restart = True + + if self_restart: + # M1: 用 asyncio.sleep 延迟而非 nohup,保留子进程输出和错误检测 + # 先 sleep 让 handler 正常返回,再启动 restart 命令 + # restart 的子进程会在父进程死后被 pm2 新进程接管 + logger.info("Auto-deploy: self-restart detected, deferring 2s: %s", cmd) + await asyncio.sleep(2) + deploy_proc = await asyncio.create_subprocess_exec( + "sh", "-c", cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + # restart 会杀掉当前进程,communicate 可能不会完成 + # 但我们至少尝试读取输出 + try: + _, deploy_err = await asyncio.wait_for( + deploy_proc.communicate(), timeout=10) + except (asyncio.TimeoutError, ProcessLookupError): + # 预期行为:进程被 pm2 restart 杀掉 + logger.info("Auto-deploy: process killed by self-restart (expected)") + break + else: + deploy_proc = await asyncio.create_subprocess_exec( + "sh", "-c", cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, deploy_err = await asyncio.wait_for(deploy_proc.communicate(), timeout=30) + + if deploy_proc.returncode != 0: + logger.error("Auto-deploy: post_deploy failed: %s", deploy_err.decode()) + _send_deploy_failure_task(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}") + break + else: + logger.info("Auto-deploy: all post_deploy commands succeeded (files: %s)", ", ".join(file_list[:5])) + else: + 
logger.info("Auto-deploy: docs-only change for %s, skip post_deploy", repo) + + except asyncio.TimeoutError: + logger.error("Auto-deploy: timeout for %s", repo) + _send_deploy_failure_task(repo, pr_number, pr_title, "部署超时") + except Exception as e: + logger.error("Auto-deploy: unexpected error: %s", e) + + +async def _handle_issues(payload: Dict[str, Any]) -> None: + """处理 issues 事件:assigned → 通知被指派人;opened+部署失败 → 通知运维。""" + action = payload.get("action", "") + issue = payload.get("issue") + if not issue or not isinstance(issue, dict): + logger.warning("issues event missing issue field, skipping") + return + repo = _repo_fullname(payload) + issue_number = issue.get("number", 0) + issue_title = issue.get("title", "") + + if action == "assigned": + assignee = "" + assignees = issue.get("assignees") or [] + if not assignees: + single = issue.get("assignee") + if single and isinstance(single, dict): + assignees = [single] + if assignees: + assignee = assignees[-1].get("login", "") + else: + assignee = "" + if not assignee: + logger.debug("Issue assigned but no assignee found, skipping") + return + + labels_list = [lbl.get("name", "") + for lbl in (issue.get("labels") or [])] + labels = ", ".join(labels_list) if labels_list else "(无标签)" + issue_body = issue.get("body", "(无描述)") + brief = issue_title[:20].replace(" ", "-").lower() + + text = render_template("issue_assigned", { + "repo": repo, + "issue_number": str(issue_number), + "issue_title": issue_title, + "labels": labels, + "issue_body": issue_body or "(无描述)", + "brief": brief, + }) + + title = f"Issue 指派: {issue_title} ({repo}#{issue_number})" + _send_toolchain_task( + to_agent=assignee, + title=title, + description=text, + event_type="issue_assigned", + action_type="issue_assigned", + steps=[ + f"创建分支 fix/{issue_number}-{brief}", + "编码 + 写 UT", + "push → 等 CI", + f"CI 通过后创建 PR(Gitea API: POST /repos/{repo}/pulls)", + "等 Review", + f"提交 action report(POST 
http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "issue_number": issue_number, + "repo": repo, + "issue_title": issue_title, + "labels": labels, + "issue_body": issue_body or "(无描述)", + "brief": brief, + }, + ) + + elif action == "opened": + if "部署失败" in issue_title: + # 从 Issue body 提取 commit hash(Gitea deploy workflow 格式) + sha_match = re.search(r'[0-9a-f]{40}', issue.get("body", "")) + commit_sha = sha_match.group(0) if sha_match else "(未知)" + + text = render_template("deploy_failure", { + "repo": repo, + "commit_sha": commit_sha or "(未知)", + }) + + title = f"部署失败: {repo}" + for agent_id in ("jiangwei-infra", "pangtong-fujunshi"): + _send_toolchain_task( + to_agent=agent_id, + title=title, + description=text, + event_type="deploy_failure", + action_type="deploy_failure", + steps=[ + "检查 deploy 日志", + "排查失败原因", + "修复并重新部署", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "repo": repo, + "commit_sha": commit_sha or "(未知)", + }, + ) + + # Issue body @mention(opened 时检查) + issue_body = issue.get("body", "") or "" + sender = payload.get("sender", {}).get("login", "") + mentions = extract_mentions(issue_body, sender) + if mentions: + # 自动流转已通知 assignee + assignees = issue.get("assignees") or [] + if not assignees: + single = issue.get("assignee") + if single and isinstance(single, dict): + assignees = [single] + auto_targets = [a.get("login", "") for a in assignees if isinstance(a, dict)] + await _send_mention_mails( + mentions=mentions, + auto_targets=auto_targets, + source_type="Issue", + mention_type="Issue @mention", + source_url=issue.get("html_url", ""), + commenter=sender, + content=issue_body, + repo=repo, + issue_number=issue_number, + is_pr=False, + ) + + +async def _handle_issue_comment(payload: Dict[str, Any]) -> None: + """处理 issue_comment 事件:CI 失败关键词 → 通知 PR 作者;@mention → 通知被提及者。""" + comment = 
payload.get("comment") + if not comment or not isinstance(comment, dict): + logger.warning("issue_comment event missing comment field, skipping") + return + body = comment.get("body", "") + sender = comment.get("user", {}).get("login", "") + + issue = payload.get("issue") + if not issue or not isinstance(issue, dict): + logger.warning("issue_comment event missing issue field, skipping") + return + + action = payload.get("action", "") + if action != "created": + return + + # === 路径 1:CI 失败通知(原有逻辑,改为正向 if) === + if ("[CI]" in body or "CI 失败" in body) and issue.get("state") != "closed": + repo = _repo_fullname(payload) + issue_number = issue.get("number", 0) + + # 尝试从关联 PR 获取信息 + pr_author = issue.get("user", {}).get("login", "unknown") + branch_match = re.search(r"分支:\s*(\S+)", body) + branch = branch_match.group(1) if branch_match else "(未知)" + + # 提取错误摘要(取 comment body 前 500 字符) + error_summary = body[:500] if body else "(无错误信息)" + + text = render_template("ci_failure", { + "repo": repo, + "pr_number": str(issue_number), + "branch": branch, + "error_summary": error_summary, + }) + + title = f"CI 失败: {repo}#{issue_number}" + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="ci_failure", + action_type="ci_failure", + steps=[ + "查看完整 CI 日志(PR 页面或 Gitea Actions 页面)", + "修复失败的测试", + "push → CI 自动重跑", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": issue_number, + "repo": repo, + "branch": branch, + "error_summary": error_summary, + }, + ) + # CI 处理完不 return,继续检查 @mention + + # === 路径 2:@mention 通知(新增,独立路径) === + # 注意:@mention 检测与 CI 检测是独立的,同一条评论可同时触发两者 + mentions = extract_mentions(body, sender) + if mentions: + # 判断是 PR 还是 Issue(Gitea 中 PR 本质是特殊的 Issue) + is_pr = issue.get("pull_request") is not None + source_type = "PR" if is_pr else "Issue" + mention_type = "PR @mention" if is_pr else "Issue @mention" + + issue_number = 
issue.get("number", 0) + repo = _repo_fullname(payload) + + # 自动流转已通知的人(CI 失败通知的 PR 作者) + auto_targets: list[str] = [] + if ("[CI]" in body or "CI 失败" in body) and issue.get("state") != "closed": + auto_targets.append(issue.get("user", {}).get("login", "")) + + await _send_mention_mails( + mentions=mentions, + auto_targets=auto_targets, + source_type=source_type, + mention_type=mention_type, + source_url=issue.get("html_url", ""), + commenter=sender, + content=body, + repo=repo, + issue_number=issue_number, + is_pr=is_pr, + ) + + +# --------------------------------------------------------------------------- +# 事件分发 +# --------------------------------------------------------------------------- + +_EVENT_HANDLERS: Dict[str, Any] = { + "pull_request": _handle_pull_request, + "pull_request_sync": _handle_pr_synchronize, # Gitea: PR branch push 是独立事件类型 + "pull_request_review": _handle_pull_request_review, + "pull_request_review_approved": _handle_pull_request_review, + "pull_request_review_rejected": _handle_pull_request_review, + "pull_request_review_comment": _handle_pull_request_review, + "pull_request_comment": _handle_pull_request_review, # Gitea: review comment 独立事件类型 + # Gitea v1.23.4 实际发出的 review 子事件(无 _review_ 中间段) + "pull_request_approved": _handle_pull_request_review, + "pull_request_rejected": _handle_pull_request_review, + "issues": _handle_issues, + "issue_comment": _handle_issue_comment, +} + + +# --------------------------------------------------------------------------- +# Webhook 端点 +# --------------------------------------------------------------------------- + + +@router.post("/webhook/gitea") +async def gitea_webhook( + request: Request, + x_gitea_event: Optional[str] = Header(None, alias="X-Gitea-Event"), + x_gitea_delivery: Optional[str] = Header(None, alias="X-Gitea-Delivery"), + x_gitea_signature: Optional[str] = Header(None, alias="X-Gitea-Signature"), +) -> Response: + """Gitea Webhook 接收端点。 + + 处理流程:签名验证 → 幂等检查 → 事件分发 → Mail 推送。 + + 返回策略: + - 
payload 解析失败 / 未知事件 / 幂等重复 → 200(不触发重试) + - Mail 创建失败 → 500(触发 Gitea 重试) + """ + body = await request.body() + + # 1. 签名验证 + if not _verify_signature(body, x_gitea_signature): + logger.warning("Webhook signature verification failed") + return Response(status_code=403, + content="signature verification failed") + + # 3. 解析 payload(提前解析,用于幂等检查) + try: + payload = await request.json() + except Exception: + logger.warning("Failed to parse webhook payload") + return Response(status_code=200, content="invalid payload") + + # 2. 幂等检查(需要在 payload 解析后,以支持内容去重) + if x_gitea_event and x_gitea_delivery: + async with _idempotency_lock: + if _is_duplicate(x_gitea_event, x_gitea_delivery, payload): + logger.debug( + "Duplicate webhook: %s/%s", + x_gitea_event, + x_gitea_delivery) + return Response(status_code=200, content="duplicate") + + # 4. 查找 handler + action = payload.get("action", "") + logger.info("[WEBHOOK] event=%s action=%s delivery=%s", x_gitea_event, action, x_gitea_delivery) + handler = _EVENT_HANDLERS.get(x_gitea_event or "") + if not handler: + logger.info("[WEBHOOK] Unhandled event type: %s", x_gitea_event) + return Response(status_code=200, + content=f"unhandled event: {x_gitea_event}") + + # 5. 
"""
prompt_composer.py — PromptSection protocol, PromptContext and PromptComposer.

The composer keeps an ordered set of prompt sections, sorts them by priority
and joins their rendered text into the final prompt.
"""

import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Protocol, runtime_checkable

logger = logging.getLogger("moziplus-v2.prompt_composer")

# ---------------------------------------------------------------------------
# Conventional priority bands for sections (smaller = earlier in the prompt)
# ---------------------------------------------------------------------------
PRIORITY_CONTEXT = 10      # task context
PRIORITY_PRIOR = 20        # information from preceding steps
PRIORITY_ROLE = 30         # role rules
PRIORITY_API = 40          # API operating instructions
PRIORITY_CONSTRAINTS = 50  # hard constraints
PRIORITY_EXTENSION = 60    # extension sections


# ---------------------------------------------------------------------------
# PromptSection protocol
# ---------------------------------------------------------------------------
@runtime_checkable
class PromptSection(Protocol):
    """One section of a prompt.

    Any object exposing ``name``, ``priority``, ``render`` and
    ``should_include`` satisfies the protocol; explicit subclassing is
    optional.
    """

    name: str      # section name; adding a section with the same name replaces it
    priority: int  # sort key; smaller numbers are placed earlier

    def render(self, context: "PromptContext") -> str:
        """Return the section text; an empty string means "inject nothing"."""
        ...

    def should_include(self, context: "PromptContext") -> bool:
        """Return whether this section takes part in composition.

        The default is True, so sections that explicitly subclass the
        protocol only override this when they are conditional. (With a bare
        ``...`` body the inherited default returned None, which the composer
        treats as "exclude".)
        """
        return True


# ---------------------------------------------------------------------------
# PromptContext data object
# ---------------------------------------------------------------------------
@dataclass
class PromptContext:
    """Unified context handed to every section while a prompt is rendered."""

    task_id: str
    title: str
    description: str
    must_haves: str
    project_id: str
    agent_id: str

    task: Optional[Dict] = None
    role: str = "executor"
    spawn_type: str = "executor"

    # mail only
    from_agent: str = ""
    mail_type: str = ""  # inform / request

    # toolchain only
    event_type: str = ""  # ci_failure / review_request / ...
    event_data: Dict = field(default_factory=dict)
    action_type: str = ""  # action category (review_result / ci_failure / ...)
    action_steps: list = field(default_factory=list)  # structured, numbered steps

    # outputs of the tasks this one depends on
    depends_on_outputs: Optional[List] = None


# ---------------------------------------------------------------------------
# PromptComposer
# ---------------------------------------------------------------------------
class PromptComposer:
    """Assemble prompt sections in priority order."""

    SEPARATOR = "\n\n---\n\n"
    TOKEN_BUDGET_WARN = 800  # warn (never truncate) above this many estimated tokens
    CHARS_PER_TOKEN = 3.5    # rough characters-per-token ratio used for the estimate

    def __init__(self) -> None:
        self._sections: List[PromptSection] = []

    def add(self, section: PromptSection) -> None:
        """Add a section; an existing section with the same name is replaced."""
        self._sections = [s for s in self._sections if s.name != section.name]
        self._sections.append(section)

    def add_many(self, sections: List[PromptSection]) -> None:
        """Add several sections, applying the same replace-by-name rule."""
        for s in sections:
            self.add(s)

    def compose(self, context: PromptContext) -> str:
        """Build the final prompt.

        1. Drop sections whose ``should_include`` is falsy.
        2. Sort by ``priority`` (stable, so ties keep insertion order).
        3. Render each section.
        4. Drop empty / whitespace-only / ``None`` results.
        5. Join with ``SEPARATOR``.
        6. Log a warning when the token estimate exceeds the budget; the
           prompt is never truncated.

        Returns:
            The joined prompt text, or an empty string when nothing rendered.
        """
        active = [s for s in self._sections if s.should_include(context)]
        active.sort(key=lambda s: s.priority)

        rendered = [s.render(context) for s in active]
        # A section that returns None instead of "" is treated as "nothing to
        # inject" rather than crashing the whole prompt with AttributeError.
        parts = [p for p in rendered if p and p.strip()]

        result = self.SEPARATOR.join(parts)

        # Token estimate
        tokens = max(1, int(len(result) / self.CHARS_PER_TOKEN))
        logger.debug(
            "Composed prompt from %d sections, %d tokens",
            len(parts), tokens,
        )

        if tokens > self.TOKEN_BUDGET_WARN:
            logger.warning(
                "Prompt exceeds %d token budget: %d tokens (task_id=%s)",
                self.TOKEN_BUDGET_WARN, tokens, context.task_id,
            )

        return result
"text": "你的回复内容"}}' + +⚠️ 不需要填 "to",系统自动回复给发件者。 + +### 如何给其他人发新邮件 + +curl -s -X POST http://localhost:8083/api/mail \\ + -H 'Content-Type: application/json' \\ + -d '{{"from": "{agent_id}", "to": "对方agent-id", "title": "标题", "text": "正文", "type": "inform"}}' + +⚠️ to 必须是有效的 agent id: {valid_agents} +⚠️ 纯通知用 type=inform,需要对方回复不填 type(默认 request) +⚠️ 不能给自己发邮件 +⚠️ 不要执行任何状态转换命令(标 working/done/review/failed 等),系统会自动处理。 +""" + +SPAWN_PROMPT_TEMPLATE = """{identity_section} + +## 任务 +{title} +{description} + +项目: {project_id} | ID: {task_id} +类型: {task_type} | 优先级: {priority} +验收标准: {must_haves} + +{retry_context} + +## 你能做什么 +- 读任务详情(含依赖、讨论、产出): GET {api_base}/projects/{project_id}/tasks/{task_id}?expand=all +- 读所有活跃任务: GET {api_base}/projects/{project_id}/tasks?status=working,claimed,review +- 写产出: POST {api_base}/projects/{project_id}/tasks/{task_id}/outputs +- 写评论/交接: POST {api_base}/projects/{project_id}/tasks/{task_id}/comments +- 更新状态: POST {api_base}/projects/{project_id}/tasks/{task_id}/status +- 创建子任务: POST {api_base}/projects/{project_id}/tasks +- 认领任务: POST {api_base}/projects/{project_id}/tasks/{{{{id}}}}/claim + +## 约束 +- 完成后必须写产出物(output)并标 review,不能无产出就提交 +- 失败了标 failed 并写明原因 +- 产出物 handoff comment ≥ 50 字符(用于系统验证) +- 禁止使用 sessions_send 直接发消息(用 Mail API 或黑板 comment) +- 委托他人做事用黑板 comment @agent-id,系统自动路由(如 @zhaoyun-data 你来获取数据,无需手动传 mentions 数组) +- 安全红线: {guardrails_summary} + +### API 请求体示例 +写产出: POST .../outputs +```json +{{{{"agent": "{agent_id}", "content_type": "code", "title": "产出标题", "content_path": "/path/to/file", "summary": "简要说明"}}}} +``` + +写评论: POST .../comments +```json +{{{{"author": "{agent_id}", "body": "评论内容(≥50字符)", "comment_type": "handoff"}}}} +``` +""" + + +DISCUSSION_PROMPT_TEMPLATE = """你被 spawn 来参与黑板讨论。这是一个 v2.9 四相循环的讨论环节。 + +## 你的任务 + +{goal_snapshot} + +## 约束 + +{constraints} + +## 黑板 API + +你可以随时: +- 读黑板:GET http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}?expand=all(含 comments、outputs) +- 写 comment:POST 
class AgentBusyError(Exception):
    """Raised when an agent cannot be spawned right now.

    ``reason`` tells the dispatcher why (#07). Values raised by the spawner
    include ``counter_blocked``, ``session_locked``, ``session_running``,
    ``session_compacting`` and ``session_stuck``; ``detail`` carries optional
    diagnostics and is always a dict.
    """

    def __init__(self, agent_id: str, reason: str = "busy",
                 detail: Optional[dict] = None):
        super().__init__(f"{agent_id}: {reason}")
        self.agent_id = agent_id
        self.reason = reason
        self.detail = detail if detail else {}
+ api_port: int = 8083, + bootstrap_builder: Optional[Any] = None, + gateway_timeout: float = 600.0, + max_retries: int = 3, + max_monitor_timeouts: int = 3, + counter: Optional[Any] = None, + ): + """ + Args: + db_path: 项目黑板 DB 路径(用于写 task_attempts) + agent_timeout: Agent 超时秒数 + dry_run: 测试模式,不实际 spawn + api_host: API 地址(供 Agent 回写) + api_port: API 端口(供 Agent 回写) + """ + self.db_path = db_path + self.agent_timeout = agent_timeout + self.dry_run = dry_run + self.api_host = api_host + self.api_port = api_port + self.bootstrap_builder = bootstrap_builder + self.gateway_timeout = gateway_timeout + self.max_retries = max_retries + self.max_monitor_timeouts = max_monitor_timeouts + # v2.7.2: counter 引用(spawn_full_agent 内部 acquire/release) + self.counter = counter + # guardrails: 由 main.py 在初始化后赋值 + self.guardrails = None + + # session 注册表 {session_id: {...}} + self._sessions: Dict[str, Dict[str, Any]] = {} + # B2 compact 等待计数器 {task_id: count} + self._compact_waits: Dict[str, int] = {} + # B1 假死计数器 {task_id: count} + self._stuck_counts: Dict[str, int] = {} + self._valid_agents_cache: Optional[set] = None + + def _load_valid_agents(self) -> set: + """从 config/default.yaml 读取有效 Agent ID 列表(带缓存)""" + if self._valid_agents_cache is not None: + return self._valid_agents_cache + config_path = Path(__file__).parent.parent / "config" / "default.yaml" + if config_path.exists(): + try: + import yaml + with open(config_path) as f: + cfg = yaml.safe_load(f) + profiles = cfg.get("daemon", {}).get("agent_profiles", {}) + if profiles: + self._valid_agents_cache = set(profiles.keys()) + return self._valid_agents_cache + except Exception: + pass + self._valid_agents_cache = { + "zhangfei-dev", "guanyu-dev", "zhaoyun-data", + "jiangwei-infra", "pangtong-fujunshi", "simayi-challenger" + } + return self._valid_agents_cache + + @property + def active_sessions(self) -> Dict[str, Dict[str, Any]]: + """当前活跃的 spawn sessions""" + return {sid: s for sid, s in self._sessions.items() + if 
s.get("status") == "running"} + + def build_spawn_message( + self, + task_id: str, + title: str, + description: str, + task_type: str = "", + priority: int = 5, + must_haves: str = "", + project_id: str = "", + agent_id: str = "", + current_status: str = "claimed", + retry_context: str = "", + task: Optional[Any] = None, + project_config: Optional[Dict[str, Any]] = None, + spawn_type: str = "executor", # executor | discussion | review + ) -> str: + """构建 Agent spawn 的消息(优先用 BootstrapBuilder,fallback 用模板) + + Args: + current_status: 任务当前状态(动态生成状态机提示) + retry_context: 重试上下文(前轮产出摘要 + 审查意见) + task: Task 对象(BootstrapBuilder 用) + project_config: 项目配置(BootstrapBuilder 用) + spawn_type: spawn 类型(executor=执行, discussion=讨论, review=审查) + """ + # discussion 类型直接用模板(不走 BootstrapBuilder) + if spawn_type == "discussion": + return self._build_discussion_prompt( + task_id, title, description, must_haves, + project_id, agent_id) + + # handler 路径:Task/Mail/Toolchain 用各自的 PromptSection 构建 + handler = TaskTypeRegistry.get_by_project(project_id) + if handler: + from src.daemon.prompt_composer import PromptContext + # 从 must_haves 解析 mail 元数据(from / performative) + from_agent = "" + mail_type = "" + action_type = "" + action_steps = [] + try: + meta = json.loads(must_haves) if must_haves else {} + from_agent = meta.get("from", "") + mail_type = meta.get("performative", meta.get("type", "")) + # toolchain 字段提取 + action_type = meta.get("action_type", "") + action_steps = meta.get("steps", []) + except Exception: + pass + ctx = PromptContext( + task_id=task_id, title=title, description=description or "", + must_haves=must_haves or "", project_id=project_id, + agent_id=agent_id, role=spawn_type, + spawn_type=spawn_type, + from_agent=from_agent, mail_type=mail_type, + action_type=action_type, action_steps=action_steps, + ) + return handler.build_prompt(ctx) + + # 旧路径保留:_general 等非 handler 项目 + + # 走 BootstrapBuilder 新路径 + if self.bootstrap_builder and task is not None: + role_map = { + 
"executor": "executor", + "review": "reviewer", + "discussion": "planner"} + role = role_map.get(spawn_type, "executor") + bootstrap_prompt = self.bootstrap_builder.build_for_task( + task=task, + role=role, + ) + api_section = self._build_api_section( + project_id, task_id, agent_id) + return bootstrap_prompt + "\n\n---\n\n" + api_section + + # 无 BootstrapBuilder 或无 task 对象 → 最小 fallback + # 只保留任务上下文 + API 操作指令 + logger.warning( + "No BootstrapBuilder or task object, using minimal fallback") + return self._build_minimal_fallback( + task_id, title, description, must_haves, + project_id, agent_id) + + def _build_minimal_fallback(self, task_id, title, description, must_haves, + project_id, agent_id): + """最小 fallback:只有任务上下文 + API 指令""" + task_section = f"""## 任务 +{title} +{description or "(无描述)"} + +项目: {project_id} | ID: {task_id} +验收标准: {must_haves or "(无)"}""" + api_section = self._build_api_section(project_id, task_id, agent_id) + return task_section + "\n\n---\n\n" + api_section + + def _build_api_section(self, project_id: str, task_id: str, + agent_id: str) -> str: + """构建 API 回写操作指令(BootstrapBuilder 模式下补充)""" + # handler 项目(_mail/_toolchain)的 success_status 由 PromptSection 处理 + # 这里只处理无 handler 的项目(normal task) + handler = TaskTypeRegistry.get_by_project(project_id) + if handler: + success_status = '"done"' if handler.target_success_status == "done" else '"review"' + else: + success_status = '"review"' + return f"""## 操作指令 + +### 状态回写 +开始工作: +```bash +curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/tasks/{task_id}/status \ + -H 'Content-Type: application/json' \ + -d '{{"status": "working", "agent": "{agent_id}"}}' +``` + +### 写入产出 +```bash +curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/tasks/{task_id}/outputs \ + -H 'Content-Type: application/json' \ + -d '{{"agent": "{agent_id}", "type": "<类型>", "title": "<标题>", "content": "<内容>", "summary": "<摘要>"}}' +``` + +### 完成后 +成功:status → {success_status} 
| 失败:status → "failed" +""" + + def _build_discussion_prompt(self, task_id: str, title: str, + description: str, must_haves: str, + project_id: str, agent_id: str) -> str: + """构建讨论类 spawn prompt(§3.3 框架 + Boids)""" + goal_snapshot = description or title + constraints = must_haves or "(无特殊约束)" + + return DISCUSSION_PROMPT_TEMPLATE.format( + goal_snapshot=goal_snapshot, + constraints=constraints, + project_id=project_id, + task_id=task_id, + agent_id=agent_id, + api_host=self.api_host, + api_port=self.api_port, + ) + + def _inject_agent_identity(self, agent_id: str) -> str: + """#03: 注入 Agent 身份+专长""" + caps = "通用" + router = getattr(self, '_router_ref', None) + if router: + profile = router.agent_profiles.get(agent_id) + if profile and getattr(profile, 'capabilities_zh', None): + caps = ", ".join(profile.capabilities_zh) + return f"你是 {agent_id},专长: {caps}。" + + def _get_guardrails_summary(self) -> str: + """#03: 从 GuardrailEngine 提取红线摘要""" + if not self.guardrails: + return "无特殊限制" + try: + return "、".join(r.get("name", r.get("rule_id", "")) + for r in self.guardrails.rules[:6]) + except Exception: + return "无特殊限制" + + def _get_agent_profile(self, agent_id: str): + """获取 Agent 能力画像""" + router = getattr(self, '_router_ref', None) + if router: + return router.agent_profiles.get(agent_id) + return None + + def _build_mail_prompt(self, task_id: str, title: str, description: str, + must_haves: str, agent_id: str) -> str: + """构建 Mail 专用精简模板""" + # 解析 must_haves 获取 from 和 performative + from_agent = agent_id + performative = "request" + try: + meta = json.loads(must_haves) if must_haves else {} + from_agent = meta.get("from", agent_id) + performative = meta.get( + "performative", meta.get( + "type", "request")) + except Exception: + pass + + # 截断 title 和 text 用于模板安全 + safe_title = (title or "").replace('"', '\\"')[:100] + safe_text = (description or "").replace('"', '\\"') + + # 获取有效 Agent 列表(从 config/default.yaml 读取) + valid_agents_list = self._load_valid_agents() + 
valid_agents_str = " / ".join(sorted(valid_agents_list)) + + common_kwargs = dict( + from_agent=from_agent, + title=safe_title, + text=safe_text, + task_id=task_id, + agent_id=agent_id, + api_host=self.api_host, + api_port=self.api_port, + valid_agents=valid_agents_str, + ) + + if performative == "inform": + return MAIL_INFORM_TEMPLATE.format(**common_kwargs) + else: + return MAIL_REQUEST_TEMPLATE.format(**common_kwargs) + + async def spawn_full_agent( + self, + agent_id: str, + message: str, + new_session: bool = False, + task_id: Optional[str] = None, + on_complete: Optional[Any] = None, + use_main_session: bool = False, + task_db_path: Optional[Path] = None, + reuse_session_id: Optional[str] = None, + on_checks_passed: Optional[Any] = None, + skip_counter: bool = False, + broadcast_task_ids: Optional[List[str]] = None, + ) -> str: + """Spawn Full Agent(异步非阻塞) + + v2.7.2: counter acquire/release 在内部统一管理。 + 调用级生命周期:spawn 时 acquire,进程退出时 release(通过 wrapped_on_complete)。 + + Args: + on_complete: 业务回调(agent_id, outcome) - 不含 counter.release, + counter.release 由内部 wrapped_on_complete 保证。 + use_main_session: True = 投递到主 Agent session(不传 --session-id) + on_checks_passed: 所有检查通过后的回调(session check + counter acquire 后、subprocess 前) + reuse_session_id: 传入指定 session-id 复用(用于续杯) - deprecated,use_main_session=True 已替代 + + Returns: + session_id + + Raises: + AgentBusyError: agent 被 counter 占用或冷却中 + """ + # ── #07 Acquire-First: counter 前置 → session check 在锁内贴近 spawn ── + + # Step 0: 分配 session_id(纯计算,无 IO) + if use_main_session: + session_id = None + elif reuse_session_id: + session_id = reuse_session_id + else: + session_id = str(uuid.uuid4()) + _sid_key = session_id or "main" # counter 用的 key + + # Phase 0: Pre-acquire 修复(无锁) + # timeout/failed 状态先修复再 acquire。revive 只改 running→idle,幂等安全。 + # asyncio 协作式并发保证同一时刻只有一个协程在执行,revive 的 sessions.json + # 写操作不会真正并行。 + if use_main_session: + pre_state = self._check_session_state(agent_id) + if pre_state.get("status") in ("timeout", 
"failed"): + logger.info("Phase 0: %s status=%s, reviving before acquire", + agent_id, pre_state["status"]) + self._revive_session(agent_id) + elif pre_state.get("status") == "running" and not pre_state.get("lock_pid_alive"): + # status=running 但 lock PID 已死 → 假死,revive + logger.warning( + "Phase 0: %s status=running but lock PID dead, reviving", + agent_id) + self._revive_session(agent_id) + + # Phase 1: Counter acquire(互斥锁) + # v2.8.1 Bug-4 fix: retry 时跳过 counter(counter 从原始 spawn 保持到 retry 完成) + if self.counter and not skip_counter: + acquired = await self.counter.acquire(agent_id, _sid_key) + if not acquired: + raise AgentBusyError(agent_id, reason="counter_blocked") + + # Phase 2: Session check(在锁保护下,贴近 spawn) + # 并列收集所有 block 原因,统一判定。 + if use_main_session: + session_state = self._check_session_state(agent_id) + logger.info("Phase 2 session check for %s: status=%s lock_pid=%s lock_pid_alive=%s compact=%s", + agent_id, session_state.get( + 'status'), session_state.get('lock_pid'), + session_state.get('lock_pid_alive'), session_state.get('recent_compact')) + + blockers = [] + if session_state.get( + "lock_pid_alive") and not session_state.get("lock_expired"): + blockers.append( + ("session_locked", session_state.get("lock_pid"))) + if session_state.get("status") == "running": + if session_state.get("lock_pid_alive"): + # 真 running:外部进程占用 + blockers.append(("session_running", None)) + else: + # 假 running:lock PID 死了但 status 还在 running → Phase 2.5 处理 + pass + if session_state.get("recent_compact"): + blockers.append(("session_compacting", None)) + + if blockers: + # 释放 counter,报具体原因 + if self.counter and not skip_counter: + self.counter.release(agent_id, _sid_key) + primary_reason, primary_detail = blockers[0] + logger.info("Phase 2 blocked %s: %s (all=%s)", + agent_id, primary_reason, blockers) + raise AgentBusyError(agent_id, reason=primary_reason, + detail={"blockers": blockers}) + + # Phase 2.5: 假死修复(status=running + lock PID 死 → revive → 重检) + # 此场景应被 Phase 
0 提前修复,这里做兜底 + if session_state.get("status") == "running" and not session_state.get( + "lock_pid_alive"): + logger.warning("Phase 2.5: %s status=running + lock dead (should be caught in Phase 0), reviving", + agent_id) + self._revive_session(agent_id) + session_state = self._check_session_state(agent_id) + if session_state.get("status") == "running": + if self.counter and not skip_counter: + self.counter.release(agent_id, _sid_key) + raise AgentBusyError(agent_id, reason="session_stuck", + detail={"status": "running after revive"}) + + # Phase 3: on_checks_passed 回调 + # 注意:如果回调抛异常,counter 已 acquire 但 subprocess 未启动, + # wrapped_on_complete 不会执行。需在此 try/except 中手动 release。 + if on_checks_passed: + try: + on_checks_passed() + except Exception: + if self.counter and not skip_counter: + self.counter.release(agent_id, _sid_key) + raise + + if self.dry_run: + logger.info( + "[DRY RUN] Would spawn agent %s (session=%s)", + agent_id, + _sid_key) + self._register_session(_sid_key, agent_id, task_id, pid=None) + return _sid_key + + # 4. 
wrapped_on_complete 保证 counter release(闭包捕获 _sid_key) + async def _wrapped_on_complete(aid, outcome): + try: + if self.counter: + self.counter.release(aid, _sid_key) + finally: + if on_complete: + try: + result = on_complete(aid, outcome) + if asyncio.iscoroutine(result): + await result + except Exception: + logger.warning( + "Business on_complete failed for %s", aid, exc_info=True) + + cmd = [ + "openclaw", "agent", + "--agent", agent_id, + ] + if session_id: + cmd.extend(["--session-id", session_id]) + cmd.extend([ + "--message", message, + "--json", + "--timeout", str(int(self.gateway_timeout)), + ]) + + try: + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + self._register_session(session_id, agent_id, task_id, proc.pid, + broadcast_task_ids=broadcast_task_ids) + logger.info("Spawned agent %s (session=%s, pid=%d)", + agent_id, session_id, proc.pid) + + # Schedule monitor(传 wrapped_on_complete) + asyncio.create_task( + self._monitor_process(session_id, proc, agent_id, task_id, + on_complete=_wrapped_on_complete, + db_path=task_db_path or self.db_path) + ) + + return session_id + + except Exception as e: + # spawn 失败也要 release counter + if self.counter: + self.counter.release(agent_id, _sid_key) + logger.exception("Failed to spawn agent %s", agent_id) + self._record_attempt( + task_id, + agent_id, + "spawn_failed", + error=str(e)) + raise + + async def spawn_subagent( + self, + task_description: str, + task_id: Optional[str] = None, + ) -> str: + """Spawn Subagent(占位,实际通过 Gateway API) + + Returns: + session_id + """ + session_id = str(uuid.uuid4()) + + if self.dry_run: + logger.info( + "[DRY RUN] Would spawn subagent (session=%s)", + session_id) + self._register_session(session_id, "subagent", task_id, pid=None) + return session_id + + # TODO: F17 通过 Gateway API sessions_spawn 实现 + logger.info("Subagent spawn (session=%s) - placeholder", session_id) + self._register_session(session_id, 
"subagent", task_id, pid=None) + return session_id + + # ── 续杯 Prompt 模板 ── + + RETRY_PROMPT = """你收到一个续杯提醒。你的任务在执行过程中被中断了。 + +## 任务信息 + +- 项目: {project_id} +- 任务ID: {task_id} +- 标题: {title} +- 续杯次数: 第 {retry_count} 次(上限 {max_retries} 次) + +请检查 session 历史中你之前做了什么,然后继续未完成的工作。 + +## 操作指令 + +### 查看任务当前状态 +```bash +curl http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}?expand=all +``` + +### 如果已经完成,标记 review +```bash +curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \\ + -H 'Content-Type: application/json' \\ + -d '{{"status": "review", "agent": "{agent_id}"}}' +``` + +### 写入产出(如果之前没写) +```bash +curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/outputs \\ + -H 'Content-Type: application/json' \\ + -d '{{"agent": "{agent_id}", "type": "<类型>", "title": "<标题>", "content": "<内容>", "summary": "<摘要>"}}' +``` + +### 如果无法解决,标记失败 +```bash +curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \\ + -H 'Content-Type: application/json' \\ + -d '{{"status": "failed", "agent": "{agent_id}", "detail": "<失败原因>"}}' +``` + +{fallback_hint}""" + + async def _monitor_process( + self, + session_id: Optional[str], + proc: asyncio.subprocess.Process, + agent_id: str, + task_id: Optional[str], + on_complete: Optional[Any] = None, + db_path: Optional[Path] = None, + monitor_timeout_count: int = 0, + ) -> None: + """监控子进程全生命周期(设计文档 spawner-monitor-design.md)""" + stdout_chunks: list = [] + stderr_chunks: list = [] + + try: + # ── 等待进程退出 + 流式读取 ── + async def _read_streams(): + async def _read_out(): + while True: + chunk = await proc.stdout.read(4096) + if not chunk: + break + stdout_chunks.append(chunk) + + async def _read_err(): + while True: + chunk = await proc.stderr.read(4096) + if not chunk: + break + stderr_chunks.append(chunk) + + await asyncio.gather(_read_out(), _read_err(), proc.wait()) + + await asyncio.wait_for(_read_streams(), 
timeout=self.agent_timeout) + # ── 情况 A:进程退出 ── + exit_code = proc.returncode + await self._handle_exit( + session_id, agent_id, task_id, exit_code, + stdout_chunks, stderr_chunks, on_complete, db_path + ) + + except asyncio.TimeoutError: + # ── 情况 B:monitor timeout(进程没退出)── + logger.warning("Agent %s monitor timeout (session=%s, count=%d/%d)", + agent_id, session_id, monitor_timeout_count + 1, + self.max_monitor_timeouts) + await self._handle_monitor_timeout( + session_id, agent_id, task_id, proc, + on_complete, db_path, stderr_chunks, monitor_timeout_count + ) + + async def _handle_exit(self, session_id, agent_id, task_id, exit_code, + stdout_chunks, stderr_chunks, on_complete, db_path): + """情况 A:进程退出后的处理 + + v2.7.2: 进程退出 = counter release(由 on_complete = wrapped_on_complete 保证)。 + 只有 A2/A3(gateway_timeout)触发续杯,其他都不 retry。 + A9(api_error/429)额外推回 pending + 设冷却。 + """ + stdout_text = b"".join(stdout_chunks).decode("utf-8", errors="replace") + stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace") + + # 解析 stdout JSON + json_result = self._parse_stdout_json(stdout_text) + logger.info("Parsed JSON result for agent=%s session=%s: %s", + agent_id, session_id, json_result) + + # 查任务实际状态 + task_status = self._get_task_status( + db_path, task_id) if task_id else None + + # 分类 + cls = self._classify_outcome( + exit_code, + json_result, + stderr_text, + task_status, + stdout_text) + outcome = cls["outcome"] + + # 更新 session 状态 + sid = session_id or "main" + if sid in self._sessions: + self._sessions[sid]["status"] = outcome + self._sessions[sid]["completed_at"] = datetime.utcnow().isoformat() + self._sessions[sid]["exit_code"] = exit_code + if json_result: + self._sessions[sid]["meta"] = json_result + + # 记录 attempt + self._record_attempt( + task_id, agent_id, outcome, exit_code=exit_code, + db_path=db_path, + metadata={ + "status": json_result.get("status"), + "summary": json_result.get("summary"), + "fallback_used": json_result.get("fallback_used"), + 
"fallback_reason": json_result.get("fallback_reason"), + "task_status_at_exit": task_status, + } + ) + + logger.info("Agent %s finished (session=%s, outcome=%s, exit=%d, task_status=%s)", + agent_id, session_id, outcome, exit_code, task_status) + + # 广播反馈追踪(Phase 1 bug fix) + if task_id == "broadcast" and hasattr( + self, '_ticker') and self._ticker: + # 广播任务:从 session 信息取真实 task_id 列表,逐一回调 tracker + sess_info = self._sessions.get(session_id or "main", {}) + bt_ids = sess_info.get("broadcast_task_ids") or [] + # 广播场景一律标 no_reply:Agent 只 claim 一个任务, + # 其余任务的 tracker 不能被 claimed 清除 + for real_task_id in bt_ids: + self._ticker.record_broadcast_response( + real_task_id, agent_id, "no_reply") + elif task_id and hasattr(self, '_ticker') and self._ticker: + outcome_str = "claimed" if cls.get( + "status") == "ok" else "no_reply" + self._ticker.record_broadcast_response( + task_id, agent_id, outcome_str) + + if cls["should_retry"]: + # cooldown: 新增的可恢复场景(A14/A15/A16/A8/A10) + cooldown_seconds = cls.get("cooldown_seconds", 0) + if cooldown_seconds and self.counter: + self.counter.set_cooldown(agent_id, seconds=cooldown_seconds) + # A2/A3: gateway_timeout → 续杯(on_complete 会 release counter) + await self._do_retry( + session_id, agent_id, task_id, on_complete, db_path, + cls.get("retry_field", "retry_count") + ) + elif outcome == "api_error": + # A9: [DEPRECATED] api_error 已改为 should_retry=True 走续杯路径。 + # 此分支理论上不再命中,保留作为安全兜底。 + # A9: 429/API 错误 → release counter(on_complete)+ 推回 pending + 冷却 + # 有上限:api_retry_count 累计达 max_retries 则标 failed + await self._do_on_complete_async(on_complete, agent_id, outcome) + if self.counter: + self.counter.set_cooldown(agent_id) + if db_path and task_id: + retry_counts = self._get_retry_counts(db_path, task_id) + api_count = retry_counts.get("api_retry_count", 0) + 1 + retry_counts["api_retry_count"] = api_count + self._update_retry_counts(db_path, task_id, retry_counts) + if api_count >= self.max_retries: + logger.error("Task %s 
api_retry_count=%d >= max_retries, marking failed", + task_id, api_count) + self._mark_task(db_path, task_id, "failed", { + "reason": "max_api_retry_count", "count": api_count, + }) + else: + self._mark_task(db_path, task_id, "pending", { + "reason": "api_error_retry", + "api_retry_count": api_count, + }) + logger.info("Task %s pushed back to pending (api_error, api_retry=%d/%d)", + task_id, api_count, self.max_retries) + elif outcome == "fallback_timeout" and not cls["should_retry"]: + # A3/A3b: fallback 分级处理 + # fallback_count 从 task_attempts.metadata 读取, + # 达 max_retries 标 failed(A3),否则 retry + cooldown(A3b) + fallback_count = 0 + if db_path and task_id: + retry_counts = self._get_retry_counts(db_path, task_id) + fallback_count = retry_counts.get("fallback_count", 0) + 1 + retry_counts["fallback_count"] = fallback_count + self._update_retry_counts(db_path, task_id, retry_counts) + + if fallback_count >= self.max_retries: + # A3: 连续 fallback 达上限,标 failed + logger.error("A3 fallback exhausted: agent=%s session=%s task=%s " + "fallback_count=%d reason=%s", + agent_id, session_id, task_id, fallback_count, + json_result.get("fallback_reason")) + await self._do_on_complete_async(on_complete, agent_id, outcome) + if db_path and task_id: + self._mark_task(db_path, task_id, "failed", { + "reason": "fallback_exhausted", + "fallback_count": fallback_count, + "fallback_reason": json_result.get("fallback_reason"), + }) + else: + # A3b: fallback 未达上限,retry + cooldown + logger.warning("A3b fallback retry: agent=%s session=%s task=%s " + "fallback_count=%d/%d reason=%s", + agent_id, session_id, task_id, fallback_count, + self.max_retries, json_result.get("fallback_reason")) + if self.counter: + self.counter.set_cooldown(agent_id, seconds=60) + await self._do_retry( + session_id, agent_id, task_id, on_complete, db_path, + "fallback_retry_count" # 独立计数,不与 gateway_timeout 的 retry_count 共用 + ) + else: + # 其他:A1(completed), A4(agent_failed), A7(auth_failed), + # 
A8(gateway_unreachable), A11(lock_conflict), + # A10(compact_failed), A12(agent_error) + # v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间 + if outcome == "crashed" and self.counter: + self.counter.set_cooldown(agent_id, seconds=60) + logger.info( + "Crash cooldown set for %s: 60s (outcome=%s)", + agent_id, + outcome) + elif outcome in ("compact_failed", "process_crash", "session_stuck", + "compact_hanging", "agent_error", "compact_interrupted") and self.counter: + self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟 + logger.info( + "Error cooldown set for %s: 300s (outcome=%s)", + agent_id, + outcome) + # F1: 不可恢复 outcome → 立刻标 failed + 写黑板 + if outcome in ("auth_failed", + "agent_error") and db_path and task_id: + logger.error( + "Task %s: unrecoverable outcome=%s, marking failed immediately", + task_id, + outcome) + self._mark_task(db_path, task_id, "failed", { + "reason": outcome, + "stderr_preview": (stderr_text or "")[:500], + }) + # 注意: cooldown 期间任务状态仍为 working,但 counter 已释放。 + # DB 中的 working 是"假 working"——ticker 不会重新分配,_check_timeouts 会 + # 在 cooldown 结束后回收。如果 ticker 在此期间给同一 agent 分配新任务,属正常行为。 + # 进程退出 → on_complete release counter + # 任务状态由各 outcome 自行处理(或等 ticker) + await self._do_on_complete_async(on_complete, agent_id, outcome) + + async def _handle_monitor_timeout(self, session_id, agent_id, task_id, proc, + on_complete, db_path, stderr_chunks, + monitor_timeout_count): + """情况 B:monitor timeout""" + # 读已缓冲的 stderr + try: + remaining = await asyncio.wait_for(proc.stderr.read(), timeout=2.0) + if remaining: + stderr_chunks.append(remaining) + except Exception: + pass + + # stderr collected but not used in this handler + # (kept for potential future diagnostics) + b"".join(stderr_chunks).decode("utf-8", errors="replace") + + # 检查 session 状态 + state = self._check_session_state(agent_id) + + # B1: 假死 - 先复活,连续假死 ≥2 次再 failed + if state.get("status") == "running" and not state.get( + "lock_pid_alive", True): + # 假死计数 + stuck_count = 
self._stuck_counts.get(task_id, 0) + 1 + self._stuck_counts[task_id] = stuck_count + + if stuck_count >= 2: + # 连续假死 ≥2 次,标 failed + logger.error("Agent %s session stuck %d times (session=%s, lock PID dead)", + agent_id, stuck_count, session_id) + self._mark_task(db_path, task_id, "failed", + {"reason": "session_stuck", "stuck_count": stuck_count, + "diagnostics": state}) + await self._do_on_complete_async(on_complete, agent_id, "session_stuck") + return + + # 第 1 次假死 → 尝试复活 + logger.warning("Agent %s session stuck (attempt %d), reviving (session=%s)", + agent_id, stuck_count, session_id) + revived = self._revive_session(agent_id) + if revived: + logger.info("Agent %s session revived, releasing counter for ticker re-dispatch", + agent_id) + # release counter → 任务保持 working → ticker 下次 re-dispatch + await self._do_on_complete_async(on_complete, agent_id, "session_revived") + else: + # 复活失败 → 标 failed + logger.error( + "Agent %s revive failed, marking failed", agent_id) + self._mark_task(db_path, task_id, "failed", + {"reason": "revive_failed", "stuck_count": stuck_count, + "diagnostics": state}) + await self._do_on_complete_async(on_complete, agent_id, "revive_failed") + return + + # B2/B3/B4: 进程还活着 + # B2: compact 进行中 - 不计入 monitor timeout 计数,继续等 + if state.get("recent_compact"): + logger.info("Agent %s recent compaction detected, extending patience " + "(session=%s, monitor=%d/%d)", + agent_id, session_id, monitor_timeout_count, self.max_monitor_timeouts) + # 不递增 monitor_timeout_count,但最多额外等 max_monitor_timeouts 次 + # 用独立计数器防止无限等待 + compact_wait_count = self._compact_waits.get(task_id, 0) + 1 + self._compact_waits[task_id] = compact_wait_count + if compact_wait_count >= self.max_monitor_timeouts: + # #07.3 ACT-2: compact_hanging 不标 failed,只 release counter + # 进程还活着但不 monitor,等 ticker _check_timeouts 超时回收 → 重新 dispatch + logger.warning("Agent %s compact hanging after %d waits, releasing counter for ticker re-dispatch", + agent_id, compact_wait_count) + 
self._compact_waits.pop(task_id, None) + await self._do_on_complete_async(on_complete, agent_id, "compact_hanging") + return + # 继续等 + asyncio.create_task( + self._monitor_process( + session_id, proc, agent_id, task_id, + on_complete=on_complete, db_path=db_path, + monitor_timeout_count=monitor_timeout_count, + ) + ) + return + + # B3/B4: 无 compact,正常计数 + monitor_timeout_count += 1 + if monitor_timeout_count >= self.max_monitor_timeouts: + logger.error("Agent %s max monitor timeouts (session=%s, count=%d)", + agent_id, session_id, monitor_timeout_count) + self._mark_task(db_path, task_id, "failed", { + "reason": "max_monitor_timeouts", + "count": monitor_timeout_count, + "elapsed_seconds": monitor_timeout_count * int(self.agent_timeout), + "diagnostics": state, + }) + await self._do_on_complete_async(on_complete, agent_id, "max_monitor_timeouts") + return + + # 未超限:继续等(不 release counter) + logger.info("Agent %s continuing monitor (session=%s, count=%d/%d)", + agent_id, session_id, monitor_timeout_count, self.max_monitor_timeouts) + asyncio.create_task( + self._monitor_process( + session_id, proc, agent_id, task_id, + on_complete=on_complete, db_path=db_path, + monitor_timeout_count=monitor_timeout_count, + ) + ) + + async def _do_retry(self, session_id, agent_id, task_id, on_complete, + db_path, retry_field="retry_count"): + """续杯:手动 release counter 后通过 spawn_full_agent 重新 spawn + + v2.7.2: 进程已退出但 wrapped_on_complete 未被调用(只有 should_retry 分支走到这里)。 + 需要手动 release counter,然后 spawn_full_agent 内部会 acquire。 + on_complete(含 counter release)置为 None,避免 double release。 + """ + # v2.8.1 Bug-4 fix: 不再手动 release counter + 置 None on_complete + # counter 从原始 spawn 保持到 retry 完成,避免窗口期 ticker acquire 同一 agent + # on_complete 保留原始 wrapped_on_complete,retry 完成后自然 release counter + + # 续杯前检查任务状态,已终态则跳过 + if db_path and task_id: + try: + conn = get_connection(db_path) + try: + row = conn.execute( + "SELECT status FROM tasks WHERE id=?", (task_id,) + ).fetchone() + # Bug-6 fix: pending 
不是终态 + if row and row["status"] in ( + "done", "failed", "cancelled", "review"): + logger.info("Retry skip: task %s already %s (agent=%s)", + task_id, row["status"], agent_id) + # on_complete = wrapped_on_complete,会 release counter + await self._do_on_complete_async(on_complete, agent_id, "task_already_done") + return + finally: + conn.close() + except Exception: + logger.warning( + "Retry status check failed for %s, proceeding", task_id) + + # 直接读写 tasks 表的 retry_count + if retry_field == "retry_count" and db_path and task_id: + try: + conn = get_connection(db_path) + try: + conn.execute("BEGIN IMMEDIATE") + conn.execute( + "UPDATE tasks SET retry_count = COALESCE(retry_count, 0) + 1 WHERE id=?", + (task_id,), + ) + conn.commit() + row = conn.execute( + "SELECT retry_count FROM tasks WHERE id=?", (task_id,) + ).fetchone() + count = row["retry_count"] if row else 1 + finally: + conn.close() + except Exception: + logger.exception( + "Failed to update retry_count for task %s", task_id) + count = 1 + else: + retry_counts = self._get_retry_counts(db_path, task_id) + count = retry_counts.get(retry_field, 0) + 1 + retry_counts[retry_field] = count + self._update_retry_counts(db_path, task_id, retry_counts) + + if count >= self.max_retries: + logger.error("Agent %s max retries (session=%s, %s=%d)", + agent_id, session_id, retry_field, count) + self._mark_task(db_path, task_id, "failed", { + "reason": f"max_{retry_field}", "count": count, + }) + await self._do_on_complete_async(on_complete, agent_id, "max_retries") + return + + logger.info("Agent %s retry %s=%d/%d (session=%s)", + agent_id, retry_field, count, self.max_retries, session_id) + + # 构建续杯 message(Mail 用专用模板,Task 用标准模板) + task_info = self._get_task_info(db_path, task_id) or {} + project_id = task_info.get("project_id", "") + handler = TaskTypeRegistry.get_by_project(project_id) + is_handler = handler is not None + + if is_handler: + must_haves = task_info.get("must_haves", "{}") + try: + meta = 
json.loads(must_haves) if must_haves else {} + except Exception: + meta = {} + message = MAIL_RETRY_PROMPT.format( + from_agent=meta.get("from", "unknown"), + title=task_info.get("title", ""), + retry_count=count, + max_retries=self.max_retries, + ) + else: + fallback_hint = "\n⚠️ 之前有 fallback 执行,请调 API 检查任务当前状态和已有产出,确认是否已完成。" if retry_field == "retry_count" else "" + message = self.RETRY_PROMPT.format( + project_id=project_id, + task_id=task_id or "", + title=task_info.get("title", ""), + retry_count=count, + max_retries=self.max_retries, + api_host=self.api_host, + api_port=self.api_port, + agent_id=agent_id, + fallback_hint=fallback_hint, + ) + + # v2.7.2: 通过 spawn_full_agent 重新 spawn(内部 can_acquire + acquire) + # on_complete = wrapped_on_complete(含 counter release),作为业务回调传入 + try: + await self.spawn_full_agent( + agent_id=agent_id, + message=message, + task_id=task_id, + on_complete=on_complete, + use_main_session=True, # #02: 续杯走 main session + task_db_path=db_path, + skip_counter=True, # Bug-4 fix: counter 已在原始 spawn 中持有 + ) + except AgentBusyError as e: + # #07.3 ACT-3: session busy(compact/lock/running)= 暂时性阻塞 + # release counter → 任务保持 working → ticker 重新 dispatch + logger.warning("Retry spawn deferred: %s session busy (%s), releasing counter for ticker re-dispatch", + agent_id, e.reason) + await self._do_on_complete_async(on_complete, agent_id, "retry_session_busy") + except Exception: + logger.exception("Retry spawn failed for %s", agent_id) + await self._do_on_complete_async(on_complete, agent_id, "retry_spawn_failed") + + # ── 辅助方法 ── + + @staticmethod + def _parse_stdout_json(stdout_text: str) -> dict: + """解析 openclaw agent --json 的 stdout 输出 + + 返回可直接使用的字段:status, summary, fallback_used, fallback_reason, payloads + 不再提取 meta,直接用顶层字段。 + """ + text = stdout_text.strip() + if not text: + return {"status": None, "summary": None, "fallback_used": False, + "fallback_reason": None, "payloads": []} + try: + data = json.loads(text) + except 
json.JSONDecodeError: + # 多行输出,找最后一个 JSON + for line in reversed(text.splitlines()): + try: + data = json.loads(line) + break + except json.JSONDecodeError: + continue + else: + return {"status": None, "summary": None, "fallback_used": False, + "fallback_reason": None, "payloads": []} + + # 从 data.result.meta.executionTrace 取 fallback 信息 + result = data.get("result", {}) + meta = result.get("meta", {}) + trace = meta.get("executionTrace", {}) + + return { + "status": data.get("status"), + "summary": data.get("summary"), + "fallback_used": trace.get("fallbackUsed", False), + "fallback_reason": trace.get("fallbackReason"), + "payloads": result.get("payloads", []), + } + + @staticmethod + def _get_task_status( + db_path: Optional[Path], task_id: Optional[str]) -> Optional[str]: + """查任务实际 API 状态""" + if not db_path or not task_id: + return None + try: + conn = get_connection(db_path) + try: + row = conn.execute( + "SELECT status FROM tasks WHERE id=?", (task_id,) + ).fetchone() + return row["status"] if row else None + finally: + conn.close() + except Exception: + return None + + @staticmethod + def _get_task_info(db_path: Optional[Path], + task_id: Optional[str]) -> Optional[dict]: + """查任务基本信息""" + if not db_path or not task_id: + return None + try: + conn = get_connection(db_path) + try: + row = conn.execute( + "SELECT id, title, status FROM tasks WHERE id=?", ( + task_id,) + ).fetchone() + if not row: + return None + info = dict(row) + # 从 db_path 推断 project_id: data//blackboard.db + info["project_id"] = db_path.parent.name + return info + finally: + conn.close() + except Exception: + return None + + @staticmethod + def _revive_session(agent_id: str) -> bool: + """假死复活术:修改 sessions.json status 从 running 改为 idle""" + sessions_path = Path(os.environ.get( + "OPENCLAW_HOME", str(Path.home() / ".openclaw") + )) / "agents" / agent_id / "sessions" / "sessions.json" + if not sessions_path.exists(): + return False + try: + with open(sessions_path) as f: + sessions = 
json.load(f) + main_key = f"agent:{agent_id}:main" + main_session = sessions.get(main_key, {}) + if main_session.get("status") != "running": + return False # 不是 running 状态,不需要复活 + main_session["status"] = "idle" + sessions[main_key] = main_session + with open(sessions_path, "w") as f: + json.dump(sessions, f, indent=2) + logger.info( + "Revived %s: sessions.json status changed running→idle", + agent_id) + # #07 O4: 同时清理残留 lock 文件 + sf = main_session.get("sessionFile", "") + if sf: + lock_path = Path(sf + ".lock") + if lock_path.exists(): + try: + lock_path.unlink() + logger.info( + "Cleaned stale lock for %s: %s", + agent_id, + lock_path.name) + except Exception: + pass + return True + except Exception: + logger.exception("Failed to revive %s", agent_id) + return False + + # deprecated: §24 v3, 保留供方案 B 备选 + @staticmethod + def _get_recent_gateway_logs() -> list: + """获取当天和昨天的 gateway 日志路径。 + + 日志路径通过 OPENCLAW_LOG_DIR 环境变量配置,默认 /tmp/openclaw。 + 文件名格式:openclaw-{YYYY-MM-DD}.log + """ + from datetime import timedelta + log_dir = os.environ.get("OPENCLAW_LOG_DIR", "/tmp/openclaw") + now_local = datetime.now() + today = now_local.strftime("%Y-%m-%d") + yesterday = (now_local - timedelta(days=1)).strftime("%Y-%m-%d") + paths = [] + for d in [today, yesterday]: + p = os.path.join(log_dir, f"openclaw-{d}.log") + if os.path.exists(p): + paths.append(p) + return paths + + # deprecated: §24 v3, 保留供方案 B 备选(旧 rotation 结束标记检测,已被 v5 取代) + @staticmethod + def _check_compact_in_progress_gateway( + session_key: str, window_seconds: int = 120) -> bool: + """§24 v3 rotation-only: 检查 gateway 日志,判断指定 session 是否刚完成 compact。 + + 检测逻辑:读日志尾部 2MB,按目标 sessionKey 过滤, + 找最后一个 rotation 事件,如果在窗口内 → compact 可能仍在 retry 循环中。 + """ + from datetime import datetime as _dt, timezone as _tz, timedelta + log_paths = AgentSpawner._get_recent_gateway_logs() + if not log_paths: + return False + + now = _dt.now(_tz.utc) + window_start = now - timedelta(seconds=window_seconds) + + last_rotation_time = None + + 
for log_path in log_paths: + if not os.path.exists(log_path): + continue + try: + with open(log_path, "rb") as f: + f.seek(0, 2) + size = f.tell() + f.seek(max(0, size - 2 * 1024 * 1024)) + tail = f.read().decode("utf-8", errors="replace") + except Exception: + continue + + for line in tail.splitlines(): + if not line.strip(): + continue + try: + obj = json.loads(line) + except (json.JSONDecodeError, ValueError): + continue + + msg = obj.get("message", "") + # 只看包含目标 sessionKey 的事件 + if session_key not in msg: + continue + + # rotation 事件 + if "[compaction] rotated active transcript" in msg: + ts_str = obj.get("time", "") + if ts_str: + try: + event_time = _dt.fromisoformat( + ts_str.replace("Z", "+00:00")) + # timezone-aware: normalize to UTC + if event_time.tzinfo is None: + event_time = event_time.replace(tzinfo=_tz.utc) + if last_rotation_time is None or event_time > last_rotation_time: + last_rotation_time = event_time + except (ValueError, TypeError): + continue + + if last_rotation_time is not None: + return last_rotation_time >= window_start + + return False + + # ─── v5: compact 开始标记检测(gateway log)+ 结束标记检测(jsonl) ─── + + @staticmethod + def _find_compact_start_in_gateway_log( + agent_id: str, window_seconds: int = 900) -> Optional[str]: + """v5: 检查 gateway 日志,找最近的 compact 开始标记。 + + 只检测 precheck 路径:message 含 "[context-overflow-precheck]" 且 + "route=compact_then_truncate"。原因: + - overflow 标记("attempting auto-compaction")不含 sessionKey, + 被 `session_key not in msg` 前置过滤跳过,是死代码。 + - timeout 标记推测同理不含 sessionKey。 + - precheck 标记含 sessionKey 且实测总在 overflow 之前触发(同一 compact + 事件,precheck 先检测到,overflow 是 fallback),所以 precheck 已覆盖 + overflow 场景。 + - threshold/manual 触发的 compact 无开始标记(静默执行),依赖 + counter+lock+status 保护,不需要 gateway 日志检测。 + + 超时兜底:开始标记超过 window_seconds(默认 15 分钟)自动忽略。 + + 返回最近一个开始标记的 UTC ISO 时间字符串(带 Z 后缀),或 None。 + """ + from datetime import datetime as _dt, timezone as _tz, timedelta + log_paths = AgentSpawner._get_recent_gateway_logs() + if not 
log_paths: + return None + + session_key = f"agent:{agent_id}:main" + now = _dt.now(_tz.utc) + window_start = now - timedelta(seconds=window_seconds) + + latest_start_time = None # type: Optional[_dt] + latest_start_str = None # type: Optional[str] + + for log_path in log_paths: + if not os.path.exists(log_path): + continue + try: + with open(log_path, "rb") as f: + f.seek(0, 2) + size = f.tell() + f.seek(max(0, size - 2 * 1024 * 1024)) + tail = f.read().decode("utf-8", errors="replace") + except Exception: + continue + + for line in tail.splitlines(): + if not line.strip(): + continue + try: + obj = json.loads(line) + except (json.JSONDecodeError, ValueError): + continue + + msg = obj.get("message", "") + if session_key not in msg: + continue + + # 只检测 precheck 路径:route=compact_then_truncate + # overflow/timeout 标记不含 sessionKey,被前置过滤跳过(死代码),已删除 + if ("[context-overflow-precheck]" not in msg + or "route=compact_then_truncate" not in msg): + continue + + # 解析时间 + ts_str = obj.get("time", "") + if not ts_str: + continue + try: + event_time = _dt.fromisoformat( + ts_str.replace("Z", "+00:00")) + if event_time.tzinfo is None: + event_time = event_time.replace(tzinfo=_tz.utc) + else: + # 确保 UTC + event_time = event_time.astimezone(_tz.utc) + except (ValueError, TypeError): + continue + + # 超时兜底:超过窗口的忽略 + if event_time < window_start: + continue + + if latest_start_time is None or event_time > latest_start_time: + latest_start_time = event_time + latest_start_str = event_time.strftime( + "%Y-%m-%dT%H:%M:%S.") + f"{event_time.microsecond:06d}" + "Z" + + return latest_start_str + + @staticmethod + def _check_compaction_finished_in_jsonl( + session_file: str, after_time: str) -> bool: + """v5: 检查 jsonl 是否有 after_time 之后的 compaction entry。 + + 有 → compact 已完成 → True + 没有 → compact 可能仍在进行 → False + + after_time 格式:UTC ISO(如 2026-06-12T10:25:27.581Z)。 + jsonl timestamp 格式也是 UTC ISO。 + """ + if not session_file or not Path(session_file).exists(): + return False + try: + from 
datetime import datetime as _dt, timezone as _tz + after_dt = _dt.fromisoformat(after_time.replace("Z", "+00:00")) + if after_dt.tzinfo is None: + after_dt = after_dt.replace(tzinfo=_tz.utc) + + with open(session_file, "rb") as sf: + sf.seek(0, 2) + size = sf.tell() + sf.seek(max(0, size - 1048576)) + tail = sf.read().decode("utf-8", errors="replace") + + for line in reversed(tail.splitlines()): + if not line.strip(): + continue + try: + obj = json.loads(line) + except (json.JSONDecodeError, ValueError): + continue + if obj.get("type") == "compaction": + ts = obj.get("timestamp", "") + if ts: + try: + ct = _dt.fromisoformat(ts.replace("Z", "+00:00")) + if ct.tzinfo is None: + ct = ct.replace(tzinfo=_tz.utc) + if ct >= after_dt: + return True + except (ValueError, TypeError): + pass + # 遇到早于 after_time 的 entry → 不需要继续往前扫 + ts = obj.get("timestamp", "") + if ts: + try: + ct = _dt.fromisoformat(ts.replace("Z", "+00:00")) + if ct.tzinfo is None: + ct = ct.replace(tzinfo=_tz.utc) + if ct < after_dt: + break + except (ValueError, TypeError): + pass + return False + except Exception: + return False + + @staticmethod + def _check_recent_compaction_jsonl( + session_file: str, window_seconds: int = 900) -> bool: + """v2.8.2 Fix-2: 读 session jsonl 末尾,检查是否有 window_seconds 内的 compaction 记录。 + + 比 compactionCheckpoints 更可靠:Gateway 每次完成 compact 必然在 jsonl 末尾追加记录, + 但不保证更新 compactionCheckpoints。 + + v2.8.2: 窗口从 300s→900s(15min), 尾部读取从 50KB→1MB。 + 实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。 + 正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。 + """ + if not session_file or not Path(session_file).exists(): + return False + try: + from datetime import datetime, timezone + now = datetime.now(timezone.utc) + with open(session_file, "rb") as sf: + sf.seek(0, 2) + size = sf.tell() + sf.seek(max(0, size - 1048576)) + tail = sf.read().decode("utf-8", errors="replace") + for line in reversed(tail.splitlines()): + if not line.strip(): + continue + try: + import json as _json + obj = _json.loads(line) + 
except (_json.JSONDecodeError, ValueError): + continue + if obj.get("type") == "compaction": + ts = obj.get("timestamp", "") + if ts: + try: + ct = datetime.fromisoformat( + ts.replace("Z", "+00:00")) + if (now - ct).total_seconds() < window_seconds: + return True + except (ValueError, TypeError): + pass + ts = obj.get("timestamp", "") + if ts: + try: + ct = datetime.fromisoformat(ts.replace("Z", "+00:00")) + if (now - ct).total_seconds() >= window_seconds: + break + except (ValueError, TypeError): + pass + return False + except Exception: + return False + + @staticmethod + def _check_session_state(agent_id: str) -> dict: + """检查 sessions.json 和 lock 状态 + + v2.8.1: compact 检测改用 session jsonl 末尾扫描(Fix-1), + 替代失效的 compactionCheckpoints 检测。 + """ + result = { + "status": "unknown", + "lock_pid": None, + "lock_pid_alive": False, + "recent_compact": False} + sessions_path = Path(os.environ.get( + "OPENCLAW_HOME", str(Path.home() / ".openclaw") + )) / "agents" / agent_id / "sessions" / "sessions.json" + if not sessions_path.exists(): + return result + try: + with open(sessions_path) as f: + sessions = json.load(f) + main_key = f"agent:{agent_id}:main" + main_session = sessions.get(main_key, {}) + result["status"] = main_session.get("status", "unknown") + + # 检查 lock (v3.1: done/timeout 时 lock 视为过期) + sf = main_session.get("sessionFile", "") + if sf: + lock_path = Path(sf + ".lock") + if lock_path.exists(): + try: + lock_data = json.loads(lock_path.read_text()) + pid = lock_data.get("pid") + result["lock_pid"] = pid + if pid: + try: + os.kill(pid, 0) + result["lock_pid_alive"] = True + except ProcessLookupError: + result["lock_pid_alive"] = False + # session 已完成/超时 > lock 是 Gateway 冷却锁,不阻塞新 turn + if result["status"] in ("done", "timeout"): + result["lock_pid_alive"] = False + result["lock_expired"] = True + # running + lock 超时 >30分钟 > 视为 idle,允许 dispatch + elif result["status"] == "running" and result["lock_pid_alive"]: + try: + lock_data = 
json.loads(lock_path.read_text()) + created_at_str = lock_data.get("createdAt", "") + if created_at_str: + from datetime import datetime as _dt, timezone as _tz + created_dt = _dt.fromisoformat( + created_at_str.replace("Z", "+00:00")) + elapsed = (_dt.now(_tz.utc) - + created_dt).total_seconds() + if elapsed > 1800: # 30 minutes + result["lock_pid_alive"] = False + result["lock_expired"] = True + logger.info("Lock expired for %s: running + lock age %.0fs > 1800s", + agent_id, elapsed) + except Exception: + pass + except Exception: + pass + + # §24 v5: compact 检测 = gateway log 开始标记 + jsonl 结束标记配对 + # 旧方法 (_check_compact_in_progress_trajectory, _check_recent_compaction_jsonl) + # 保留为 deprecated 但不再调用。 + # + # 逻辑: + # 1. 查 gateway log 最近的 compact 开始标记(precheck route=compact_then_truncate) + # 2. 有开始标记 → 查 jsonl 是否有对应的 compaction entry(结束标记) + # 3. 有开始无结束 → 阻塞(recent_compact=True) + # 4. 有开始有结束 → 放行 + # 5. 无开始标记 → threshold/manual 静默触发,靠 counter+lock+status 保护 + # 6. 超时兜底:开始标记超过 15 分钟自动忽略 + if result["status"] not in ("idle", "unknown", None) and sf: + compact_start = AgentSpawner._find_compact_start_in_gateway_log(agent_id) + if compact_start: + finished = AgentSpawner._check_compaction_finished_in_jsonl(sf, compact_start) + if not finished: + # 有开始标记且未完成 → 阻塞 + result["recent_compact"] = True + # 如果已完成 → recent_compact 保持 False(放行) + # 没有开始标记 → threshold/manual 静默触发,不阻塞 + except Exception: + pass + return result + + @staticmethod + def _check_compact_in_progress_trajectory( + session_file: str, timeout_minutes: int = 30) -> bool: + """§24 v4: 检查 trajectory jsonl 尾部,判断 session 是否处于非正常状态。 + + 检测逻辑:最后一个完整 turn 没有 prompt.submitted/skipped → 非正常 → skip。 + 覆盖:compact、timeout、hook block、session 结束等所有非正常状态。 + + Returns: + True = 非正常状态(skip ticker) + False = 正常(不 skip)或超时兜底放行 + """ + if not session_file: + return False + traj_path = f"{session_file}.trajectory.jsonl" + if not os.path.exists(traj_path): + return False + + try: + from datetime import datetime as _dt, timezone 
as _tz + + # 读尾部 500KB + with open(traj_path, "rb") as f: + f.seek(0, 2) + size = f.tell() + f.seek(max(0, size - 500 * 1024)) + tail = f.read().decode("utf-8", errors="replace") + + if not tail.strip(): + return False + + # 解析所有有效行 + events = [] + for line in tail.splitlines(): + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + events.append(obj) + except (json.JSONDecodeError, ValueError): + continue + + if not events: + return False + + # 按 session.started 分组找 turn + # 每个 turn 以 session.started 开始 + turns = [] + current_turn = [] + for evt in events: + if evt.get("type") == "session.started": + if current_turn: + turns.append(current_turn) + current_turn = [evt] + else: + current_turn.append(evt) + if current_turn: + turns.append(current_turn) + + if not turns: + return False + + # 检查最后一个完整 turn(包含 session.started) + last_turn = turns[-1] + turn_types = {evt.get("type") for evt in last_turn} + + # 有 prompt.submitted 或 prompt.skipped → 正常 turn + if "prompt.submitted" in turn_types or "prompt.skipped" in turn_types: + return False + + # 非正常状态 → 检查超时兜底 + # 找最后一个有 ts 的事件 + last_ts = None + for evt in reversed(events): + ts_str = evt.get("ts") + if ts_str: + try: + last_ts = _dt.fromisoformat( + ts_str.replace("Z", "+00:00")) + if last_ts.tzinfo is None: + last_ts = last_ts.replace(tzinfo=_tz.utc) + except (ValueError, TypeError): + continue + break + + if last_ts is None: + # 没有 ts 信息,无法判断超时 → 非正常 → skip + return True + + now = _dt.now(_tz.utc) + elapsed = (now - last_ts).total_seconds() + if elapsed > timeout_minutes * 60: + logger.debug("Trajectory last event %.0fs ago > %dm, fallback pass", + elapsed, timeout_minutes) + return False # 兜底放行 + + return True # 非正常状态且未超时 + + except Exception as e: + logger.debug("_check_compact_in_progress_trajectory error: %s", e) + return False + + @staticmethod + def _classify_outcome(exit_code: int, json_result: dict, stderr_text: str, + task_status: Optional[str], stdout_text: str = "") -> dict: + 
"""分类退出原因,返回处理策略 + + v3.1: A0 拆分为 A14-A17(信号中断/stderr 智能分类)。 + A8/A10 改为可恢复 retry。cooldown 统一 60s。 + """ + status = json_result.get("status") + summary = json_result.get("summary", "") + fallback_used = json_result.get("fallback_used", False) + + # A4: 任务 DB status=failed(Agent 自己标的) + if task_status == "failed": + return {"outcome": "agent_failed", "should_retry": False} + + # A1: status=ok + completed + 非 fallback + if status == "ok" and summary == "completed" and not fallback_used: + return {"outcome": "completed", "should_retry": False} + + # A5/A6: status=ok + fallback + if status == "ok" and fallback_used: + return {"outcome": "fallback_timeout", "should_retry": False} + + # A2/A3: status=timeout → 唯一续杯场景 + # 注意: PM2 restart 时 daemon 自身也收到 SIGTERM,此时 retry spawn 的新进程 + # 会随 daemon 一起被杀。A14 retry 假设 daemon 存活,PM2 级重启不在此场景内。 + if status == "timeout": + return {"outcome": "gateway_timeout", "should_retry": True, + "retry_field": "retry_count"} + + # A0 拆分: 无 JSON 输出 + exit≠0 + if status is None and not stdout_text.strip() and exit_code != 0: + # A14: SIGINT(130) / SIGTERM(143) → 外部中断,可恢复 + if exit_code in (130, 143): + return {"outcome": "interrupted", "should_retry": True, + "retry_field": "retry_count", "cooldown_seconds": 60} + # A15/A16: stderr 含 network/compact 关键字 → 可恢复 + if stderr_text: + stderr_lower = stderr_text.lower() + if any(kw in stderr_lower for kw in [ + "econnrefused", "etimedout", "gateway closed", "econnreset"]): + return {"outcome": "gateway_unreachable", "should_retry": True, + "retry_field": "retry_count", "cooldown_seconds": 60} + if any(kw in stderr_lower for kw in [ + "compaction-diag", "context-overflow"]): + return {"outcome": "compact_interrupted", "should_retry": True, + "retry_field": "retry_count", "cooldown_seconds": 60} + # A17: 真正的 crash → 保持 working,ticker 兜底 + return {"outcome": "crashed", "should_retry": False, + "original": "process_crash"} + + # A13 revised: stdout 为空但 exit=0 → 信任进程退出码,视为正常完成 + # 实测发现 openclaw session=None 
+ exit=0 是正常场景(inform 通知等) + # 旧逻辑按 task_status 区分,非终态判 agent_error → 导致 inform 邮件永不标 done + if status is None and not stdout_text.strip() and exit_code == 0: + return {"outcome": "completed", "should_retry": False} + + # A7-A12: status=error → 不续杯,stderr 辅助分类 + if status == "error": + stderr_lower = stderr_text.lower() + if any(kw in stderr_lower for kw in [ + "401", "403", "unauthorized", "auth"]): + return {"outcome": "auth_failed", "should_retry": False} + if any(kw in stderr_lower for kw in [ + "econnrefused", "etimedout", "gateway closed", "econnreset"]): + return {"outcome": "gateway_unreachable", "should_retry": True, + "retry_field": "retry_count", "cooldown_seconds": 60} + if any(kw in stderr_lower for kw in [ + "rate_limit", "500", "503", "api error"]): + return {"outcome": "api_error", "should_retry": True, + "retry_field": "retry_count", "cooldown_seconds": 60} + if any(kw in stderr_lower for kw in [ + "compaction-diag", "context-overflow"]): + return {"outcome": "compact_failed", "should_retry": False} + if any(kw in stderr_lower for kw in [ + "lock", "busy", "concurrent", "lane task error"]): + return {"outcome": "lock_conflict", "should_retry": True, + "retry_field": "retry_count", "cooldown_seconds": 60} + return {"outcome": "agent_error", "should_retry": False} + + # 兜底:status 未知值 + return {"outcome": "agent_error", + "should_retry": False, "original": "unknown_status"} + + @staticmethod + def _get_retry_counts( + db_path: Optional[Path], task_id: Optional[str]) -> dict: + """从最新 task_attempt 的 metadata 读计数器""" + defaults = {"retry_count": 0, "connect_retry_count": 0, + "api_retry_count": 0, "lock_retry_count": 0, + "monitor_timeout_count": 0} + if not db_path or not task_id: + return defaults + try: + conn = get_connection(db_path) + try: + row = conn.execute( + "SELECT metadata FROM task_attempts WHERE task_id=? 
ORDER BY attempt_number DESC LIMIT 1", + (task_id,) + ).fetchone() + if row and row["metadata"]: + stored = json.loads(row["metadata"]) + for k in defaults: + if k in stored: + defaults[k] = stored[k] + finally: + conn.close() + except Exception: + pass + return defaults + + def _update_retry_counts(self, db_path: Optional[Path], + task_id: Optional[str], counts: dict): + """将 retry counts 写回最新 task_attempt 的 metadata""" + if not db_path or not task_id: + return + try: + conn = get_connection(db_path) + try: + conn.execute("BEGIN IMMEDIATE") + row = conn.execute( + "SELECT rowid, metadata FROM task_attempts " + "WHERE task_id=? ORDER BY attempt_number DESC LIMIT 1", + (task_id,) + ).fetchone() + if row: + meta = json.loads( + row["metadata"]) if row["metadata"] else {} + meta.update(counts) + conn.execute( + "UPDATE task_attempts SET metadata=? WHERE rowid=?", + (json.dumps(meta), row["rowid"]) + ) + conn.commit() + finally: + conn.close() + except Exception: + logger.exception( + "Failed to update retry counts for task %s", task_id) + + def _mark_task(self, db_path: Optional[Path], task_id: Optional[str], + status: str, detail: Optional[dict] = None): + """标记任务状态(用于 failed/escalate)""" + if not db_path or not task_id: + return + try: + conn = get_connection(db_path) + try: + conn.execute("BEGIN IMMEDIATE") + conn.execute( + "UPDATE tasks SET status=?, completed_at=datetime('now') WHERE id=?", + (status, task_id) + ) + if detail: + conn.execute( + "INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)", + (task_id, "daemon", status, json.dumps( + detail, ensure_ascii=False)) + ) + conn.commit() + finally: + conn.close() + # F2: conn 已关闭,Blackboard 内部自己 get_connection + if status == "failed": + reason = (detail or {}).get("reason", "unknown") + try: + from src.daemon.mail_notify import _is_mail_project, notify_mail_failed + if _is_mail_project(db_path): + # Mail 失败:通知发件人,不 @pangtong + notify_mail_failed(db_path, task_id, reason, detail) + else: + 
# Task 失败:@pangtong(F2 原逻辑) + from src.blackboard.operations import Blackboard + bb = Blackboard(db_path) + cid = bb.add_comment(task_id, "daemon", + f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入", + comment_type="system") + bb.record_mentions(cid, task_id, ["pangtong-fujunshi"]) + logger.info( + "Task %s: failure notified pangtong via comment+mention (reason=%s)", + task_id, + reason) + except Exception as e: + logger.warning("Task %s: failed to notify: %s", task_id, e) + except Exception: + logger.exception("Failed to mark task %s as %s", task_id, status) + + @staticmethod + def _do_on_complete(on_complete, agent_id, outcome): + """执行 on_complete 回调(同步+异步兼容)""" + if not on_complete: + return + try: + result = on_complete(agent_id, outcome) + if asyncio.iscoroutine(result): + # 注意:这里是同步调用的,不能 await + # 在 _monitor_process 的 async 上下文中应该用 await + pass + except Exception: + pass + + async def _do_on_complete_async(self, on_complete, agent_id, outcome): + """异步执行 on_complete 回调""" + if not on_complete: + return + try: + result = on_complete(agent_id, outcome) + if asyncio.iscoroutine(result): + await result + except Exception: + logger.warning( + "on_complete callback failed for %s", + agent_id, + exc_info=True) + + def _register_session( + self, + session_id: str, + agent_id: str, + task_id: Optional[str], + pid: Optional[int], + broadcast_task_ids: Optional[List[str]] = None, + ) -> None: + """注册 spawn session""" + self._sessions[session_id] = { + "agent_id": agent_id, + "task_id": task_id, + "pid": pid, + "status": "running", + "started_at": datetime.utcnow().isoformat(), + "completed_at": None, + "broadcast_task_ids": broadcast_task_ids, + } + + def _record_attempt( + self, + task_id: Optional[str], + agent_id: str, + outcome: str, + exit_code: Optional[int] = None, + error: Optional[str] = None, + metadata: Optional[dict] = None, + db_path: Optional[Path] = None, + ) -> None: + """记录 task_attempt""" + # 广播 spawn 产生的 "broadcast" task_id 不记录 attempts,避免脏数据 + if 
task_id == "broadcast": + return + effective_db = db_path or self.db_path + if not task_id or not effective_db: + return + + try: + conn = get_connection(effective_db) + try: + conn.execute("BEGIN IMMEDIATE") + row = conn.execute( + "SELECT MAX(attempt_number) as max_a FROM task_attempts WHERE task_id=?", + (task_id,), + ).fetchone() + attempt_number = (row["max_a"] or 0) + 1 + + meta = metadata or {} + if error: + meta["error"] = error + conn.execute( + "INSERT INTO task_attempts " + "(task_id, attempt_number, agent, outcome, exit_code, metadata, completed_at) " + "VALUES (?,?,?,?,?,?,datetime('now'))", + (task_id, attempt_number, agent_id, outcome, + exit_code, json.dumps(meta)), + ) + conn.execute( + "INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)", + (task_id, agent_id, + "agent_completed" if outcome == "completed" else "daemon_tick", + json.dumps({"outcome": outcome, "attempt": attempt_number})), + ) + conn.commit() + finally: + conn.close() + except Exception: + logger.exception("Failed to record attempt for task %s", task_id) + + def get_session(self, session_id: str) -> Optional[Dict[str, Any]]: + """获取 session 信息""" + return self._sessions.get(session_id) + + def get_session_by_agent(self, agent_id: str) -> Optional[Dict[str, Any]]: + """v2.7.2: 根据 agent_id 获取活跃 session 信息(用于进程存活性检查)""" + for sid, info in self._sessions.items(): + if info.get("agent_id") == agent_id and info.get( + "status") == "running": + return info + return None + + def cleanup_session(self, session_id: str) -> None: + """清理 session""" + if session_id in self._sessions: + session = self._sessions[session_id] + task_id = session.get("task_id") + del self._sessions[session_id] + # 清理 B2 compact 等待计数器 + if task_id and task_id in self._compact_waits: + del self._compact_waits[task_id] diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py new file mode 100644 index 
0000000..4ecf503 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py @@ -0,0 +1,512 @@ +"""toolchain_handler.py - 工具链事件 handler。 + +处理 Gitea Webhook 事件(CI 失败、Review 请求、Issue 指派等)。 +L2 引擎层强约束:输入(结构化步骤)+ 执行(Red Flags)+ 输出(action_report 验证)。 +""" +from __future__ import annotations + +import json +import logging +import urllib.request +from pathlib import Path +from typing import Dict, List + +from src.daemon.base_task_handler import BaseTaskHandler, VerifyResult +from src.daemon.prompt_composer import PromptComposer, PromptContext +from src.daemon.toolchain_templates import render_template, _TEMPLATE_MAP +from src.blackboard.db import get_connection + +logger = logging.getLogger("moziplus-v2.handler.toolchain") + +# --------------------------------------------------------------------------- +# Gitea API 配置 +# --------------------------------------------------------------------------- + +_GITEA_BASE = "http://192.168.2.154:3000/api/v1" +_GITEA_TOKEN = "a6d596b826f4bfeaf983ef4d25ac25dab95bbc4e" + +# 业务失败连续次数阈值,超过则升级为系统失败 +_BUSINESS_FAIL_THRESHOLD = 3 + +# action_type → action_hint 映射 +_ACTION_HINTS: Dict[str, str] = { + "review_result": "你收到一个 Review 结果通知,这是一个需要你执行动作的事件(不是纯通知)。", + "review_request": "你收到一个 Review 请求,这是一个需要你审查并提交 Review 的事件。", + "review_updated": "你收到一个 PR 更新通知,这是一个需要你重新审查修改部分的事件。", + "review_comment": "你收到一个 Review 评论,这是一个需要你查看并响应的事件。", + "ci_failure": "你收到一个 CI 失败通知,这是一个需要你修复失败测试的事件。", + "issue_assigned": "你收到一个 Issue 指派,这是一个需要你编码实现的事件。", + "deploy_failure": "你收到一个部署失败通知,这是一个需要你排查并修复的事件。", + "mention": "你收到一个 @mention 通知,这是一个需要你按指引响应的事件。", + "review_merged": "你收到一个 PR 合并通知。这是一条纯通知,阅读即可。", + "infrastructure_failure": "你收到一个基础设施问题报告,请排查并修复。", +} + + +# --------------------------------------------------------------------------- +# Toolchain PromptSections +# --------------------------------------------------------------------------- + +class ToolchainContextSection: + """事件类型 + 事件详情 + 结构化步骤 + 
action_hint(priority=10)""" + + name: str = "toolchain_context" + priority: int = 10 + + def render(self, context: PromptContext) -> str: + event_type = context.event_type + event_data: Dict = context.event_data or {} + + # Part 1: 事件信息(现有模板引擎) + if event_type in _TEMPLATE_MAP: + variables = {k: str(v) for k, v in event_data.items()} + event_text = render_template(event_type, variables) + else: + lines = ["## 工具链事件", ""] + lines.append(f"- **事件类型**: {event_type or '未知'}") + if event_data: + lines.append("- **事件详情**:") + for key, value in event_data.items(): + lines.append(f" - {key}: {value}") + lines.append("") + event_text = "\n".join(lines) + + # Part 2: 结构化编号步骤(新增,从 action_steps 渲染) + steps: List[str] = context.action_steps or [] + if steps: + step_lines = ["", "### 必须执行的步骤", ""] + for i, step in enumerate(steps, 1): + step_lines.append(f"{i}. {step}") + steps_text = "\n".join(step_lines) + else: + steps_text = "" + + # Part 3: action 指引(新增,按 action_type 选择) + action_hint = _ACTION_HINTS.get( + context.action_type, + "你收到一个工具链事件,这是一个需要你执行动作的事件。", + ) + + return f"{action_hint}\n\n{event_text}{steps_text}" + + def should_include(self, context: PromptContext) -> bool: + return True + + +class ToolchainApiSection: + """API 操作指令(priority=40)-- action_report 提交指引""" + + name: str = "toolchain_api" + priority: int = 40 + + API_HOST = "localhost:8083" + + def render(self, context: PromptContext) -> str: + task_id = context.task_id + project_id = context.project_id + agent_id = context.agent_id + + lines = [ + "## API 操作指令", + "", + f"项目 ID: `{project_id}`", + f"任务 ID: `{task_id}`", + "", + "### 完成后必须提交 action report", + "", + "执行完所有步骤后,必须提交 action report:", + "```bash", + f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/comments" \\', + ' -H "Content-Type: application/json" \\', + f' -d \'{{"author": "{agent_id}", "comment_type": "action_report", "body": "简要描述你执行了什么操作及结果"}}\'', + "```", + "", + "⚠️ 不提交 action report 的任务会被标记为 
failed。", + "", + "### 提交产出", + "", + "如有产出(如 review 结果、修复方案),提交到任务 outputs:", + "```bash", + f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/outputs" \\', + ' -H "Content-Type: application/json" \\', + ' -d \'{"content": "<你的产出内容>", "type": "text"}\'', + "```", + "", + "### 需要其他角色支持时", + "", + "如果在执行过程中需要其他角色协助(如缺数据、需要审批等),在关联的 PR/Issue 上创建 comment @对方:", + "```bash", + f'curl -s -X POST "{_GITEA_BASE}/repos/{{repo}}/issues/{{pr_number}}/comments" \\', + f' -H "Authorization: token " \\', + ' -H "Content-Type: application/json" \\', + ' -d \'{"body": "@{agent-id} 需要你的支持:{描述问题}"}\'', + "```", + "", + "⚠️ 不要使用 Mail API(飞鸽传书)。所有协作通过 Gitea 留痕。", + "", + ] + return "\n".join(lines) + + def should_include(self, context: PromptContext) -> bool: + return True + + +class ToolchainConstraintsSection: + """硬约束 + Red Flags(priority=50)""" + + name: str = "toolchain_constraints" + priority: int = 50 + + def render(self, context: PromptContext) -> str: + lines = [ + "## 硬约束(必须遵守)", + "", + "⚠️ 以下是强制要求,不是建议或参考。违反任何一条都会导致任务失败。", + "", + "### 1. 必须按步骤执行", + '- 检查上方“必须执行的步骤”列表', + '- 逐条执行每个步骤,不可跳过', + '- 不要只读不做——这不是纯通知', + "", + "### 2. 必须提交 action report", + '- 执行完所有步骤后,必须提交 action report', + "- 提交方式:POST comment(comment_type='action_report')", + '- 报告内容:简要描述你执行了什么操作、结果如何', + '- ⚠️ 不提交 action report 的任务会被标记为 failed', + "", + "### 3. 不要执行任何状态转换命令", + '- 不要手动标 working/done/review/failed,系统会自动处理', + "", + "### 4. 不需要回复", + '- action report 就是你的完成凭证', + '- 不要发送 Mail(飞鸽传书),你的所有操作在 toolchain 流程内完成', + "", + "### 5. 
所有协作通过 Gitea 完成", + '- 如果遇到问题需要其他角色支持,在关联的 PR/Issue 上创建 comment @对方', + '- 不要使用 Mail API(飞鸽传书)发送消息', + '- 你的所有操作都在 toolchain 流程内,通过 Gitea 留痕', + "", + "### Red Flags(如果脑海中出现以下想法,说明你错了)", + "", + '| Agent 想法 | Red Flag 驳回 |', + '|------------|--------------|', + '| “这个通知看看就行了” | ❌ 错!这是 action 指令,必须执行步骤列表中的每一项 |', + '| “我不需要做任何事” | ❌ 错!检查“必须执行的步骤”列表,每一步都要执行 |', + '| “先放着等会处理” | ❌ 错!立即执行,不要推迟 |', + '| “我已经知道了” | ❌ 知道不等于执行。执行步骤 + 提交 action report 才算完成 |', + '| “步骤太多了,选几个做就行” | ❌ 错!必须逐条执行,不可跳过 |', + '| “这个步骤不适用于当前情况” | ❌ 如果确实不适用,在 action report 中说明原因,但其他步骤必须执行 |', + "", + ] + return "\n".join(lines) + + def should_include(self, context: PromptContext) -> bool: + return True + + +# --------------------------------------------------------------------------- +# ToolchainHandler +# --------------------------------------------------------------------------- + +class ToolchainHandler(BaseTaskHandler): + """工具链事件 handler。""" + + task_type = "toolchain" + virtual_project = "_toolchain" + display_name = "工具链事件" + + def target_success_status(self) -> str: + return "done" + + def pre_spawn(self, task_id: str, db_path: Path) -> bool: + """auto_working:pending → working""" + return self._auto_mark_working(task_id, db_path) + + def get_sections(self) -> list: + """返回 3 个 Toolchain PromptSection 实例""" + return [ + ToolchainContextSection(), + ToolchainApiSection(), + ToolchainConstraintsSection(), + ] + + def build_prompt(self, context: PromptContext) -> str: + """通过 PromptComposer 拼装 sections 为最终 prompt""" + composer = PromptComposer() + composer.add_many(self.get_sections()) + return composer.compose(context) + + def verify_completion(self, task_id: str, db_path: Path) -> VerifyResult: + """检查 action report(精确验证)+ 三层 fallback""" + try: + conn = get_connection(db_path) + try: + # 特殊处理:infrastructure_failure 始终通过(防递归) + row = conn.execute( + "SELECT must_haves FROM tasks WHERE id=?", (task_id,) + ).fetchone() + if row and row["must_haves"]: + try: + meta = 
json.loads(row["must_haves"]) + except Exception: + meta = {} + if meta.get("action_type") == "infrastructure_failure": + return VerifyResult(True, "infrastructure_passthrough", + "infrastructure_failure auto-pass") + + # 特殊处理:review_merged 始终通过(纯通知) + if meta.get("action_type") == "review_merged": + return VerifyResult(True, "merged_passthrough", + "review_merged auto-pass") + + # 1. 优先检查 action_report comment + report_row = conn.execute( + "SELECT id FROM comments WHERE task_id=? " + "AND comment_type='action_report' LIMIT 1", + (task_id,) + ).fetchone() + if report_row: + return VerifyResult(True, "has_action_report", "action_report found") + + # 2. fallback:检查 output(向后兼容) + output_count = conn.execute( + "SELECT COUNT(*) FROM outputs WHERE task_id=?", (task_id,) + ).fetchone()[0] + if output_count > 0: + return VerifyResult(True, "has_output", f"output_count={output_count}") + + # 3. fallback:检查有实质内容的 comment(向后兼容) + comment_count = conn.execute( + "SELECT COUNT(*) FROM comments WHERE task_id=? 
" + "AND author != 'system' AND LENGTH(body) >= 20", + (task_id,) + ).fetchone()[0] + if comment_count > 0: + return VerifyResult(True, "has_comment", f"comment_count={comment_count}") + + return VerifyResult(False, "no_action", + "no action_report, no output, no valid comment") + finally: + conn.close() + except Exception as e: + logger.error("Toolchain %s: verify error: %s", task_id, e) + return VerifyResult(False, "verify_error", str(e)) + + def on_failure(self, task_id: str, agent_id: str, + db_path: Path, verify: VerifyResult) -> None: + """验证失败 → 三分路处理(业务/系统/基础设施)""" + self._mark_task_status(db_path, task_id, "failed") + logger.info("Toolchain %s: verify failed (%s), marked failed", + task_id, verify.reason) + + # 读取 must_haves 获取事件上下文 + meta = {} + try: + conn = get_connection(db_path) + row = conn.execute( + "SELECT must_haves FROM tasks WHERE id=?", (task_id,) + ).fetchone() + if row and row["must_haves"]: + meta = json.loads(row["must_haves"]) + # 统计该 task 的业务失败次数 + fail_count = conn.execute( + "SELECT COUNT(*) FROM events WHERE task_id=? 
" + "AND event_type='status_change' AND payload LIKE '%failed%'", + (task_id,) + ).fetchone()[0] + conn.close() + except Exception: + fail_count = 0 + + action_type = meta.get("action_type", "") + context_data = meta.get("context", {}) + assignee = meta.get("assignee", "") or meta.get("from", "") + + # 三分路决策 + route = self._classify_failure(verify, fail_count) + + if route == "business": + self._handle_business_failure( + task_id, agent_id, verify, action_type, context_data, assignee, db_path) + elif route == "system": + self._handle_system_failure( + task_id, agent_id, verify, action_type, context_data, db_path) + else: # infrastructure + self._handle_infrastructure_failure( + task_id, agent_id, verify, db_path) + + def _classify_failure(self, verify: VerifyResult, fail_count: int) -> str: + """分类失败类型:business / system / infrastructure""" + # verify_error 或 DB 不可用 → 基础设施失败 + if verify.reason == "verify_error": + return "infrastructure" + # 连续业务失败超过阈值 → 升级为系统失败 + if fail_count >= _BUSINESS_FAIL_THRESHOLD: + return "system" + # 默认:业务失败 + return "business" + + def _handle_business_failure( + self, task_id: str, agent_id: str, verify: VerifyResult, + action_type: str, context_data: dict, assignee: str, + db_path: Path, + ) -> None: + """业务失败 → 在关联 PR/Issue 上创建 comment @原始 assignee""" + repo = context_data.get("repo", "") + pr_number = context_data.get("pr_number") or context_data.get("issue_number", "") + + if repo and pr_number: + comment_body = ( + f"@{assignee or agent_id} 工具链任务执行失败\n\n" + f"任务 ID: {task_id}\n" + f"失败原因: {verify.reason}\n" + f"证据: {verify.evidence}\n\n" + f"请检查黑板任务并处理。" + ) + success = self._create_gitea_comment(repo, pr_number, comment_body) + if success: + logger.info("Toolchain %s: business failure → Gitea comment on %s#%s", + task_id, repo, pr_number) + return + # Gitea API failed → escalate to system failure + logger.warning( + "Toolchain %s: Gitea comment failed, escalating to system failure", + task_id) + self._handle_system_failure( + 
task_id, agent_id, verify, action_type, context_data, db_path) + else: + # 没有 PR/Issue 关联 → fallback 到系统失败 + logger.warning( + "Toolchain %s: no PR/Issue context for business failure, " + "escalating to system failure", task_id) + self._handle_system_failure( + task_id, agent_id, verify, action_type, context_data, db_path) + + def _handle_system_failure( + self, task_id: str, agent_id: str, verify: VerifyResult, + action_type: str, context_data: dict, db_path: Path, + ) -> None: + """系统失败 → 创建 Gitea Issue @pangtong-fujunshi""" + repo = context_data.get("repo", "sanguo/sanguo_moziplus_v2") + title = f"[toolchain-handler] 工具链事件处理失败: {task_id}" + body = ( + f"任务 {task_id} 验证失败\n\n" + f"事件类型: {action_type or '未知'}\n" + f"失败原因: {verify.reason}\n" + f"证据: {verify.evidence}\n\n" + f"@pangtong-fujunshi 请检查黑板任务并手动处理。" + ) + + # 尝试在 Gitea 创建 Issue + created = self._create_gitea_issue(repo, title, body, ["pangtong-fujunshi"]) + if created: + logger.info("Toolchain %s: system failure → Gitea Issue created on %s", + task_id, repo) + else: + # Gitea API 不可用 → 基础设施失败 + logger.error( + "Toolchain %s: Gitea API unavailable, escalating to infrastructure failure", + task_id) + self._handle_infrastructure_failure( + task_id, agent_id, verify, db_path) + + def _handle_infrastructure_failure( + self, task_id: str, agent_id: str, + verify: VerifyResult, db_path: Path, + ) -> None: + """基础设施失败 → _send_toolchain_task @jiangwei-infra(防递归)""" + # 直接在 _toolchain DB 创建 task(不走 Gitea webhook) + try: + from src.api.toolchain_routes import _send_toolchain_task + _send_toolchain_task( + to_agent="jiangwei-infra", + title=f"[基础设施] Gitea API 不可用 - {task_id}", + description=( + f"Gitea API 不可用,原任务 {task_id} 无法通过正常路径处理。\n" + f"请检查 Gitea 服务状态和网络连通性。" + ), + event_type="infrastructure_failure", + action_type="infrastructure_failure", + steps=[ + "检查 Gitea 服务状态(http://192.168.2.154:3000)", + "检查网络连通性", + "恢复后提交 action report", + ], + context_data={"original_task_id": task_id, "verify_reason": 
verify.reason}, + source="toolchain_handler", + ) + logger.info("Toolchain %s: infrastructure failure → task created for jiangwei-infra", + task_id) + except Exception as e: + logger.error( + "Toolchain %s: failed to create infrastructure_failure task: %s", + task_id, e) + + # ----------------------------------------------------------------------- + # Gitea API 辅助 + # ----------------------------------------------------------------------- + + def _create_gitea_comment( + self, repo: str, pr_number: int, body: str, + ) -> bool: + """在 PR/Issue 上创建 comment。返回是否成功。""" + payload = json.dumps({"body": body}, ensure_ascii=False).encode("utf-8") + try: + req = urllib.request.Request( + f"{_GITEA_BASE}/repos/{repo}/issues/{pr_number}/comments", + data=payload, + headers={ + "Authorization": f"token {_GITEA_TOKEN}", + "Content-Type": "application/json", + }, + ) + urllib.request.urlopen(req, timeout=5) + return True + except Exception as e: + logger.warning("Gitea comment failed on %s#%s: %s", repo, pr_number, e) + return False + + def _create_gitea_issue( + self, repo: str, title: str, body: str, + assignees: list = None, + ) -> bool: + """创建 Gitea Issue。返回是否成功。""" + data = {"title": title, "body": body} + if assignees: + data["assignees"] = assignees + payload = json.dumps(data, ensure_ascii=False).encode("utf-8") + try: + req = urllib.request.Request( + f"{_GITEA_BASE}/repos/{repo}/issues", + data=payload, + headers={ + "Authorization": f"token {_GITEA_TOKEN}", + "Content-Type": "application/json", + }, + ) + urllib.request.urlopen(req, timeout=5) + return True + except Exception as e: + logger.warning("Gitea create issue failed on %s: %s", repo, e) + return False + + # ----------------------------------------------------------------------- + # 兼容:保留旧方法签名(但不再被 on_failure 调用) + # ----------------------------------------------------------------------- + + def _build_gitea_links(self, event_type: str, event_data: dict) -> str: + """根据事件类型构建 Gitea 链接。""" + links = [] + repo 
= event_data.get("repo", "") + base_url = "http://192.168.2.154:3000" + + if "pr_number" in event_data: + links.append(f"PR: {base_url}/{repo}/pulls/{event_data['pr_number']}") + if "issue_number" in event_data: + links.append(f"Issue: {base_url}/{repo}/issues/{event_data['issue_number']}") + if "commit" in event_data: + links.append(f"Commit: {base_url}/{repo}/commit/{event_data['commit']}") + if "branch" in event_data and "commit" not in event_data: + links.append(f"分支: {event_data['branch']}") + + return "\n".join(links) if links else "(无法提取链接,请检查黑板任务详情)" diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py new file mode 100644 index 0000000..44ab599 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py @@ -0,0 +1,89 @@ +"""工具链事件模板引擎(Toolchain Event Hub) + +加载 templates/toolchain/ 下的 Markdown 模板,提供 {variable} 占位符渲染。 +""" + +from __future__ import annotations + +import logging +from collections import defaultdict +from pathlib import Path +from typing import Dict + +logger = logging.getLogger(__name__) + +TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "toolchain" + +# 模板文件名映射 +_TEMPLATE_MAP: Dict[str, str] = { + "review_request": "review_request.md", + "review_result": "review_result.md", + "issue_assigned": "issue_assigned.md", + "ci_failure": "ci_failure.md", + "deploy_failure": "deploy_failure.md", + "review_updated": "review_updated.md", + "review_comment": "review_comment.md", + "review_merged": "review_merged.md", + "mention": "mention.md", +} + +# 模板缓存 +_template_cache: Dict[str, str] = {} + + +def _load_template(name: str) -> str: + """加载并缓存模板文件内容。 + + Args: + name: 模板名称(不含 .md 后缀) + + Returns: + 模板文本内容 + + Raises: + FileNotFoundError: 模板文件不存在 + """ + if name in _template_cache: + return _template_cache[name] + + filename = _TEMPLATE_MAP.get(name) + if not filename: + raise 
ValueError(f"Unknown template: {name}") + + path = TEMPLATES_DIR / filename + if not path.exists(): + raise FileNotFoundError(f"Template not found: {path}") + + content = path.read_text(encoding="utf-8") + _template_cache[name] = content + logger.debug("Loaded template: %s (%d bytes)", name, len(content)) + return content + + +def _escape_braces(value: str) -> str: + """转义花括号防止 format_map 报错""" + return str(value).replace("{", "{{").replace("}", "}}") + + +def render_template(name: str, variables: Dict[str, str]) -> str: + """渲染模板,将 {variable} 占位符替换为实际值。 + + 使用 defaultdict(str) 确保未提供的变量替换为空字符串而非报错。 + + Args: + name: 模板名称 + variables: 变量字典 + + Returns: + 渲染后的文本 + """ + template_text = _load_template(name) + # 先对所有变量值转义花括号,防止 format_map 报错 + escaped_vars = {k: _escape_braces(v) for k, v in variables.items()} + safe_vars: Dict[str, str] = defaultdict(str, escaped_vars) + return template_text.format_map(safe_vars) + + +def clear_cache() -> None: + """清空模板缓存(用于测试或热更新)""" + _template_cache.clear() diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md b/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md new file mode 100644 index 0000000..22d2895 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md @@ -0,0 +1,16 @@ +{mention_type}通知 + +来源: {source_type} {source_url} +评论者: {commenter} +意图: {intent_hint} +内容: +{content_snippet} + +📋 获取完整上下文: +1. 查看{source_type}详情: GET {gitea_api}/repos/{repo}/{source_detail_api_path} +2. 
查看评论列表: GET {gitea_api}/repos/{repo}/{source_comments_api_path} + +📌 响应指引: +{response_guidance} + +完成后按指引操作。 diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py b/~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py new file mode 100644 index 0000000..0a32cc7 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py @@ -0,0 +1,129 @@ +"""mention_utils 单元测试 — §25.7 覆盖。""" + +import pytest + +from src.api.mention_utils import ( + extract_mentions, + should_suppress_mention, + infer_intent, +) + + +# --------------------------------------------------------------------------- +# extract_mentions +# --------------------------------------------------------------------------- + +class TestExtractMentions: + """测试 @mention 提取逻辑。""" + + def test_exact_match(self): + """@zhangfei-dev 精确匹配。""" + assert extract_mentions("@zhangfei-dev 请看一下", "someone") == ["zhangfei-dev"] + + def test_chinese_alias(self): + """@张飞 中文别名匹配。""" + assert extract_mentions("@张飞 帮忙看看", "someone") == ["zhangfei-dev"] + + def test_english_short_name(self): + """@zhangfei 英文短名匹配。""" + assert extract_mentions("@zhangfei 快来", "someone") == ["zhangfei-dev"] + + def test_prefix_unique(self): + """@zhangf 前缀唯一匹配。""" + assert extract_mentions("@zhangf 来一下", "someone") == ["zhangfei-dev"] + + def test_prefix_ambiguous_no_match(self): + """@z 前缀模糊,多个候选,不匹配。""" + assert extract_mentions("@z 看看", "someone") == [] + + def test_dedup_same_person(self): + """@张飞 @zhangfei-dev 同时出现去重。""" + result = extract_mentions("@张飞 @zhangfei-dev 来一下", "someone") + assert result == ["zhangfei-dev"] + + def test_exclude_self(self): + """@zhangfei-dev 排除自己(sender=zhangfei-dev)。""" + assert extract_mentions("@zhangfei-dev 自己说", "zhangfei-dev") == [] + + def test_unknown_person(self): + """@unknown 不匹配任何 Agent。""" + assert extract_mentions("@unknown 你好", "someone") == [] + + def test_multiple_mentions(self): + """多个 @mention 返回多个 Agent。""" + result = 
set(extract_mentions("@张飞 @关羽 来讨论", "someone")) + assert result == {"zhangfei-dev", "guanyu-dev"} + + def test_mention_with_hyphen_in_middle(self): + """@mention 后面紧跟标点也能识别。""" + result = extract_mentions("@赵云,请看下", "someone") + assert result == ["zhaoyun-data"] + + +# --------------------------------------------------------------------------- +# should_suppress_mention +# --------------------------------------------------------------------------- + +class TestShouldSuppressMention: + """测试 @mention 通知抑制逻辑。""" + + def test_suppress_when_in_list(self): + """被提及者在自动通知列表中 → 抑制。""" + assert should_suppress_mention("zhangfei-dev", ["zhangfei-dev", "guanyu-dev"]) is True + + def test_not_suppress_when_not_in_list(self): + """被提及者不在自动通知列表中 → 不抑制。""" + assert should_suppress_mention("zhangfei-dev", ["guanyu-dev"]) is False + + def test_suppress_empty_list(self): + """自动通知列表为空 → 不抑制。""" + assert should_suppress_mention("zhangfei-dev", []) is False + + +# --------------------------------------------------------------------------- +# infer_intent +# --------------------------------------------------------------------------- + +class TestInferIntent: + """测试意图推断逻辑。 + + 优先级:assign → collaborate → help → notify(默认) + """ + + def test_help_question_mark(self): + """疑问句 → help。""" + assert infer_intent("@赵云 数据格式是什么?") == "help" + + def test_notify_plain_mention(self): + """纯通知(无关键词) → notify。""" + assert infer_intent("@关羽 这个 PR 涉及风控变更") == "notify" + + def test_collaborate_please_help(self): + """'请帮忙' → collaborate(NOT help!)。""" + assert infer_intent("@庞统 请帮忙澄清需求") == "collaborate" + + def test_assign_keywords(self): + """'交给你' → assign。""" + assert infer_intent("@张飞 前端部分交给你") == "assign" + + def test_help_how_to(self): + """'如何' → help。""" + assert infer_intent("@姜维 如何部署这个服务") == "help" + + def test_collaborate_please_review(self): + """'请review' → collaborate。""" + assert infer_intent("@司马懿 请review 这个方案") == "collaborate" + + def test_notify_default(self): + """无任何关键词 → 
notify。""" + assert infer_intent("@赵云 已更新数据") == "notify" + + def test_assign_takes_priority_over_help(self): + """assign 关键词优先于 help 关键词。""" + # "交给" in body → assign, even though "?" also present + assert infer_intent("@张飞 这个模块交给你,有问题?") == "assign" + + def test_collaborate_takes_priority_over_help(self): + """collaborate 关键词优先于 help 关键词。""" + # "请帮忙" in body → collaborate, even though "?" absent + assert infer_intent("@赵云 请帮忙看看数据") == "collaborate" -- 2.45.4 From a5d5d2d9743e0203e4056e9eb32579a6c7e8377c Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sat, 13 Jun 2026 23:43:20 +0800 Subject: [PATCH 03/10] =?UTF-8?q?fix:=20P0=20token=20=E7=8E=AF=E5=A2=83?= =?UTF-8?q?=E5=8F=98=E9=87=8F=20+=20P1=20fail=5Fcount=20=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E7=AE=80=E5=8C=96=EF=BC=88=E5=A7=9C=E7=BB=B4=20Review=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/daemon/toolchain_handler.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/daemon/toolchain_handler.py b/src/daemon/toolchain_handler.py index 65bf8a6..1d1b58e 100644 --- a/src/daemon/toolchain_handler.py +++ b/src/daemon/toolchain_handler.py @@ -7,6 +7,7 @@ from __future__ import annotations import json import logging +import os import urllib.request from pathlib import Path from typing import Dict, List @@ -23,10 +24,7 @@ logger = logging.getLogger("moziplus-v2.handler.toolchain") # --------------------------------------------------------------------------- _GITEA_BASE = "http://192.168.2.154:3000/api/v1" -_GITEA_TOKEN = "a6d596b826f4bfeaf983ef4d25ac25dab95bbc4e" - -# 业务失败连续次数阈值,超过则升级为系统失败 -_BUSINESS_FAIL_THRESHOLD = 3 +_GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "") # action_type → action_hint 映射 _ACTION_HINTS: Dict[str, str] = { @@ -308,22 +306,16 @@ class ToolchainHandler(BaseTaskHandler): ).fetchone() if row and row["must_haves"]: meta = json.loads(row["must_haves"]) - # 统计该 task 的业务失败次数 - fail_count = 
conn.execute( - "SELECT COUNT(*) FROM events WHERE task_id=? " - "AND event_type='status_change' AND payload LIKE '%failed%'", - (task_id,) - ).fetchone()[0] conn.close() except Exception: - fail_count = 0 + pass action_type = meta.get("action_type", "") context_data = meta.get("context", {}) assignee = meta.get("assignee", "") or meta.get("from", "") # 三分路决策 - route = self._classify_failure(verify, fail_count) + route = self._classify_failure(verify) if route == "business": self._handle_business_failure( @@ -335,14 +327,11 @@ class ToolchainHandler(BaseTaskHandler): self._handle_infrastructure_failure( task_id, agent_id, verify, db_path) - def _classify_failure(self, verify: VerifyResult, fail_count: int) -> str: + def _classify_failure(self, verify: VerifyResult) -> str: """分类失败类型:business / system / infrastructure""" # verify_error 或 DB 不可用 → 基础设施失败 if verify.reason == "verify_error": return "infrastructure" - # 连续业务失败超过阈值 → 升级为系统失败 - if fail_count >= _BUSINESS_FAIL_THRESHOLD: - return "system" # 默认:业务失败 return "business" @@ -450,6 +439,8 @@ class ToolchainHandler(BaseTaskHandler): self, repo: str, pr_number: int, body: str, ) -> bool: """在 PR/Issue 上创建 comment。返回是否成功。""" + if not _GITEA_TOKEN: + return False payload = json.dumps({"body": body}, ensure_ascii=False).encode("utf-8") try: req = urllib.request.Request( @@ -471,6 +462,8 @@ class ToolchainHandler(BaseTaskHandler): assignees: list = None, ) -> bool: """创建 Gitea Issue。返回是否成功。""" + if not _GITEA_TOKEN: + return False data = {"title": title, "body": body} if assignees: data["assignees"] = assignees -- 2.45.4 From 3bca794902b813bd78e03575809dc69b502b6cec Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sat, 13 Jun 2026 23:47:12 +0800 Subject: [PATCH 04/10] =?UTF-8?q?fix:=20M2=20on=5Ffailure=20assignee=20?= =?UTF-8?q?=E4=BB=8E=20tasks=20=E8=A1=A8=E8=AF=BB=E5=8F=96=20+=20infrastru?= =?UTF-8?q?cture=20=E9=98=B2=E9=80=92=E5=BD=92=EF=BC=88=E5=8F=B8=E9=A9=AC?= =?UTF-8?q?=E6=87=BF=20Review=20#65=EF=BC=89?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit M2: on_failure 中 assignee = meta.get('from', '') 读到 'system' 而非实际 Agent → 改为 SELECT must_haves, assignee FROM tasks 直接读 tasks.assignee 字段 附带:infrastructure failure 改为直接 DB INSERT,不走 _send_toolchain_task 防递归 --- src/daemon/toolchain_handler.py | 60 ++++++++++++++++--------- tests/unit/test_toolchain_handler_v2.py | 36 ++++++++++----- 2 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/daemon/toolchain_handler.py b/src/daemon/toolchain_handler.py index 1d1b58e..47deb42 100644 --- a/src/daemon/toolchain_handler.py +++ b/src/daemon/toolchain_handler.py @@ -297,22 +297,24 @@ class ToolchainHandler(BaseTaskHandler): logger.info("Toolchain %s: verify failed (%s), marked failed", task_id, verify.reason) - # 读取 must_haves 获取事件上下文 + # 读取 must_hives 获取事件上下文 + assignee 从 tasks 表读取 meta = {} + assignee = agent_id try: conn = get_connection(db_path) row = conn.execute( - "SELECT must_haves FROM tasks WHERE id=?", (task_id,) + "SELECT must_haves, assignee FROM tasks WHERE id=?", (task_id,) ).fetchone() - if row and row["must_haves"]: - meta = json.loads(row["must_haves"]) + if row: + if row["must_haves"]: + meta = json.loads(row["must_haves"]) + assignee = row["assignee"] or agent_id conn.close() except Exception: pass action_type = meta.get("action_type", "") context_data = meta.get("context", {}) - assignee = meta.get("assignee", "") or meta.get("from", "") # 三分路决策 route = self._classify_failure(verify) @@ -403,29 +405,43 @@ class ToolchainHandler(BaseTaskHandler): self, task_id: str, agent_id: str, verify: VerifyResult, db_path: Path, ) -> None: - """基础设施失败 → _send_toolchain_task @jiangwei-infra(防递归)""" - # 直接在 _toolchain DB 创建 task(不走 Gitea webhook) + """基础设施失败 → 直接在 _toolchain DB 创建 task @jiangwei-infra(防递归)""" try: - from src.api.toolchain_routes import _send_toolchain_task - _send_toolchain_task( - to_agent="jiangwei-infra", - title=f"[基础设施] Gitea API 不可用 - {task_id}", 
- description=( - f"Gitea API 不可用,原任务 {task_id} 无法通过正常路径处理。\n" - f"请检查 Gitea 服务状态和网络连通性。" - ), - event_type="infrastructure_failure", - action_type="infrastructure_failure", - steps=[ + from datetime import datetime + new_task_id = f"tc-{int(datetime.now().timestamp() * 1000)}" + must_hives = json.dumps({ + "event_type": "infrastructure_failure", + "action_type": "infrastructure_failure", + "steps": [ "检查 Gitea 服务状态(http://192.168.2.154:3000)", "检查网络连通性", "恢复后提交 action report", ], - context_data={"original_task_id": task_id, "verify_reason": verify.reason}, - source="toolchain_handler", + "context": {"original_task_id": task_id, "verify_reason": verify.reason}, + "from": "system", + "source": "toolchain_handler_on_failure", + }, ensure_ascii=False) + conn = get_connection(db_path) + conn.execute( + "INSERT INTO tasks (id, title, description, assignee, assigned_by, " + "must_haves, task_type, status) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + ( + new_task_id, + f"[基础设施] Gitea API 不可用 - {task_id}", + f"Gitea API 不可用,原任务 {task_id} 无法通过正常路径处理。\n" + f"请检查 Gitea 服务状态和网络连通性。", + "jiangwei-infra", + "system", + must_hives, + "toolchain", + "pending", + ) ) - logger.info("Toolchain %s: infrastructure failure → task created for jiangwei-infra", - task_id) + conn.commit() + conn.close() + logger.info( + "Toolchain %s: infrastructure failure → task %s created for jiangwei-infra", + task_id, new_task_id) except Exception as e: logger.error( "Toolchain %s: failed to create infrastructure_failure task: %s", diff --git a/tests/unit/test_toolchain_handler_v2.py b/tests/unit/test_toolchain_handler_v2.py index 3bcd311..620495b 100644 --- a/tests/unit/test_toolchain_handler_v2.py +++ b/tests/unit/test_toolchain_handler_v2.py @@ -398,12 +398,14 @@ class TestSendToolchainTask: class TestOnFailureRouting: def test_business_failure_creates_gitea_comment(self, handler, tmp_db): - """Business failure → Gitea PR comment""" + """Business failure → Gitea PR comment @task assignee (not must_hives 
field)""" + # S4: must_hives does NOT contain assignee — production data doesn't have it must_haves = json.dumps({ "action_type": "review_result", "context": {"repo": "sanguo/test", "pr_number": 42}, - "assignee": "zhangfei-dev", + "from": "system", }) + # assignee is set on the tasks table row (as production code writes it) _insert_task(tmp_db, "t-fail", must_haves) with patch.object(handler, "_create_gitea_comment") as mock_comment: @@ -414,9 +416,12 @@ class TestOnFailureRouting: call_args = mock_comment.call_args assert call_args[0][0] == "sanguo/test" assert call_args[0][1] == 42 + # M2: comment body should @ the task's assignee from tasks table + comment_body = call_args[0][2] + assert "@zhangfei-dev" in comment_body def test_infrastructure_failure_creates_task(self, handler, tmp_db): - """Infrastructure failure → _send_toolchain_task for jiangwei-infra""" + """Infrastructure failure → direct DB task for jiangwei-infra (no reverse dep)""" must_haves = json.dumps({ "action_type": "review_result", "context": {"repo": "sanguo/test", "pr_number": 42}, @@ -427,15 +432,22 @@ class TestOnFailureRouting: mock_comment.return_value = False # Gitea API down with patch.object(handler, "_create_gitea_issue") as mock_issue: mock_issue.return_value = False # Gitea API still down - with patch("src.api.toolchain_routes._send_toolchain_task") as mock_send: - mock_send.return_value = "tc-infra" - verify = VerifyResult(False, "no_action", "no action_report") - handler.on_failure("t-infra", "zhangfei-dev", tmp_db, verify) - # Should eventually try to create infrastructure_failure task - mock_send.assert_called() - call_kwargs = mock_send.call_args - assert call_kwargs[1]["action_type"] == "infrastructure_failure" - assert call_kwargs[1]["to_agent"] == "jiangwei-infra" + verify = VerifyResult(False, "no_action", "no action_report") + handler.on_failure("t-infra", "zhangfei-dev", tmp_db, verify) + + # S3: should directly INSERT into DB, not call _send_toolchain_task + # Verify a 
new task was created in DB for jiangwei-infra + conn = get_connection(tmp_db) + rows = conn.execute( + "SELECT * FROM tasks WHERE assignee=?", + ("jiangwei-infra",) + ).fetchall() + conn.close() + assert len(rows) >= 1, "No infrastructure_failure task created" + infra_task = rows[0] + assert infra_task["task_type"] == "toolchain" + meta = json.loads(infra_task["must_haves"]) + assert meta["action_type"] == "infrastructure_failure" # --------------------------------------------------------------------------- -- 2.45.4 From 6e6b52fe3ba7c3a304b33ff8864bb5d3b9978e6e Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sun, 14 Jun 2026 00:09:27 +0800 Subject: [PATCH 05/10] =?UTF-8?q?fix:=20asyncio.Lock=20=E6=87=92=E5=8A=A0?= =?UTF-8?q?=E8=BD=BD=E9=98=B2=20event=20loop=20=E5=85=B3=E9=97=AD=E5=90=8E?= =?UTF-8?q?=20import=20=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/api/toolchain_routes.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/api/toolchain_routes.py b/src/api/toolchain_routes.py index e8c1e92..39ed51d 100644 --- a/src/api/toolchain_routes.py +++ b/src/api/toolchain_routes.py @@ -50,7 +50,15 @@ router = APIRouter(tags=["toolchain"]) _delivery_cache: Set[str] = set() _delivery_timestamps: List[Tuple[float, str]] = [] _TTL_SECONDS = 7 * 24 * 3600 -_idempotency_lock = asyncio.Lock() +_idempotency_lock: Optional[asyncio.Lock] = None + + +def _get_idempotency_lock() -> asyncio.Lock: + """懒加载 asyncio.Lock,避免模块级创建时 event loop 不存在(Python 3.9)。""" + global _idempotency_lock + if _idempotency_lock is None: + _idempotency_lock = asyncio.Lock() + return _idempotency_lock def _is_duplicate(event: str, delivery: str, @@ -1219,7 +1227,7 @@ async def gitea_webhook( # 2. 
幂等检查(需要在 payload 解析后,以支持内容去重) if x_gitea_event and x_gitea_delivery: - async with _idempotency_lock: + async with _get_idempotency_lock(): if _is_duplicate(x_gitea_event, x_gitea_delivery, payload): logger.debug( "Duplicate webhook: %s/%s", -- 2.45.4 From 50d1d0b5e6ff222b5a46b0d5e9f0408cec77a4ab Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sun, 14 Jun 2026 00:11:23 +0800 Subject: [PATCH 06/10] chore: trigger CI retry -- 2.45.4 From 4ef9f68ff3ea932a713555b901ba70346eb12f1a Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sun, 14 Jun 2026 00:13:01 +0800 Subject: [PATCH 07/10] =?UTF-8?q?fix(ci):=20PYTHONPATH=3D.=20=E9=98=B2?= =?UTF-8?q?=E6=AD=A2=20runner=20=E7=8E=AF=E5=A2=83=E5=8A=A0=E8=BD=BD?= =?UTF-8?q?=E5=AE=89=E8=A3=85=E7=9B=AE=E5=BD=95=E6=97=A7=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitea/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 4b98af7..bf96ed3 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -47,7 +47,7 @@ jobs: - name: Run tests (exclude E2E) run: | - /tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -q + PYTHONPATH=. 
/tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -q # ── Job 3: CI 失败通知 ─────────────────────────────── # 使用 needs..result 直接判断,不查询 commit status API -- 2.45.4 From 925ebe855652353430fc806db3f8a794cb59973f Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sun, 14 Jun 2026 00:16:11 +0800 Subject: [PATCH 08/10] =?UTF-8?q?ci:=20=E5=8A=A0=20debug=20=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E5=AE=9A=E4=BD=8D=20test=20failure=20=E6=A0=B9?= =?UTF-8?q?=E5=9B=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitea/workflows/ci.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index bf96ed3..2d78406 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -45,9 +45,19 @@ jobs: python3 -m venv /tmp/ci-venv-test /tmp/ci-venv-test/bin/pip install --quiet fastapi pydantic pyyaml uvicorn requests pytest pytest-asyncio httpx + - name: Debug environment + run: | + echo "PWD=$(pwd)" + echo "PYTHONPATH=$PYTHONPATH" + python3 -c "import sys; [print(p) for p in sys.path if 'sanguo' in p.lower() or 'openclaw' in p.lower()]" + grep -c "assignee = agent_id" src/daemon/toolchain_handler.py || true + grep -c "_BUSINESS_FAIL_THRESHOLD" src/daemon/toolchain_handler.py || true + - name: Run tests (exclude E2E) run: | - PYTHONPATH=. 
/tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -q + PYTHONPATH=$(pwd) /tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -q || \ + (echo '=== RETRY WITH VERBOSE ===' && \ + PYTHONPATH=$(pwd) /tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -v 2>&1 | tail -30) # ── Job 3: CI 失败通知 ─────────────────────────────── # 使用 needs..result 直接判断,不查询 commit status API -- 2.45.4 From fd3a889faebf436e8d0620f0a3781cb4992d6da7 Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sun, 14 Jun 2026 00:17:44 +0800 Subject: [PATCH 09/10] =?UTF-8?q?ci:=20=E6=AF=8F=E6=AC=A1=E6=B8=85=20venv?= =?UTF-8?q?=20=E9=98=B2=E6=AD=A2=E6=97=A7=E7=BC=93=E5=AD=98=E6=8D=9F?= =?UTF-8?q?=E5=9D=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitea/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 2d78406..5b1fbb8 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -42,6 +42,7 @@ jobs: - name: Setup Python run: | + rm -rf /tmp/ci-venv-test python3 -m venv /tmp/ci-venv-test /tmp/ci-venv-test/bin/pip install --quiet fastapi pydantic pyyaml uvicorn requests pytest pytest-asyncio httpx -- 2.45.4 From 976d9ce7c865034d59d19195d7bc7aa2639b0c07 Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sun, 14 Jun 2026 00:22:13 +0800 Subject: [PATCH 10/10] =?UTF-8?q?fix:=20M1=20git=20rm=20=E8=AF=AF=E6=8F=90?= =?UTF-8?q?=E4=BA=A4=E7=9A=84=E5=AE=89=E8=A3=85=E7=9B=AE=E5=BD=95=E6=96=87?= =?UTF-8?q?=E4=BB=B6=20+=20S1=20docstring=20=E4=BF=AE=E6=AD=A3=20+=20S2=20?= =?UTF-8?q?=E5=8E=BB=E6=8E=89=20CHECK=20=E7=BA=A6=E6=9D=9F=EF=BC=88?= =?UTF-8?q?=E5=8F=B8=E9=A9=AC=E6=87=BF=20Review=20#111=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/blackboard/db.py | 2 +- src/daemon/toolchain_handler.py | 2 +- .../src/api/mention_utils.py | 169 -- .../src/api/toolchain_routes.py | 1246 ---------- .../src/daemon/prompt_composer.py | 129 - 
.../sanguo_moziplus_v2/src/daemon/spawner.py | 2088 ----------------- .../src/daemon/toolchain_handler.py | 512 ---- .../src/daemon/toolchain_templates.py | 89 - .../templates/toolchain/mention.md | 16 - .../tests/unit/test_mention_utils.py | 129 - 10 files changed, 2 insertions(+), 4380 deletions(-) delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md delete mode 100644 ~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py diff --git a/src/blackboard/db.py b/src/blackboard/db.py index b56d8be..e92aaeb 100644 --- a/src/blackboard/db.py +++ b/src/blackboard/db.py @@ -293,7 +293,7 @@ _SCHEMA_STATEMENTS = [ id INTEGER PRIMARY KEY AUTOINCREMENT, task_id TEXT NOT NULL REFERENCES tasks(id), author TEXT NOT NULL, - comment_type TEXT NOT NULL DEFAULT 'general' CHECK (comment_type IN ('general','handoff','observation','review','rebuttal','rebuttal_response','debate_argument','debate_rebuttal','debate_judgment','action_report')), + comment_type TEXT NOT NULL DEFAULT 'general', body TEXT NOT NULL, mentions TEXT, created_at TEXT NOT NULL DEFAULT (datetime('now')) diff --git a/src/daemon/toolchain_handler.py b/src/daemon/toolchain_handler.py index 47deb42..e45736f 100644 --- a/src/daemon/toolchain_handler.py +++ b/src/daemon/toolchain_handler.py @@ -330,7 +330,7 @@ class ToolchainHandler(BaseTaskHandler): task_id, agent_id, verify, db_path) def _classify_failure(self, verify: VerifyResult) -> str: - """分类失败类型:business / 
system / infrastructure""" + """分类失败类型:business / infrastructure(system 通过升级到达)""" # verify_error 或 DB 不可用 → 基础设施失败 if verify.reason == "verify_error": return "infrastructure" diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py deleted file mode 100644 index 5922ff0..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py +++ /dev/null @@ -1,169 +0,0 @@ -"""@mention 解析工具模块。供所有 toolchain handler 复用。""" - -import re -import logging -from typing import List, Set - -from src.config.agents import AGENT_IDS - -logger = logging.getLogger(__name__) - -# Gitea API 基地址常量(避免硬编码) -GITEA_API_BASE = "http://192.168.2.154:3000/api/v1" -GITEA_WEB_BASE = "http://192.168.2.154:3000" - -# Agent 别名映射 -# 规则: -# 1. 中文名(如"张飞")→ 完整 Agent ID -# 2. 英文短名(如"zhangfei")→ 完整 Agent ID -# 3. 前缀模糊匹配需唯一匹配(见 extract_mentions 假设 A2) -AGENT_ALIAS: dict[str, str] = { - # 中文名 - "张飞": "zhangfei-dev", - "关羽": "guanyu-dev", - "赵云": "zhaoyun-data", - "姜维": "jiangwei-infra", - "司马懿": "simayi-challenger", - "庞统": "pangtong-fujunshi", - # 字+号(常见写法) - "翼德": "zhangfei-dev", - "云长": "guanyu-dev", - "子龙": "zhaoyun-data", - "伯约": "jiangwei-infra", - "仲达": "simayi-challenger", - "士元": "pangtong-fujunshi", - # 英文短名 - "zhangfei": "zhangfei-dev", - "guanyu": "guanyu-dev", - "zhaoyun": "zhaoyun-data", - "jiangwei": "jiangwei-infra", - "simayi": "simayi-challenger", - "pangtong": "pangtong-fujunshi", -} - -# 正则:匹配 @后面跟着的合法 Agent 名(英文字母/中文/数字/连字符) -_MENTION_PATTERN = re.compile(r"@([a-zA-Z\u4e00-\u9fa5][a-zA-Z0-9\u4e00-\u9fff-]*)") - - -def extract_mentions(body: str, sender: str) -> list[str]: - """从文本中提取 @mention 的 Agent ID 列表。 - - Args: - body: 评论文本 - sender: 评论者 Gitea 用户名(用于排除自己 @自己) - - Returns: - 去重后的 Agent ID 列表 - - 匹配优先级:精确 > 别名 > 前缀模糊(需唯一匹配,多候选则跳过) - """ - candidates = _MENTION_PATTERN.findall(body) - result: Set[str] = set() - - for c in candidates: - # 1. 
精确匹配(@zhangfei-dev) - if c in AGENT_IDS: - result.add(c) - # 2. 别名匹配(@张飞、@zhangfei) - elif c in AGENT_ALIAS: - result.add(AGENT_ALIAS[c]) - else: - # 3. 前缀模糊匹配(@zhangf → zhangfei-dev) - # 假设 A2:多个候选时不匹配,只 log warning - matches = [aid for aid in AGENT_IDS if aid.startswith(c)] - if len(matches) == 1: - result.add(matches[0]) - elif len(matches) > 1: - logger.warning( - "Prefix '%s' matched %d agents (%s), skipping ambiguous mention", - c, len(matches), matches) - - # 排除自己 @自己(假设 A1:Gitea login = Agent ID) - result.discard(sender) - return list(result) - - -def should_suppress_mention( - mentioned_agent: str, - auto_notify_targets: List[str], -) -> bool: - """判断 @mention 通知是否应被抑制(因为自动流转已通知同一人)。 - - Args: - mentioned_agent: 被 @的 Agent ID - auto_notify_targets: 本次事件自动流转已通知的目标列表 - - Returns: - True 表示应抑制(不发 @mention Mail) - """ - return mentioned_agent in auto_notify_targets - - -def infer_intent(body: str) -> str: - """从 @mention 内容推断意图。 - - Returns: - "help" | "notify" | "collaborate" | "assign" - """ - # 分配子任务关键词 - assign_keywords = ["交给", "分配", "负责", "认领", "做一下", "帮忙做", "implement"] - if any(kw in body for kw in assign_keywords): - return "assign" - - # 求助关键词(注意:"帮忙"已由 assign_keywords 的"帮忙做"覆盖,"请帮忙"由 collab_keywords 覆盖) - help_keywords = ["怎么", "如何", "?", "?", "什么", "哪个", "能否"] - if any(kw in body for kw in help_keywords): - return "help" - - # 协作请求关键词 - collab_keywords = ["请帮忙", "请协助", "请澄清", "请review", "请审查", "评估"] - if any(kw in body for kw in collab_keywords): - return "collaborate" - - # 默认为通知关注 - return "notify" - - -def _build_response_guidance( - intent: str, - gitea_api: str, - repo: str, - issue_number: int, - commenter: str, -) -> str: - """根据意图类型生成响应指引文本。""" - if intent == "help": - return ( - f"这是一条求助,请到 Gitea 评论回复:\n" - f"1. 获取评论上下文(上方 API)\n" - f"2. 组织回答\n" - f"3. 
在 Gitea 评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments\n" - f' Body: {{"body": "你的回答内容"}}' - ) - elif intent == "notify": - return ( - f"这是一条通知,请查看并知晓。如有意见,可到 Gitea 评论:\n" - f"- 查看 Issue/PR 详情(上方 API)\n" - f"- 如有意见,评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments" - ) - elif intent == "collaborate": - return ( - f"这是一条协作请求,请评估后回复(评论或 Mail):\n" - f"1. 获取详情(上方 API)\n" - f"2. 评估可行性\n" - f"3a. 评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments\n" - f' Body: {{"body": "你的回复"}}\n' - f"3b. 或通过 Mail 回复评论者: {commenter}" - ) - elif intent == "assign": - return ( - f"这是一条任务分配,请认领并执行:\n" - f"1. 获取 Issue 详情(上方 API)\n" - f"2. 评估可行性\n" - f"3. 认领 Issue: POST {gitea_api}/repos/{repo}/issues/{issue_number}/assignees\n" - f' Body: {{"assignees": ["{{your_agent_id}}"]}}\n' - f"4. 执行任务\n" - f"5. 完成后更新 Issue 状态: PATCH {gitea_api}/repos/{repo}/issues/{issue_number}\n" - f' Body: {{"state": "closed"}}' - ) - return "请查看详情(上方 API)并按需回复。" diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py deleted file mode 100644 index 20cc655..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py +++ /dev/null @@ -1,1246 +0,0 @@ -"""API 路由 — 工具链事件中枢(Toolchain Event Hub) - -接收 Gitea Webhook,翻译成 Mail 通知推送给 Agent。 - -端点: POST /webhook/gitea -支持事件: pull_request, pull_request_review, issues, issue_comment -""" - -from __future__ import annotations - -import asyncio -import hashlib -import hmac -import json -import logging -import os -import re -import time -from datetime import datetime -from pathlib import Path, PurePath -from typing import Any, Dict, List, Optional, Set, Tuple - -import httpx -from fastapi import APIRouter, Header, Request, Response - -from src.blackboard.db import init_db -from src.blackboard.models import Task -from src.blackboard.operations import Blackboard -from src.config.agents import AGENT_IDS -from 
src.api.mention_utils import ( - extract_mentions, - should_suppress_mention, - infer_intent, - _build_response_guidance, - GITEA_API_BASE, -) -from src.daemon.toolchain_templates import render_template -from src.utils import get_data_root - -logger = logging.getLogger(__name__) - -router = APIRouter(tags=["toolchain"]) - -# --------------------------------------------------------------------------- -# 幂等检查:内存 set,保留最近 7 天 -# --------------------------------------------------------------------------- -# 使用内存 set 而非 SQLite(设计文档原计划 SQLite,简化实现:daemon 重启不频繁, -# 重启后丢失可接受,Webhook 重试窗口内不会重复) - -_delivery_cache: Set[str] = set() -_delivery_timestamps: List[Tuple[float, str]] = [] -_TTL_SECONDS = 7 * 24 * 3600 -_idempotency_lock = asyncio.Lock() - - -def _is_duplicate(event: str, delivery: str, - payload: Optional[Dict[str, Any]] = None) -> bool: - """检查 Webhook 是否重复投递,自动清理过期条目。 - - 双重去重策略: - 1. delivery UUID 去重(标准幂等) - 2. payload 内容去重(应对 Gitea v1.23.4 的 webhookNotifier + actionsNotifier - 对同一 review 生成不同 UUID 的双投递问题) - """ - now = time.time() - # 清理过期条目 - while _delivery_timestamps and ( - now - _delivery_timestamps[0][0]) > _TTL_SECONDS: - _, key = _delivery_timestamps.pop(0) - _delivery_cache.discard(key) - - # 检查 delivery UUID 去重 - key = f"{event}-{delivery}" - if key in _delivery_cache: - return True - - # 检查 payload 内容去重(review 事件:同一 PR + 同一用户 + 同一内容) - # 注意:Gitea webhookNotifier 用 review.body,actionsNotifier 用 review.content - # 所以去重 key 需要同时取两个字段,确保两种格式生成相同 key - if payload and "review" in event: - pr_num = payload.get("pull_request", {}).get("number") - sender = payload.get("sender", {}).get("login") - review = payload.get("review", {}) - # 取 body 或 content,优先 body(webhookNotifier 格式) - content = review.get("body", "") or review.get("content", "") - content_hash = hashlib.sha256(content.encode()).hexdigest()[:16] - review_id = review.get("id", "") - content_key = f"content:{event}:{pr_num}:{sender}:{review_id}:{content_hash}" - if content_key in _delivery_cache: - 
logger.info( - "Content-based duplicate detected: %s PR#%s by %s", - event, - pr_num, - sender) - return True - _delivery_cache.add(content_key) - _delivery_timestamps.append((now, content_key)) - - _delivery_cache.add(key) - _delivery_timestamps.append((now, key)) - return False - - -# --------------------------------------------------------------------------- -# 签名验证 -# --------------------------------------------------------------------------- - -_WEBHOOK_SECRET: Optional[str] = os.environ.get("GITEA_WEBHOOK_SECRET") - - -def _verify_signature(body: bytes, signature: Optional[str]) -> bool: - """验证 HMAC-SHA256 签名。secret 为空时跳过验签。""" - if not _WEBHOOK_SECRET: - return True - if not signature: - return False - expected = hmac.new( - _WEBHOOK_SECRET.encode(), body, hashlib.sha256 - ).hexdigest() - return hmac.compare_digest(expected, signature) - - -# --------------------------------------------------------------------------- -# Gitea API 调用 -# --------------------------------------------------------------------------- - -_GITEA_TOKEN: str = os.environ.get("GITEA_TOKEN", "") -_GITEA_BASE = "http://192.168.2.154:3000/api/v1" - - -async def _fetch_pr_files(repo: str, pr_number: int) -> Tuple[List[str], str]: - """获取 PR 文件列表,含重试机制。 - - Returns: - (文件列表, 错误信息) — 成功时错误信息为空字符串 - """ - if not _GITEA_TOKEN: - return [], "GITEA_TOKEN 未配置" - - url = f"{_GITEA_BASE}/repos/{repo}/pulls/{pr_number}/files" - headers = {"Authorization": f"token {_GITEA_TOKEN}"} - last_error = "" - for attempt in range(3): - try: - async with httpx.AsyncClient(timeout=5.0) as client: - resp = await client.get(url, headers=headers) - resp.raise_for_status() - files: List[Dict[str, Any]] = resp.json() - return [f.get("filename", "") for f in files], "" - except Exception as e: - last_error = str(e) - if attempt < 2: - await asyncio.sleep(0.5 * (attempt + 1)) - logger.warning( - "Retry %d/3 fetching PR files: %s/pulls/%d", - attempt + 1, - repo, - pr_number) - logger.warning( - "Failed to fetch PR 
files after 3 retries: %s/pulls/%d - %s", - repo, - pr_number, - last_error) - return [], f"获取文件列表失败(重试3次): {last_error}" - - -# --------------------------------------------------------------------------- -# 风险级别判定 -# --------------------------------------------------------------------------- - -_HIGH_PATTERNS = [ - "**/spawner*", "**/ticker*", "**/dispatcher*", - "**/router*", "**/guardrails*", "**/strategy*", "**/risk*", -] - - -def _calc_risk_level(changed_files: List[str]) -> str: - """根据改动文件列表判定风险级别。""" - for filepath in changed_files: - for pattern in _HIGH_PATTERNS: - if PurePath(filepath).match(pattern): - return "high" - return "standard" - - -# --------------------------------------------------------------------------- -# Mail 创建 -# --------------------------------------------------------------------------- - - -MAIL_PROJECT_ID = "_mail" -TOOLCHAIN_PROJECT_ID = "_toolchain" - - -def _mail_db_path() -> Path: - """获取 Mail 数据库路径,确保目录存在。""" - root = get_data_root() - db = root / MAIL_PROJECT_ID / "blackboard.db" - db.parent.mkdir(parents=True, exist_ok=True) - init_db(db) - return db - - -def _toolchain_db_path() -> Path: - """获取 Toolchain 数据库路径,确保目录和表存在。""" - root = get_data_root() - db = root / TOOLCHAIN_PROJECT_ID / "blackboard.db" - db.parent.mkdir(parents=True, exist_ok=True) - init_db(db) - return db - - -def _send_toolchain_task( - to_agent: str, - title: str, - description: str, - event_type: str, - action_type: str, - steps: list, - context_data: dict | None = None, - source: str = "webhook", -) -> str: - """创建 Toolchain Task 并写入 _toolchain DB。 - - Args: - to_agent: 收件人 Agent ID - title: 任务标题 - description: 任务描述(模板渲染后的事件信息) - event_type: 事件类型(review_result / ci_failure / ...) 
- action_type: 动作分类(用于步骤选择和日志统计) - steps: 结构化编号步骤列表 - context_data: 事件上下文数据(PR 号、仓库名等) - source: 来源标识 - - Returns: - 创建的 Task ID - """ - if to_agent not in AGENT_IDS: - logger.warning("Unknown agent: %s, skipping toolchain task", to_agent) - return "" - - task_id = f"tc-{int(datetime.now().timestamp() * 1000)}" - must_hives = json.dumps({ - "event_type": event_type, - "action_type": action_type, - "steps": steps, - "context": context_data or {}, - "from": "system", - "source": source, - }, ensure_ascii=False) - - task = Task( - id=task_id, - title=title, - description=description, - assignee=to_agent, - assigned_by="system", - must_haves=must_hives, - task_type="toolchain", - status="pending", - ) - bb = Blackboard(_toolchain_db_path()) - bb.create_task(task) - logger.info( - "Toolchain task sent: %s → %s [%s] action_type=%s", - title[:40], to_agent, task_id, action_type, - ) - return task_id - - -def _send_mail( - to_agent: str, - title: str, - description: str, - source: str = "webhook", -) -> str: - """创建 Mail Task 并写入数据库。 - - Args: - to_agent: 收件人 Agent ID - title: 邮件标题 - description: 邮件正文 - source: 来源标识 - - Returns: - 创建的 Mail ID - - Raises: - Exception: 数据库写入失败 - """ - if to_agent not in AGENT_IDS: - logger.warning("Unknown agent: %s, skipping mail", to_agent) - return "" - - mail_id = f"mail-{int(datetime.now().timestamp() * 1000)}" - notify_meta = { - "type": "inform", - "performative": "inform", - "is_read": False, - "conversation_id": f"conv-{mail_id}", - "from": "system", - "source": source, - } - task = Task( - id=mail_id, - title=title, - description=description, - assignee=to_agent, - assigned_by="system", - must_haves=json.dumps(notify_meta, ensure_ascii=False), - task_type="mail", - status="pending", - ) - bb = Blackboard(_mail_db_path()) - bb.create_task(task) - logger.info("Mail sent: %s → %s [%s]", title[:40], to_agent, mail_id) - return mail_id - - -# --------------------------------------------------------------------------- -# 辅助:从 payload 
提取仓库全名 -# --------------------------------------------------------------------------- - - -def _repo_fullname(payload: Dict[str, Any]) -> str: - """从 Webhook payload 提取仓库全名(owner/repo)。""" - repo = payload.get("repository") or {} - return repo.get("full_name", "") - - -# --------------------------------------------------------------------------- -# @mention 通用发送函数 -# --------------------------------------------------------------------------- - - -async def _send_mention_mails( - mentions: list[str], - auto_targets: list[str], - source_type: str, - mention_type: str, - source_url: str, - commenter: str, - content: str, - repo: str, - issue_number: int, - is_pr: bool, -) -> None: - """通用 @mention Mail 发送函数。 - - 自动抑制已在 auto_targets 中的 Agent,避免双重通知。 - 根据内容推断意图,生成不同的响应指引。 - """ - # 确定 API 路径 - if is_pr: - detail_api = f"pulls/{issue_number}" - comments_api = f"issues/{issue_number}/comments" - else: - detail_api = f"issues/{issue_number}" - comments_api = f"issues/{issue_number}/comments" - - for agent_id in mentions: - if should_suppress_mention(agent_id, auto_targets): - logger.info( - "Mention suppressed for %s (already notified by auto flow)", - agent_id) - continue - - # 从 api_path 提取编号用于标题,如 "issues/32" → "#32" - number_str = f"#{issue_number}" if issue_number else "" - intent = infer_intent(content) - intent_hint = {"help": "求助", "notify": "通知关注", - "collaborate": "协作请求", "assign": "分配子任务"}[intent] - - # 生成响应指引 - guidance = _build_response_guidance( - intent=intent, - gitea_api=GITEA_API_BASE, - repo=repo, - issue_number=issue_number, - commenter=commenter, - ) - - text = render_template("mention", { - "mention_type": mention_type, - "source_type": source_type, - "source_url": source_url, - "commenter": commenter, - "intent_hint": intent_hint, - "content_snippet": content[:500], - "gitea_api": GITEA_API_BASE, - "repo": repo, - "source_detail_api_path": detail_api, - "source_comments_api_path": comments_api, - "response_guidance": guidance, - }) - - title = 
f"@mention ({intent_hint}): {source_type} {number_str} ({repo})" - _send_toolchain_task( - to_agent=agent_id, - title=title, - description=text, - event_type="mention", - action_type="mention", - steps=[ - "按上方 mention 模板中的 response_guidance 执行", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "source_type": source_type, - "source_url": source_url, - "commenter": commenter, - "content_snippet": content[:500], - "repo": repo, - "issue_number": issue_number, - }, - ) - - -# --------------------------------------------------------------------------- -# 事件处理函数 -# --------------------------------------------------------------------------- - - -async def _handle_pull_request(payload: Dict[str, Any]) -> None: - """处理 pull_request 事件:opened → 通知 reviewer;closed → merge 通知。""" - action = payload.get("action", "") - if action == "opened": - await _handle_pr_opened(payload) - elif action == "closed": - await _handle_pr_closed(payload) - elif action == "synchronize": - await _handle_pr_synchronize(payload) - - -async def _handle_pr_opened(payload: Dict[str, Any]) -> None: - """PR opened → 通知 simayi-challenger。""" - pr = payload.get("pull_request") - if not pr or not isinstance(pr, dict): - logger.warning( - "pull_request event missing pull_request field, skipping") - return - repo = _repo_fullname(payload) - pr_number = pr.get("number", 0) - pr_title = pr.get("title", "") - pr_author = pr.get("user", {}).get("login", "unknown") - branch = pr.get("head", {}).get("ref", "unknown") - - # 获取改动文件列表 - changed_files, fetch_error = await _fetch_pr_files(repo, pr_number) - risk_level = _calc_risk_level(changed_files) - if fetch_error: - file_list = f"⚠️ {fetch_error}" - else: - file_list = "\n".join( - f"- {f}" for f in changed_files) if changed_files else "(无文件变更)" - - text = render_template("review_request", { - "repo": repo, - "pr_number": str(pr_number), - "pr_title": pr_title, - 
"pr_author": pr_author, - "branch": branch, - "risk_level": risk_level, - "file_list": file_list, - }) - - title = f"Review 请求: {pr_title} ({repo}#{pr_number})" - _send_toolchain_task( - to_agent="simayi-challenger", - title=title, - description=text, - event_type="review_request", - action_type="review_request", - steps=[ - f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", - "按审查清单审查(参考 code-review Skill)", - f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)— APPROVE 或 REQUEST_CHANGES", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "pr_number": pr_number, - "repo": repo, - "pr_title": pr_title, - "pr_author": pr_author, - "branch": branch, - "risk_level": risk_level, - }, - ) - - # S3: PR body @mention 通知 - pr_body = pr.get("body", "") or "" - sender = pr.get("user", {}).get("login", "") - mentions = extract_mentions(pr_body, sender) - if mentions: - # 自动流转已通知 simayi-challenger(review_request) - auto_targets = ["simayi-challenger"] - await _send_mention_mails( - mentions=mentions, - auto_targets=auto_targets, - source_type="PR", - mention_type="PR @mention", - source_url=pr.get("html_url", ""), - commenter=sender, - content=pr_body, - repo=repo, - issue_number=pr_number, - is_pr=True, - ) - - -async def _send_review_mentions( - review_body: str, - reviewer: str, - pr_author: str, - pr: dict, - repo: str, - pr_number: int, -) -> None: - """提取并发送 Review body 中的 @mention 通知(COMMENTED / 非 COMMENTED 通用)。""" - mentions = extract_mentions(review_body, reviewer) - if mentions: - auto_targets = [pr_author] - await _send_mention_mails( - mentions=mentions, - auto_targets=auto_targets, - source_type="Review", - mention_type="Review @mention", - source_url=pr.get("html_url", ""), - commenter=reviewer, - content=review_body, - repo=repo, - issue_number=pr_number, - is_pr=True, - ) - - -async def _handle_pull_request_review(payload: Dict[str, 
Any]) -> None: - """处理 pull_request_review 事件:非 COMMENTED → 通知 PR 作者。 - - 支持两种 payload 格式: - - repo webhook: review.state = "APPROVED" / "REQUEST_CHANGES" - - org webhook (Gitea v1.23.4): review.type = "pull_request_review_approved" / "pull_request_review_rejected" - """ - review = payload.get("review") - if not review or not isinstance(review, dict): - logger.warning( - "pull_request_review event missing review field, skipping") - return - pr = payload.get("pull_request") - if not pr or not isinstance(pr, dict): - logger.warning( - "pull_request_review event missing pull_request field, skipping") - return - - # 兼容两种 payload 格式提取 state - state = review.get("state", "") - if not state: - # org webhook 格式:review.type = "pull_request_review_approved" - review_type = review.get("type", "") - type_map = { - "pull_request_review_approved": "APPROVED", - "pull_request_review_rejected": "REQUEST_CHANGES", - "pull_request_review_comment": "COMMENTED", - } - state = type_map.get(review_type, "") - - repo = _repo_fullname(payload) - pr_number = pr.get("number", 0) - pr_title = pr.get("title", "") - pr_author = pr.get("user", {}).get("login", "unknown") - # 兼容:org webhook 的 review 没有 user,从 sender 取 - reviewer = review.get( - "user", - {}).get( - "login", - "") or payload.get( - "sender", - {}).get( - "login", - "unknown") - review_body = review.get("body", "") or review.get("content", "(无评论)") - - if state == "COMMENTED": - # Review 评论 → 通知 PR 作者 - review_body = review.get("body", "") or review.get("content", "(无评论)") - reviewer = review.get("user", {}).get("login", "") or payload.get("sender", {}).get("login", "unknown") - - text = render_template("review_comment", { - "repo": repo, - "pr_number": str(pr_number), - "pr_title": pr_title, - "reviewer": reviewer, - "comment_body": review_body, - }) - - title = f"Review 评论: {pr_title} ({repo}#{pr_number})" - _send_toolchain_task( - to_agent=pr_author, - title=title, - description=text, - event_type="review_comment", - 
action_type="review_comment", - steps=[ - f"查看评论(Gitea API: GET /repos/{repo}/issues/{pr_number}/comments)", - "根据评论内容响应(修改代码或在 PR 上回复 comment)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "pr_number": pr_number, - "repo": repo, - "pr_title": pr_title, - "reviewer": reviewer, - "comment_body": review_body, - }, - ) - - # S5: Review body @mention 通知(COMMENTED 路径) - await _send_review_mentions(review_body, reviewer, pr_author, pr, repo, pr_number) - - return - - result_map = {"APPROVED": "通过 ✓", "REQUEST_CHANGES": "驳回 ✗"} - if state not in result_map: - return - result = result_map[state] - - text = render_template("review_result", { - "repo": repo, - "pr_number": str(pr_number), - "pr_title": pr_title, - "reviewer": reviewer, - "result": result, - "review_body": review_body, - }) - - title = f"Review {result}: {pr_title} ({repo}#{pr_number})" - if state == "APPROVED": - tc_steps = [ - f"合并 PR(Gitea API: POST /repos/{repo}/pulls/{pr_number}/merge)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ] - else: # REQUEST_CHANGES - tc_steps = [ - "按审查意见逐条修改代码", - "push 到原分支 → CI 自动跑", - "CI 通过后等重新 Review", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ] - _send_toolchain_task( - to_agent=pr_author, - title=title, - description=text, - event_type="review_result", - action_type="review_result", - steps=tc_steps, - context_data={ - "pr_number": pr_number, - "repo": repo, - "pr_title": pr_title, - "result": result, - "reviewer": reviewer, - "review_body": review_body, - }, - ) - - # S5: Review body @mention 通知(非 COMMENTED 路径) - await _send_review_mentions(review_body, reviewer, pr_author, pr, repo, pr_number) - - -async def _fetch_latest_reviewer(repo: str, pr_number: int) -> str: - """查询 PR 最近一次非 PENDING review 的提交者。 - - Returns: - 
reviewer login 或空字符串 - """ - if not _GITEA_TOKEN: - return "" - - url = f"{_GITEA_BASE}/repos/{repo}/pulls/{pr_number}/reviews" - headers = {"Authorization": f"token {_GITEA_TOKEN}"} - - try: - async with httpx.AsyncClient(timeout=5.0) as client: - resp = await client.get(url, headers=headers) - resp.raise_for_status() - reviews = resp.json() - - # 取最后一个非 PENDING 的 review 的 user - for review in reversed(reviews): - state = review.get("state", "") - if state in ("APPROVED", "REQUEST_CHANGES", "COMMENTED"): - user = review.get("user", {}) - return user.get("login", "") - except Exception as e: - logger.warning("Failed to fetch reviews for %s#%d: %s", repo, pr_number, e) - - return "" - - -async def _handle_pr_synchronize(payload: Dict[str, Any]) -> None: - """PR 更新(新 push)→ 通知 reviewer 重新 review。 - - 查询最近一次 review 的提交者作为通知目标。 - 只在有 review 历史时才通知(避免和 opened 重复)。 - """ - pr = payload.get("pull_request") - if not pr or not isinstance(pr, dict): - return - - repo = _repo_fullname(payload) - pr_number = pr.get("number", 0) - pr_title = pr.get("title", "") - pr_author = pr.get("user", {}).get("login", "unknown") - new_sha = pr.get("head", {}).get("sha", "unknown")[:12] - - # 查询最近 review 的提交者 - reviewer = await _fetch_latest_reviewer(repo, pr_number) - if not reviewer: - # 没有已有 review 历史,fallback 到默认 reviewer - reviewer = "simayi-challenger" - logger.info("No review history for PR #%s, using default reviewer %s", pr_number, reviewer) - - text = render_template("review_updated", { - "repo": repo, - "pr_number": str(pr_number), - "pr_title": pr_title, - "pr_author": pr_author, - "new_sha": new_sha, - "reviewer": reviewer, - }) - - title = f"PR 更新: {pr_title} ({repo}#{pr_number})" - _send_toolchain_task( - to_agent=reviewer, - title=title, - description=text, - event_type="review_updated", - action_type="review_updated", - steps=[ - f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", - "重点检查上次 Review 意见的修改部分", - f"提交 Review(Gitea API: POST 
/repos/{repo}/pulls/{pr_number}/reviews)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "pr_number": pr_number, - "repo": repo, - "pr_title": pr_title, - "pr_author": pr_author, - "new_sha": new_sha, - "reviewer": reviewer, - }, - ) - - -def _send_deploy_failure_task(repo: str, pr_number: int, pr_title: str, reason: str) -> None: - """CD 部署失败通知,走 ToolchainHandler。""" - text = render_template("deploy_failure", { - "repo": repo, - "commit_sha": f"PR #{pr_number}", - }) - title = f"部署失败: {repo} (auto-deploy, PR #{pr_number})" - full_text = f"{text}\n\n失败原因: {reason}" - for agent_id in ("jiangwei-infra", "pangtong-fujunshi"): - _send_toolchain_task( - to_agent=agent_id, - title=title, - description=full_text, - event_type="deploy_failure", - action_type="deploy_failure", - steps=[ - "检查 deploy 日志", - "排查失败原因", - "修复并重新部署", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "repo": repo, - "pr_number": pr_number, - "pr_title": pr_title, - "reason": reason, - }, - ) - - -async def _handle_pr_closed(payload: Dict[str, Any]) -> None: - """PR closed → 如果 merged,通知 PR 作者。""" - pr = payload.get("pull_request") - if not pr or not isinstance(pr, dict): - return - - # 只处理 merged 的 PR - if not pr.get("merged", False): - return - - repo = _repo_fullname(payload) - pr_number = pr.get("number", 0) - pr_title = pr.get("title", "") - pr_author = pr.get("user", {}).get("login", "unknown") - # merged_by 可能不在 payload 中,fallback 到 sender - merged_by = ( - pr.get("merged_by", {}).get("login", "") - or payload.get("sender", {}).get("login", "unknown") - ) - - text = render_template("review_merged", { - "repo": repo, - "pr_number": str(pr_number), - "pr_title": pr_title, - "pr_author": pr_author, - "merged_by": merged_by, - }) - - title = f"PR 已合并: {pr_title} ({repo}#{pr_number})" - _send_toolchain_task( - 
to_agent=pr_author, - title=title, - description=text, - event_type="review_merged", - action_type="review_merged", - steps=[], # 纯通知,无步骤 - context_data={ - "pr_number": pr_number, - "repo": repo, - "pr_title": pr_title, - "pr_author": pr_author, - "merged_by": merged_by, - }, - ) - - # 自动部署:git pull + rsync + 按需 post_deploy - try: - import yaml - - # 加载部署配置 - config_path = Path(__file__).parent.parent.parent / "config" / "deploy-targets.yaml" - if not config_path.exists(): - return - - with open(config_path, "r", encoding="utf-8") as f: - deploy_config = yaml.safe_load(f) or {} - - targets = deploy_config.get("targets", {}) - target = targets.get(repo) - if not target: - return # 该仓库不在部署配置中,跳过 - - dev_dir = os.path.expanduser(target["dev_dir"]) - install_dir = os.path.expanduser(target.get("install_dir", target["dev_dir"])) - rsync_excludes = target.get("rsync_exclude", []) - - # Step 1: git pull in dev dir - proc = await asyncio.create_subprocess_exec( - "git", "pull", "origin", "main", - cwd=dev_dir, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30) - - if proc.returncode != 0: - logger.warning("Auto-deploy: git pull failed for %s: %s", repo, stderr.decode()) - return - - logger.info("Auto-deploy: git pull success for %s", repo) - - # Step 2: rsync to install dir - rsync_args = ["rsync", "-a"] - for exc in rsync_excludes: - rsync_args.extend(["--exclude", exc]) - rsync_args.extend([f"{dev_dir}/", f"{install_dir}/"]) - - rsync_proc = await asyncio.create_subprocess_exec( - *rsync_args, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - _, rsync_err = await asyncio.wait_for(rsync_proc.communicate(), timeout=60) - - if rsync_proc.returncode != 0: - logger.error("Auto-deploy: rsync failed: %s", rsync_err.decode()) - _send_deploy_failure_task(repo, pr_number, pr_title, f"rsync 失败: {rsync_err.decode()}") - return - - # Step 3: 判断是否需要执行 post_deploy - 
files = await _fetch_pr_files(repo, pr_number) - file_list = files[0] - needs_restart = any( - f.startswith("src/") or f.startswith("templates/") or f.startswith("frontend/") or f.endswith(".py") - for f in file_list - ) - - if needs_restart: - post_deploy_cmds = target.get("post_deploy", []) - pm2_name = target.get("pm2_name", "") - for cmd in post_deploy_cmds: - logger.info("Auto-deploy: executing post_deploy: %s", cmd) - - # M2: 检测当前进程是否会被此命令杀掉(而非脆弱的字符串匹配) - # 通过 PM2 环境变量判断:pm2 启动的进程有 PM2_HOME - self_restart = False - if pm2_name and os.environ.get("PM2_HOME") and "pm2 restart" in cmd: - # 检查命令是否包含当前进程名 - if re.search(rf'pm2\s+restart\s+{re.escape(pm2_name)}', cmd): - self_restart = True - - if self_restart: - # M1: 用 asyncio.sleep 延迟而非 nohup,保留子进程输出和错误检测 - # 先 sleep 让 handler 正常返回,再启动 restart 命令 - # restart 的子进程会在父进程死后被 pm2 新进程接管 - logger.info("Auto-deploy: self-restart detected, deferring 2s: %s", cmd) - await asyncio.sleep(2) - deploy_proc = await asyncio.create_subprocess_exec( - "sh", "-c", cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - # restart 会杀掉当前进程,communicate 可能不会完成 - # 但我们至少尝试读取输出 - try: - _, deploy_err = await asyncio.wait_for( - deploy_proc.communicate(), timeout=10) - except (asyncio.TimeoutError, ProcessLookupError): - # 预期行为:进程被 pm2 restart 杀掉 - logger.info("Auto-deploy: process killed by self-restart (expected)") - break - else: - deploy_proc = await asyncio.create_subprocess_exec( - "sh", "-c", cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - _, deploy_err = await asyncio.wait_for(deploy_proc.communicate(), timeout=30) - - if deploy_proc.returncode != 0: - logger.error("Auto-deploy: post_deploy failed: %s", deploy_err.decode()) - _send_deploy_failure_task(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}") - break - else: - logger.info("Auto-deploy: all post_deploy commands succeeded (files: %s)", ", ".join(file_list[:5])) - else: - 
logger.info("Auto-deploy: docs-only change for %s, skip post_deploy", repo) - - except asyncio.TimeoutError: - logger.error("Auto-deploy: timeout for %s", repo) - _send_deploy_failure_task(repo, pr_number, pr_title, "部署超时") - except Exception as e: - logger.error("Auto-deploy: unexpected error: %s", e) - - -async def _handle_issues(payload: Dict[str, Any]) -> None: - """处理 issues 事件:assigned → 通知被指派人;opened+部署失败 → 通知运维。""" - action = payload.get("action", "") - issue = payload.get("issue") - if not issue or not isinstance(issue, dict): - logger.warning("issues event missing issue field, skipping") - return - repo = _repo_fullname(payload) - issue_number = issue.get("number", 0) - issue_title = issue.get("title", "") - - if action == "assigned": - assignee = "" - assignees = issue.get("assignees") or [] - if not assignees: - single = issue.get("assignee") - if single and isinstance(single, dict): - assignees = [single] - if assignees: - assignee = assignees[-1].get("login", "") - else: - assignee = "" - if not assignee: - logger.debug("Issue assigned but no assignee found, skipping") - return - - labels_list = [lbl.get("name", "") - for lbl in (issue.get("labels") or [])] - labels = ", ".join(labels_list) if labels_list else "(无标签)" - issue_body = issue.get("body", "(无描述)") - brief = issue_title[:20].replace(" ", "-").lower() - - text = render_template("issue_assigned", { - "repo": repo, - "issue_number": str(issue_number), - "issue_title": issue_title, - "labels": labels, - "issue_body": issue_body or "(无描述)", - "brief": brief, - }) - - title = f"Issue 指派: {issue_title} ({repo}#{issue_number})" - _send_toolchain_task( - to_agent=assignee, - title=title, - description=text, - event_type="issue_assigned", - action_type="issue_assigned", - steps=[ - f"创建分支 fix/{issue_number}-{brief}", - "编码 + 写 UT", - "push → 等 CI", - f"CI 通过后创建 PR(Gitea API: POST /repos/{repo}/pulls)", - "等 Review", - f"提交 action report(POST 
http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "issue_number": issue_number, - "repo": repo, - "issue_title": issue_title, - "labels": labels, - "issue_body": issue_body or "(无描述)", - "brief": brief, - }, - ) - - elif action == "opened": - if "部署失败" in issue_title: - # 从 Issue body 提取 commit hash(Gitea deploy workflow 格式) - sha_match = re.search(r'[0-9a-f]{40}', issue.get("body", "")) - commit_sha = sha_match.group(0) if sha_match else "(未知)" - - text = render_template("deploy_failure", { - "repo": repo, - "commit_sha": commit_sha or "(未知)", - }) - - title = f"部署失败: {repo}" - for agent_id in ("jiangwei-infra", "pangtong-fujunshi"): - _send_toolchain_task( - to_agent=agent_id, - title=title, - description=text, - event_type="deploy_failure", - action_type="deploy_failure", - steps=[ - "检查 deploy 日志", - "排查失败原因", - "修复并重新部署", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "repo": repo, - "commit_sha": commit_sha or "(未知)", - }, - ) - - # Issue body @mention(opened 时检查) - issue_body = issue.get("body", "") or "" - sender = payload.get("sender", {}).get("login", "") - mentions = extract_mentions(issue_body, sender) - if mentions: - # 自动流转已通知 assignee - assignees = issue.get("assignees") or [] - if not assignees: - single = issue.get("assignee") - if single and isinstance(single, dict): - assignees = [single] - auto_targets = [a.get("login", "") for a in assignees if isinstance(a, dict)] - await _send_mention_mails( - mentions=mentions, - auto_targets=auto_targets, - source_type="Issue", - mention_type="Issue @mention", - source_url=issue.get("html_url", ""), - commenter=sender, - content=issue_body, - repo=repo, - issue_number=issue_number, - is_pr=False, - ) - - -async def _handle_issue_comment(payload: Dict[str, Any]) -> None: - """处理 issue_comment 事件:CI 失败关键词 → 通知 PR 作者;@mention → 通知被提及者。""" - comment = 
payload.get("comment") - if not comment or not isinstance(comment, dict): - logger.warning("issue_comment event missing comment field, skipping") - return - body = comment.get("body", "") - sender = comment.get("user", {}).get("login", "") - - issue = payload.get("issue") - if not issue or not isinstance(issue, dict): - logger.warning("issue_comment event missing issue field, skipping") - return - - action = payload.get("action", "") - if action != "created": - return - - # === 路径 1:CI 失败通知(原有逻辑,改为正向 if) === - if ("[CI]" in body or "CI 失败" in body) and issue.get("state") != "closed": - repo = _repo_fullname(payload) - issue_number = issue.get("number", 0) - - # 尝试从关联 PR 获取信息 - pr_author = issue.get("user", {}).get("login", "unknown") - branch_match = re.search(r"分支:\s*(\S+)", body) - branch = branch_match.group(1) if branch_match else "(未知)" - - # 提取错误摘要(取 comment body 前 500 字符) - error_summary = body[:500] if body else "(无错误信息)" - - text = render_template("ci_failure", { - "repo": repo, - "pr_number": str(issue_number), - "branch": branch, - "error_summary": error_summary, - }) - - title = f"CI 失败: {repo}#{issue_number}" - _send_toolchain_task( - to_agent=pr_author, - title=title, - description=text, - event_type="ci_failure", - action_type="ci_failure", - steps=[ - "查看完整 CI 日志(PR 页面或 Gitea Actions 页面)", - "修复失败的测试", - "push → CI 自动重跑", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", - ], - context_data={ - "pr_number": issue_number, - "repo": repo, - "branch": branch, - "error_summary": error_summary, - }, - ) - # CI 处理完不 return,继续检查 @mention - - # === 路径 2:@mention 通知(新增,独立路径) === - # 注意:@mention 检测与 CI 检测是独立的,同一条评论可同时触发两者 - mentions = extract_mentions(body, sender) - if mentions: - # 判断是 PR 还是 Issue(Gitea 中 PR 本质是特殊的 Issue) - is_pr = issue.get("pull_request") is not None - source_type = "PR" if is_pr else "Issue" - mention_type = "PR @mention" if is_pr else "Issue @mention" - - issue_number = 
issue.get("number", 0) - repo = _repo_fullname(payload) - - # 自动流转已通知的人(CI 失败通知的 PR 作者) - auto_targets: list[str] = [] - if ("[CI]" in body or "CI 失败" in body) and issue.get("state") != "closed": - auto_targets.append(issue.get("user", {}).get("login", "")) - - await _send_mention_mails( - mentions=mentions, - auto_targets=auto_targets, - source_type=source_type, - mention_type=mention_type, - source_url=issue.get("html_url", ""), - commenter=sender, - content=body, - repo=repo, - issue_number=issue_number, - is_pr=is_pr, - ) - - -# --------------------------------------------------------------------------- -# 事件分发 -# --------------------------------------------------------------------------- - -_EVENT_HANDLERS: Dict[str, Any] = { - "pull_request": _handle_pull_request, - "pull_request_sync": _handle_pr_synchronize, # Gitea: PR branch push 是独立事件类型 - "pull_request_review": _handle_pull_request_review, - "pull_request_review_approved": _handle_pull_request_review, - "pull_request_review_rejected": _handle_pull_request_review, - "pull_request_review_comment": _handle_pull_request_review, - "pull_request_comment": _handle_pull_request_review, # Gitea: review comment 独立事件类型 - # Gitea v1.23.4 实际发出的 review 子事件(无 _review_ 中间段) - "pull_request_approved": _handle_pull_request_review, - "pull_request_rejected": _handle_pull_request_review, - "issues": _handle_issues, - "issue_comment": _handle_issue_comment, -} - - -# --------------------------------------------------------------------------- -# Webhook 端点 -# --------------------------------------------------------------------------- - - -@router.post("/webhook/gitea") -async def gitea_webhook( - request: Request, - x_gitea_event: Optional[str] = Header(None, alias="X-Gitea-Event"), - x_gitea_delivery: Optional[str] = Header(None, alias="X-Gitea-Delivery"), - x_gitea_signature: Optional[str] = Header(None, alias="X-Gitea-Signature"), -) -> Response: - """Gitea Webhook 接收端点。 - - 处理流程:签名验证 → 幂等检查 → 事件分发 → Mail 推送。 - - 返回策略: - - 
payload 解析失败 / 未知事件 / 幂等重复 → 200(不触发重试) - - Mail 创建失败 → 500(触发 Gitea 重试) - """ - body = await request.body() - - # 1. 签名验证 - if not _verify_signature(body, x_gitea_signature): - logger.warning("Webhook signature verification failed") - return Response(status_code=403, - content="signature verification failed") - - # 3. 解析 payload(提前解析,用于幂等检查) - try: - payload = await request.json() - except Exception: - logger.warning("Failed to parse webhook payload") - return Response(status_code=200, content="invalid payload") - - # 2. 幂等检查(需要在 payload 解析后,以支持内容去重) - if x_gitea_event and x_gitea_delivery: - async with _idempotency_lock: - if _is_duplicate(x_gitea_event, x_gitea_delivery, payload): - logger.debug( - "Duplicate webhook: %s/%s", - x_gitea_event, - x_gitea_delivery) - return Response(status_code=200, content="duplicate") - - # 4. 查找 handler - action = payload.get("action", "") - logger.info("[WEBHOOK] event=%s action=%s delivery=%s", x_gitea_event, action, x_gitea_delivery) - handler = _EVENT_HANDLERS.get(x_gitea_event or "") - if not handler: - logger.info("[WEBHOOK] Unhandled event type: %s", x_gitea_event) - return Response(status_code=200, - content=f"unhandled event: {x_gitea_event}") - - # 5. 
执行 handler - try: - await handler(payload) - except Exception: - logger.exception("Mail creation failed for %s event", x_gitea_event) - return Response(status_code=500, content="mail creation failed") - - return Response(status_code=200, content="ok") diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py deleted file mode 100644 index bf7908d..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py +++ /dev/null @@ -1,129 +0,0 @@ -""" -prompt_composer.py — PromptSection Protocol + PromptContext + PromptComposer - -拼装器:有序管理 prompt 段落,按优先级排序后合并为最终 prompt。 -""" - -import logging -from dataclasses import dataclass, field -from typing import Dict, List, Optional, Protocol, runtime_checkable - -logger = logging.getLogger("moziplus-v2.prompt_composer") - -# --------------------------------------------------------------------------- -# Section 优先级范围约定 -# --------------------------------------------------------------------------- -PRIORITY_CONTEXT = 10 # 任务上下文 -PRIORITY_PRIOR = 20 # 前序信息 -PRIORITY_ROLE = 30 # 角色规范 -PRIORITY_API = 40 # API 操作指令 -PRIORITY_CONSTRAINTS = 50 # 硬约束 -PRIORITY_EXTENSION = 60 # 扩展段 - - -# --------------------------------------------------------------------------- -# PromptSection Protocol -# --------------------------------------------------------------------------- -@runtime_checkable -class PromptSection(Protocol): - """一个 prompt 段""" - - name: str # 段名(去重用,同名覆盖) - priority: int # 排序优先级(小数字=靠前) - - def render(self, context: "PromptContext") -> str: - """渲染此段的文本内容。返回空字符串表示不注入。""" - ... - - def should_include(self, context: "PromptContext") -> bool: - """是否注入此段(默认 True,条件段可覆盖)。""" - ... 
- - -# --------------------------------------------------------------------------- -# PromptContext 数据对象 -# --------------------------------------------------------------------------- -@dataclass -class PromptContext: - """Prompt 渲染的统一上下文""" - - task_id: str - title: str - description: str - must_haves: str - project_id: str - agent_id: str - - task: Optional[Dict] = None - role: str = "executor" - spawn_type: str = "executor" - - # mail 专用 - from_agent: str = "" - mail_type: str = "" # inform / request - - # toolchain 专用 - event_type: str = "" # ci_failure / review_request / ... - event_data: Dict = field(default_factory=dict) - action_type: str = "" # 动作分类(review_result / ci_failure / ...) - action_steps: list = field(default_factory=list) # 结构化编号步骤列表 - - # 前序产出 - depends_on_outputs: Optional[List] = None - - -# --------------------------------------------------------------------------- -# PromptComposer 拼装器 -# --------------------------------------------------------------------------- -class PromptComposer: - """有序拼装 prompt sections""" - - SEPARATOR = "\n\n---\n\n" - TOKEN_BUDGET_WARN = 800 # token 预算警告阈值 - CHARS_PER_TOKEN = 3.5 # 估算比率 - - def __init__(self) -> None: - self._sections: List[PromptSection] = [] - - def add(self, section: PromptSection) -> None: - """添加一个 section(同名覆盖)""" - self._sections = [s for s in self._sections if s.name != section.name] - self._sections.append(section) - - def add_many(self, sections: List[PromptSection]) -> None: - """批量添加""" - for s in sections: - self.add(s) - - def compose(self, context: PromptContext) -> str: - """拼装最终 prompt - - 1. 过滤 should_include=False 的段 - 2. 按 priority 排序 - 3. 逐段 render - 4. 过滤空段 - 5. 用分隔符连接 - 6. 
Token 预算警告(不截断) - """ - active = [s for s in self._sections if s.should_include(context)] - active.sort(key=lambda s: s.priority) - - parts = [s.render(context) for s in active] - parts = [p for p in parts if p.strip()] - - result = self.SEPARATOR.join(parts) - - # Token 估算 - tokens = max(1, int(len(result) / self.CHARS_PER_TOKEN)) - logger.debug( - "Composed prompt from %d sections, %d tokens", - len(parts), tokens, - ) - - if tokens > self.TOKEN_BUDGET_WARN: - logger.warning( - "Prompt exceeds %d token budget: %d tokens (task_id=%s)", - self.TOKEN_BUDGET_WARN, tokens, context.task_id, - ) - - return result diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py deleted file mode 100644 index 28451bb..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py +++ /dev/null @@ -1,2088 +0,0 @@ -"""Agent Spawner - 异步 spawn Full Agent / Subagent - -Full Agent: asyncio.create_subprocess_exec(异步非阻塞,不 await 完成) -Subagent: 占位(实际通过 OpenClaw Gateway API sessions_spawn,F17 完善) -""" - -from __future__ import annotations - -import asyncio -import json -import logging -import os -import uuid -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Optional - -from src.blackboard.db import get_connection -from src.daemon.task_type_registry import TaskTypeRegistry - -logger = logging.getLogger("moziplus-v2.spawner") - - -# ── Prompt 模板 ── - -# Mail 专用模板:inform 类型(纯通知,状态由系统管理) -MAIL_INFORM_TEMPLATE = """你收到一封飞鸽传书(纯通知)。 - -发件者: {from_agent} -主题: {title} -内容: {text} - -已阅即可。如需回复,用 in_reply_to 回复发件者(不需要填 to)。 -⚠️ 不要执行任何状态转换命令。 -""" - -# Mail 专用模板:request 类型(需要处理并回复,状态由系统管理) -MAIL_REQUEST_TEMPLATE = """你收到一封飞鸽传书,需要你处理并回复。 - -发件者: {from_agent} -主题: {title} -内容: {text} - -### 如何回复发件者 - -curl -s -X POST http://localhost:8083/api/mail \\ - -H 'Content-Type: application/json' \\ - -d '{{"from": "{agent_id}", "in_reply_to": "{task_id}", "title": "回复: {title}", 
"text": "你的回复内容"}}' - -⚠️ 不需要填 "to",系统自动回复给发件者。 - -### 如何给其他人发新邮件 - -curl -s -X POST http://localhost:8083/api/mail \\ - -H 'Content-Type: application/json' \\ - -d '{{"from": "{agent_id}", "to": "对方agent-id", "title": "标题", "text": "正文", "type": "inform"}}' - -⚠️ to 必须是有效的 agent id: {valid_agents} -⚠️ 纯通知用 type=inform,需要对方回复不填 type(默认 request) -⚠️ 不能给自己发邮件 -⚠️ 不要执行任何状态转换命令(标 working/done/review/failed 等),系统会自动处理。 -""" - -SPAWN_PROMPT_TEMPLATE = """{identity_section} - -## 任务 -{title} -{description} - -项目: {project_id} | ID: {task_id} -类型: {task_type} | 优先级: {priority} -验收标准: {must_haves} - -{retry_context} - -## 你能做什么 -- 读任务详情(含依赖、讨论、产出): GET {api_base}/projects/{project_id}/tasks/{task_id}?expand=all -- 读所有活跃任务: GET {api_base}/projects/{project_id}/tasks?status=working,claimed,review -- 写产出: POST {api_base}/projects/{project_id}/tasks/{task_id}/outputs -- 写评论/交接: POST {api_base}/projects/{project_id}/tasks/{task_id}/comments -- 更新状态: POST {api_base}/projects/{project_id}/tasks/{task_id}/status -- 创建子任务: POST {api_base}/projects/{project_id}/tasks -- 认领任务: POST {api_base}/projects/{project_id}/tasks/{{{{id}}}}/claim - -## 约束 -- 完成后必须写产出物(output)并标 review,不能无产出就提交 -- 失败了标 failed 并写明原因 -- 产出物 handoff comment ≥ 50 字符(用于系统验证) -- 禁止使用 sessions_send 直接发消息(用 Mail API 或黑板 comment) -- 委托他人做事用黑板 comment @agent-id,系统自动路由(如 @zhaoyun-data 你来获取数据,无需手动传 mentions 数组) -- 安全红线: {guardrails_summary} - -### API 请求体示例 -写产出: POST .../outputs -```json -{{{{"agent": "{agent_id}", "content_type": "code", "title": "产出标题", "content_path": "/path/to/file", "summary": "简要说明"}}}} -``` - -写评论: POST .../comments -```json -{{{{"author": "{agent_id}", "body": "评论内容(≥50字符)", "comment_type": "handoff"}}}} -``` -""" - - -DISCUSSION_PROMPT_TEMPLATE = """你被 spawn 来参与黑板讨论。这是一个 v2.9 四相循环的讨论环节。 - -## 你的任务 - -{goal_snapshot} - -## 约束 - -{constraints} - -## 黑板 API - -你可以随时: -- 读黑板:GET http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}?expand=all(含 comments、outputs) -- 写 comment:POST 
http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/comments - body: {{"author": "{agent_id}", "body": "内容(@agent-id 自动路由)"}} -- 创建 sub task:POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks - body: {{"title": "...", "description": "...", "task_type": "...", "parent_task": "{task_id}", "must_haves": "{{\"capability\": \"...\"}}"}} -- 认领任务:POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{{sub_task_id}}/claim - -## 行为准则 - -1. **你是自主的。**读黑板、思考、行动,不要等指令。 -2. **不重复别人的工作。**动手前先读黑板看谁在做什么(Separation)。 -3. **保持方向对齐。**你的产出方向和 parent goal 对齐,不确定时 @pangtong-fujunshi(Alignment)。 -4. **产出可共享。**产出写入黑板,让其他人能看到你的成果(Cohesion)。 -5. **不越界。**安全红线不要碰,超出能力的 @ 庞统升级(Boundary)。 -6. **随时讨论。**执行过程中需要协作时 @ 对应 Agent,讨论是灵活的不是固定阶段的。 - -## 讨论完成后 - -- 如果讨论收敛到可执行的任务,直接创建 sub task -- 如果有分歧或不确定,在黑板上写 comment @ 庞统裁决 -- 标记完成: -```bash -curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \ - -H 'Content-Type: application/json' \ - -d '{{"status": "done", "agent": "{agent_id}"}}' -``` -""" - - -# Mail 续杯专用模板:不包含状态转换指令(系统自动标 done) -MAIL_RETRY_PROMPT = """你收到一个续杯提醒。你的任务在执行过程中被中断了。 - -发件者: {from_agent} -主题: {title} -续杯次数: 第 {retry_count} 次(上限 {max_retries} 次) - -请检查 session 历史中你之前做了什么,然后继续未完成的工作。 - -⚠️ 不要执行任何状态转换命令(标 working/done/review/failed 等),系统会自动处理。 -⚠️ 如果任务已完成,直接写产出即可,不要调 status API。 -""" - - -class AgentBusyError(Exception): - """Agent 无法 spawn(被占用/冷却/session 锁等) - - #07: reason 字段区分具体原因,便于 dispatcher 层区分处理。 - """ - - def __init__(self, agent_id: str, reason: str = "busy", - detail: Optional[dict] = None): - self.agent_id = agent_id - # counter_blocked / session_locked / session_running / session_compacting / session_stuck - self.reason = reason - self.detail = detail or {} - super().__init__(f"{agent_id}: {reason}") - - -class AgentSpawner: - """Agent spawn 管理""" - - def __init__( - self, - db_path: Optional[Path] = None, - agent_timeout: float = 630.0, - dry_run: bool = False, - api_host: str = "127.0.0.1", 
- api_port: int = 8083, - bootstrap_builder: Optional[Any] = None, - gateway_timeout: float = 600.0, - max_retries: int = 3, - max_monitor_timeouts: int = 3, - counter: Optional[Any] = None, - ): - """ - Args: - db_path: 项目黑板 DB 路径(用于写 task_attempts) - agent_timeout: Agent 超时秒数 - dry_run: 测试模式,不实际 spawn - api_host: API 地址(供 Agent 回写) - api_port: API 端口(供 Agent 回写) - """ - self.db_path = db_path - self.agent_timeout = agent_timeout - self.dry_run = dry_run - self.api_host = api_host - self.api_port = api_port - self.bootstrap_builder = bootstrap_builder - self.gateway_timeout = gateway_timeout - self.max_retries = max_retries - self.max_monitor_timeouts = max_monitor_timeouts - # v2.7.2: counter 引用(spawn_full_agent 内部 acquire/release) - self.counter = counter - # guardrails: 由 main.py 在初始化后赋值 - self.guardrails = None - - # session 注册表 {session_id: {...}} - self._sessions: Dict[str, Dict[str, Any]] = {} - # B2 compact 等待计数器 {task_id: count} - self._compact_waits: Dict[str, int] = {} - # B1 假死计数器 {task_id: count} - self._stuck_counts: Dict[str, int] = {} - self._valid_agents_cache: Optional[set] = None - - def _load_valid_agents(self) -> set: - """从 config/default.yaml 读取有效 Agent ID 列表(带缓存)""" - if self._valid_agents_cache is not None: - return self._valid_agents_cache - config_path = Path(__file__).parent.parent / "config" / "default.yaml" - if config_path.exists(): - try: - import yaml - with open(config_path) as f: - cfg = yaml.safe_load(f) - profiles = cfg.get("daemon", {}).get("agent_profiles", {}) - if profiles: - self._valid_agents_cache = set(profiles.keys()) - return self._valid_agents_cache - except Exception: - pass - self._valid_agents_cache = { - "zhangfei-dev", "guanyu-dev", "zhaoyun-data", - "jiangwei-infra", "pangtong-fujunshi", "simayi-challenger" - } - return self._valid_agents_cache - - @property - def active_sessions(self) -> Dict[str, Dict[str, Any]]: - """当前活跃的 spawn sessions""" - return {sid: s for sid, s in self._sessions.items() - if 
s.get("status") == "running"} - - def build_spawn_message( - self, - task_id: str, - title: str, - description: str, - task_type: str = "", - priority: int = 5, - must_haves: str = "", - project_id: str = "", - agent_id: str = "", - current_status: str = "claimed", - retry_context: str = "", - task: Optional[Any] = None, - project_config: Optional[Dict[str, Any]] = None, - spawn_type: str = "executor", # executor | discussion | review - ) -> str: - """构建 Agent spawn 的消息(优先用 BootstrapBuilder,fallback 用模板) - - Args: - current_status: 任务当前状态(动态生成状态机提示) - retry_context: 重试上下文(前轮产出摘要 + 审查意见) - task: Task 对象(BootstrapBuilder 用) - project_config: 项目配置(BootstrapBuilder 用) - spawn_type: spawn 类型(executor=执行, discussion=讨论, review=审查) - """ - # discussion 类型直接用模板(不走 BootstrapBuilder) - if spawn_type == "discussion": - return self._build_discussion_prompt( - task_id, title, description, must_haves, - project_id, agent_id) - - # handler 路径:Task/Mail/Toolchain 用各自的 PromptSection 构建 - handler = TaskTypeRegistry.get_by_project(project_id) - if handler: - from src.daemon.prompt_composer import PromptContext - # 从 must_haves 解析 mail 元数据(from / performative) - from_agent = "" - mail_type = "" - action_type = "" - action_steps = [] - try: - meta = json.loads(must_haves) if must_haves else {} - from_agent = meta.get("from", "") - mail_type = meta.get("performative", meta.get("type", "")) - # toolchain 字段提取 - action_type = meta.get("action_type", "") - action_steps = meta.get("steps", []) - except Exception: - pass - ctx = PromptContext( - task_id=task_id, title=title, description=description or "", - must_haves=must_haves or "", project_id=project_id, - agent_id=agent_id, role=spawn_type, - spawn_type=spawn_type, - from_agent=from_agent, mail_type=mail_type, - action_type=action_type, action_steps=action_steps, - ) - return handler.build_prompt(ctx) - - # 旧路径保留:_general 等非 handler 项目 - - # 走 BootstrapBuilder 新路径 - if self.bootstrap_builder and task is not None: - role_map = { - 
"executor": "executor", - "review": "reviewer", - "discussion": "planner"} - role = role_map.get(spawn_type, "executor") - bootstrap_prompt = self.bootstrap_builder.build_for_task( - task=task, - role=role, - ) - api_section = self._build_api_section( - project_id, task_id, agent_id) - return bootstrap_prompt + "\n\n---\n\n" + api_section - - # 无 BootstrapBuilder 或无 task 对象 → 最小 fallback - # 只保留任务上下文 + API 操作指令 - logger.warning( - "No BootstrapBuilder or task object, using minimal fallback") - return self._build_minimal_fallback( - task_id, title, description, must_haves, - project_id, agent_id) - - def _build_minimal_fallback(self, task_id, title, description, must_haves, - project_id, agent_id): - """最小 fallback:只有任务上下文 + API 指令""" - task_section = f"""## 任务 -{title} -{description or "(无描述)"} - -项目: {project_id} | ID: {task_id} -验收标准: {must_haves or "(无)"}""" - api_section = self._build_api_section(project_id, task_id, agent_id) - return task_section + "\n\n---\n\n" + api_section - - def _build_api_section(self, project_id: str, task_id: str, - agent_id: str) -> str: - """构建 API 回写操作指令(BootstrapBuilder 模式下补充)""" - # handler 项目(_mail/_toolchain)的 success_status 由 PromptSection 处理 - # 这里只处理无 handler 的项目(normal task) - handler = TaskTypeRegistry.get_by_project(project_id) - if handler: - success_status = '"done"' if handler.target_success_status == "done" else '"review"' - else: - success_status = '"review"' - return f"""## 操作指令 - -### 状态回写 -开始工作: -```bash -curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/tasks/{task_id}/status \ - -H 'Content-Type: application/json' \ - -d '{{"status": "working", "agent": "{agent_id}"}}' -``` - -### 写入产出 -```bash -curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/tasks/{task_id}/outputs \ - -H 'Content-Type: application/json' \ - -d '{{"agent": "{agent_id}", "type": "<类型>", "title": "<标题>", "content": "<内容>", "summary": "<摘要>"}}' -``` - -### 完成后 -成功:status → {success_status} 
| 失败:status → "failed" -""" - - def _build_discussion_prompt(self, task_id: str, title: str, - description: str, must_haves: str, - project_id: str, agent_id: str) -> str: - """构建讨论类 spawn prompt(§3.3 框架 + Boids)""" - goal_snapshot = description or title - constraints = must_haves or "(无特殊约束)" - - return DISCUSSION_PROMPT_TEMPLATE.format( - goal_snapshot=goal_snapshot, - constraints=constraints, - project_id=project_id, - task_id=task_id, - agent_id=agent_id, - api_host=self.api_host, - api_port=self.api_port, - ) - - def _inject_agent_identity(self, agent_id: str) -> str: - """#03: 注入 Agent 身份+专长""" - caps = "通用" - router = getattr(self, '_router_ref', None) - if router: - profile = router.agent_profiles.get(agent_id) - if profile and getattr(profile, 'capabilities_zh', None): - caps = ", ".join(profile.capabilities_zh) - return f"你是 {agent_id},专长: {caps}。" - - def _get_guardrails_summary(self) -> str: - """#03: 从 GuardrailEngine 提取红线摘要""" - if not self.guardrails: - return "无特殊限制" - try: - return "、".join(r.get("name", r.get("rule_id", "")) - for r in self.guardrails.rules[:6]) - except Exception: - return "无特殊限制" - - def _get_agent_profile(self, agent_id: str): - """获取 Agent 能力画像""" - router = getattr(self, '_router_ref', None) - if router: - return router.agent_profiles.get(agent_id) - return None - - def _build_mail_prompt(self, task_id: str, title: str, description: str, - must_haves: str, agent_id: str) -> str: - """构建 Mail 专用精简模板""" - # 解析 must_haves 获取 from 和 performative - from_agent = agent_id - performative = "request" - try: - meta = json.loads(must_haves) if must_haves else {} - from_agent = meta.get("from", agent_id) - performative = meta.get( - "performative", meta.get( - "type", "request")) - except Exception: - pass - - # 截断 title 和 text 用于模板安全 - safe_title = (title or "").replace('"', '\\"')[:100] - safe_text = (description or "").replace('"', '\\"') - - # 获取有效 Agent 列表(从 config/default.yaml 读取) - valid_agents_list = self._load_valid_agents() - 
valid_agents_str = " / ".join(sorted(valid_agents_list)) - - common_kwargs = dict( - from_agent=from_agent, - title=safe_title, - text=safe_text, - task_id=task_id, - agent_id=agent_id, - api_host=self.api_host, - api_port=self.api_port, - valid_agents=valid_agents_str, - ) - - if performative == "inform": - return MAIL_INFORM_TEMPLATE.format(**common_kwargs) - else: - return MAIL_REQUEST_TEMPLATE.format(**common_kwargs) - - async def spawn_full_agent( - self, - agent_id: str, - message: str, - new_session: bool = False, - task_id: Optional[str] = None, - on_complete: Optional[Any] = None, - use_main_session: bool = False, - task_db_path: Optional[Path] = None, - reuse_session_id: Optional[str] = None, - on_checks_passed: Optional[Any] = None, - skip_counter: bool = False, - broadcast_task_ids: Optional[List[str]] = None, - ) -> str: - """Spawn Full Agent(异步非阻塞) - - v2.7.2: counter acquire/release 在内部统一管理。 - 调用级生命周期:spawn 时 acquire,进程退出时 release(通过 wrapped_on_complete)。 - - Args: - on_complete: 业务回调(agent_id, outcome) - 不含 counter.release, - counter.release 由内部 wrapped_on_complete 保证。 - use_main_session: True = 投递到主 Agent session(不传 --session-id) - on_checks_passed: 所有检查通过后的回调(session check + counter acquire 后、subprocess 前) - reuse_session_id: 传入指定 session-id 复用(用于续杯) - deprecated,use_main_session=True 已替代 - - Returns: - session_id - - Raises: - AgentBusyError: agent 被 counter 占用或冷却中 - """ - # ── #07 Acquire-First: counter 前置 → session check 在锁内贴近 spawn ── - - # Step 0: 分配 session_id(纯计算,无 IO) - if use_main_session: - session_id = None - elif reuse_session_id: - session_id = reuse_session_id - else: - session_id = str(uuid.uuid4()) - _sid_key = session_id or "main" # counter 用的 key - - # Phase 0: Pre-acquire 修复(无锁) - # timeout/failed 状态先修复再 acquire。revive 只改 running→idle,幂等安全。 - # asyncio 协作式并发保证同一时刻只有一个协程在执行,revive 的 sessions.json - # 写操作不会真正并行。 - if use_main_session: - pre_state = self._check_session_state(agent_id) - if pre_state.get("status") in ("timeout", 
"failed"): - logger.info("Phase 0: %s status=%s, reviving before acquire", - agent_id, pre_state["status"]) - self._revive_session(agent_id) - elif pre_state.get("status") == "running" and not pre_state.get("lock_pid_alive"): - # status=running 但 lock PID 已死 → 假死,revive - logger.warning( - "Phase 0: %s status=running but lock PID dead, reviving", - agent_id) - self._revive_session(agent_id) - - # Phase 1: Counter acquire(互斥锁) - # v2.8.1 Bug-4 fix: retry 时跳过 counter(counter 从原始 spawn 保持到 retry 完成) - if self.counter and not skip_counter: - acquired = await self.counter.acquire(agent_id, _sid_key) - if not acquired: - raise AgentBusyError(agent_id, reason="counter_blocked") - - # Phase 2: Session check(在锁保护下,贴近 spawn) - # 并列收集所有 block 原因,统一判定。 - if use_main_session: - session_state = self._check_session_state(agent_id) - logger.info("Phase 2 session check for %s: status=%s lock_pid=%s lock_pid_alive=%s compact=%s", - agent_id, session_state.get( - 'status'), session_state.get('lock_pid'), - session_state.get('lock_pid_alive'), session_state.get('recent_compact')) - - blockers = [] - if session_state.get( - "lock_pid_alive") and not session_state.get("lock_expired"): - blockers.append( - ("session_locked", session_state.get("lock_pid"))) - if session_state.get("status") == "running": - if session_state.get("lock_pid_alive"): - # 真 running:外部进程占用 - blockers.append(("session_running", None)) - else: - # 假 running:lock PID 死了但 status 还在 running → Phase 2.5 处理 - pass - if session_state.get("recent_compact"): - blockers.append(("session_compacting", None)) - - if blockers: - # 释放 counter,报具体原因 - if self.counter and not skip_counter: - self.counter.release(agent_id, _sid_key) - primary_reason, primary_detail = blockers[0] - logger.info("Phase 2 blocked %s: %s (all=%s)", - agent_id, primary_reason, blockers) - raise AgentBusyError(agent_id, reason=primary_reason, - detail={"blockers": blockers}) - - # Phase 2.5: 假死修复(status=running + lock PID 死 → revive → 重检) - # 此场景应被 Phase 
0 提前修复,这里做兜底 - if session_state.get("status") == "running" and not session_state.get( - "lock_pid_alive"): - logger.warning("Phase 2.5: %s status=running + lock dead (should be caught in Phase 0), reviving", - agent_id) - self._revive_session(agent_id) - session_state = self._check_session_state(agent_id) - if session_state.get("status") == "running": - if self.counter and not skip_counter: - self.counter.release(agent_id, _sid_key) - raise AgentBusyError(agent_id, reason="session_stuck", - detail={"status": "running after revive"}) - - # Phase 3: on_checks_passed 回调 - # 注意:如果回调抛异常,counter 已 acquire 但 subprocess 未启动, - # wrapped_on_complete 不会执行。需在此 try/except 中手动 release。 - if on_checks_passed: - try: - on_checks_passed() - except Exception: - if self.counter and not skip_counter: - self.counter.release(agent_id, _sid_key) - raise - - if self.dry_run: - logger.info( - "[DRY RUN] Would spawn agent %s (session=%s)", - agent_id, - _sid_key) - self._register_session(_sid_key, agent_id, task_id, pid=None) - return _sid_key - - # 4. 
wrapped_on_complete 保证 counter release(闭包捕获 _sid_key) - async def _wrapped_on_complete(aid, outcome): - try: - if self.counter: - self.counter.release(aid, _sid_key) - finally: - if on_complete: - try: - result = on_complete(aid, outcome) - if asyncio.iscoroutine(result): - await result - except Exception: - logger.warning( - "Business on_complete failed for %s", aid, exc_info=True) - - cmd = [ - "openclaw", "agent", - "--agent", agent_id, - ] - if session_id: - cmd.extend(["--session-id", session_id]) - cmd.extend([ - "--message", message, - "--json", - "--timeout", str(int(self.gateway_timeout)), - ]) - - try: - proc = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - self._register_session(session_id, agent_id, task_id, proc.pid, - broadcast_task_ids=broadcast_task_ids) - logger.info("Spawned agent %s (session=%s, pid=%d)", - agent_id, session_id, proc.pid) - - # Schedule monitor(传 wrapped_on_complete) - asyncio.create_task( - self._monitor_process(session_id, proc, agent_id, task_id, - on_complete=_wrapped_on_complete, - db_path=task_db_path or self.db_path) - ) - - return session_id - - except Exception as e: - # spawn 失败也要 release counter - if self.counter: - self.counter.release(agent_id, _sid_key) - logger.exception("Failed to spawn agent %s", agent_id) - self._record_attempt( - task_id, - agent_id, - "spawn_failed", - error=str(e)) - raise - - async def spawn_subagent( - self, - task_description: str, - task_id: Optional[str] = None, - ) -> str: - """Spawn Subagent(占位,实际通过 Gateway API) - - Returns: - session_id - """ - session_id = str(uuid.uuid4()) - - if self.dry_run: - logger.info( - "[DRY RUN] Would spawn subagent (session=%s)", - session_id) - self._register_session(session_id, "subagent", task_id, pid=None) - return session_id - - # TODO: F17 通过 Gateway API sessions_spawn 实现 - logger.info("Subagent spawn (session=%s) - placeholder", session_id) - self._register_session(session_id, 
"subagent", task_id, pid=None) - return session_id - - # ── 续杯 Prompt 模板 ── - - RETRY_PROMPT = """你收到一个续杯提醒。你的任务在执行过程中被中断了。 - -## 任务信息 - -- 项目: {project_id} -- 任务ID: {task_id} -- 标题: {title} -- 续杯次数: 第 {retry_count} 次(上限 {max_retries} 次) - -请检查 session 历史中你之前做了什么,然后继续未完成的工作。 - -## 操作指令 - -### 查看任务当前状态 -```bash -curl http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}?expand=all -``` - -### 如果已经完成,标记 review -```bash -curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \\ - -H 'Content-Type: application/json' \\ - -d '{{"status": "review", "agent": "{agent_id}"}}' -``` - -### 写入产出(如果之前没写) -```bash -curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/outputs \\ - -H 'Content-Type: application/json' \\ - -d '{{"agent": "{agent_id}", "type": "<类型>", "title": "<标题>", "content": "<内容>", "summary": "<摘要>"}}' -``` - -### 如果无法解决,标记失败 -```bash -curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \\ - -H 'Content-Type: application/json' \\ - -d '{{"status": "failed", "agent": "{agent_id}", "detail": "<失败原因>"}}' -``` - -{fallback_hint}""" - - async def _monitor_process( - self, - session_id: Optional[str], - proc: asyncio.subprocess.Process, - agent_id: str, - task_id: Optional[str], - on_complete: Optional[Any] = None, - db_path: Optional[Path] = None, - monitor_timeout_count: int = 0, - ) -> None: - """监控子进程全生命周期(设计文档 spawner-monitor-design.md)""" - stdout_chunks: list = [] - stderr_chunks: list = [] - - try: - # ── 等待进程退出 + 流式读取 ── - async def _read_streams(): - async def _read_out(): - while True: - chunk = await proc.stdout.read(4096) - if not chunk: - break - stdout_chunks.append(chunk) - - async def _read_err(): - while True: - chunk = await proc.stderr.read(4096) - if not chunk: - break - stderr_chunks.append(chunk) - - await asyncio.gather(_read_out(), _read_err(), proc.wait()) - - await asyncio.wait_for(_read_streams(), 
timeout=self.agent_timeout) - # ── 情况 A:进程退出 ── - exit_code = proc.returncode - await self._handle_exit( - session_id, agent_id, task_id, exit_code, - stdout_chunks, stderr_chunks, on_complete, db_path - ) - - except asyncio.TimeoutError: - # ── 情况 B:monitor timeout(进程没退出)── - logger.warning("Agent %s monitor timeout (session=%s, count=%d/%d)", - agent_id, session_id, monitor_timeout_count + 1, - self.max_monitor_timeouts) - await self._handle_monitor_timeout( - session_id, agent_id, task_id, proc, - on_complete, db_path, stderr_chunks, monitor_timeout_count - ) - - async def _handle_exit(self, session_id, agent_id, task_id, exit_code, - stdout_chunks, stderr_chunks, on_complete, db_path): - """情况 A:进程退出后的处理 - - v2.7.2: 进程退出 = counter release(由 on_complete = wrapped_on_complete 保证)。 - 只有 A2/A3(gateway_timeout)触发续杯,其他都不 retry。 - A9(api_error/429)额外推回 pending + 设冷却。 - """ - stdout_text = b"".join(stdout_chunks).decode("utf-8", errors="replace") - stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace") - - # 解析 stdout JSON - json_result = self._parse_stdout_json(stdout_text) - logger.info("Parsed JSON result for agent=%s session=%s: %s", - agent_id, session_id, json_result) - - # 查任务实际状态 - task_status = self._get_task_status( - db_path, task_id) if task_id else None - - # 分类 - cls = self._classify_outcome( - exit_code, - json_result, - stderr_text, - task_status, - stdout_text) - outcome = cls["outcome"] - - # 更新 session 状态 - sid = session_id or "main" - if sid in self._sessions: - self._sessions[sid]["status"] = outcome - self._sessions[sid]["completed_at"] = datetime.utcnow().isoformat() - self._sessions[sid]["exit_code"] = exit_code - if json_result: - self._sessions[sid]["meta"] = json_result - - # 记录 attempt - self._record_attempt( - task_id, agent_id, outcome, exit_code=exit_code, - db_path=db_path, - metadata={ - "status": json_result.get("status"), - "summary": json_result.get("summary"), - "fallback_used": json_result.get("fallback_used"), - 
"fallback_reason": json_result.get("fallback_reason"), - "task_status_at_exit": task_status, - } - ) - - logger.info("Agent %s finished (session=%s, outcome=%s, exit=%d, task_status=%s)", - agent_id, session_id, outcome, exit_code, task_status) - - # 广播反馈追踪(Phase 1 bug fix) - if task_id == "broadcast" and hasattr( - self, '_ticker') and self._ticker: - # 广播任务:从 session 信息取真实 task_id 列表,逐一回调 tracker - sess_info = self._sessions.get(session_id or "main", {}) - bt_ids = sess_info.get("broadcast_task_ids") or [] - # 广播场景一律标 no_reply:Agent 只 claim 一个任务, - # 其余任务的 tracker 不能被 claimed 清除 - for real_task_id in bt_ids: - self._ticker.record_broadcast_response( - real_task_id, agent_id, "no_reply") - elif task_id and hasattr(self, '_ticker') and self._ticker: - outcome_str = "claimed" if cls.get( - "status") == "ok" else "no_reply" - self._ticker.record_broadcast_response( - task_id, agent_id, outcome_str) - - if cls["should_retry"]: - # cooldown: 新增的可恢复场景(A14/A15/A16/A8/A10) - cooldown_seconds = cls.get("cooldown_seconds", 0) - if cooldown_seconds and self.counter: - self.counter.set_cooldown(agent_id, seconds=cooldown_seconds) - # A2/A3: gateway_timeout → 续杯(on_complete 会 release counter) - await self._do_retry( - session_id, agent_id, task_id, on_complete, db_path, - cls.get("retry_field", "retry_count") - ) - elif outcome == "api_error": - # A9: [DEPRECATED] api_error 已改为 should_retry=True 走续杯路径。 - # 此分支理论上不再命中,保留作为安全兜底。 - # A9: 429/API 错误 → release counter(on_complete)+ 推回 pending + 冷却 - # 有上限:api_retry_count 累计达 max_retries 则标 failed - await self._do_on_complete_async(on_complete, agent_id, outcome) - if self.counter: - self.counter.set_cooldown(agent_id) - if db_path and task_id: - retry_counts = self._get_retry_counts(db_path, task_id) - api_count = retry_counts.get("api_retry_count", 0) + 1 - retry_counts["api_retry_count"] = api_count - self._update_retry_counts(db_path, task_id, retry_counts) - if api_count >= self.max_retries: - logger.error("Task %s 
api_retry_count=%d >= max_retries, marking failed", - task_id, api_count) - self._mark_task(db_path, task_id, "failed", { - "reason": "max_api_retry_count", "count": api_count, - }) - else: - self._mark_task(db_path, task_id, "pending", { - "reason": "api_error_retry", - "api_retry_count": api_count, - }) - logger.info("Task %s pushed back to pending (api_error, api_retry=%d/%d)", - task_id, api_count, self.max_retries) - elif outcome == "fallback_timeout" and not cls["should_retry"]: - # A3/A3b: fallback 分级处理 - # fallback_count 从 task_attempts.metadata 读取, - # 达 max_retries 标 failed(A3),否则 retry + cooldown(A3b) - fallback_count = 0 - if db_path and task_id: - retry_counts = self._get_retry_counts(db_path, task_id) - fallback_count = retry_counts.get("fallback_count", 0) + 1 - retry_counts["fallback_count"] = fallback_count - self._update_retry_counts(db_path, task_id, retry_counts) - - if fallback_count >= self.max_retries: - # A3: 连续 fallback 达上限,标 failed - logger.error("A3 fallback exhausted: agent=%s session=%s task=%s " - "fallback_count=%d reason=%s", - agent_id, session_id, task_id, fallback_count, - json_result.get("fallback_reason")) - await self._do_on_complete_async(on_complete, agent_id, outcome) - if db_path and task_id: - self._mark_task(db_path, task_id, "failed", { - "reason": "fallback_exhausted", - "fallback_count": fallback_count, - "fallback_reason": json_result.get("fallback_reason"), - }) - else: - # A3b: fallback 未达上限,retry + cooldown - logger.warning("A3b fallback retry: agent=%s session=%s task=%s " - "fallback_count=%d/%d reason=%s", - agent_id, session_id, task_id, fallback_count, - self.max_retries, json_result.get("fallback_reason")) - if self.counter: - self.counter.set_cooldown(agent_id, seconds=60) - await self._do_retry( - session_id, agent_id, task_id, on_complete, db_path, - "fallback_retry_count" # 独立计数,不与 gateway_timeout 的 retry_count 共用 - ) - else: - # 其他:A1(completed), A4(agent_failed), A7(auth_failed), - # 
A8(gateway_unreachable), A11(lock_conflict), - # A10(compact_failed), A12(agent_error) - # v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间 - if outcome == "crashed" and self.counter: - self.counter.set_cooldown(agent_id, seconds=60) - logger.info( - "Crash cooldown set for %s: 60s (outcome=%s)", - agent_id, - outcome) - elif outcome in ("compact_failed", "process_crash", "session_stuck", - "compact_hanging", "agent_error", "compact_interrupted") and self.counter: - self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟 - logger.info( - "Error cooldown set for %s: 300s (outcome=%s)", - agent_id, - outcome) - # F1: 不可恢复 outcome → 立刻标 failed + 写黑板 - if outcome in ("auth_failed", - "agent_error") and db_path and task_id: - logger.error( - "Task %s: unrecoverable outcome=%s, marking failed immediately", - task_id, - outcome) - self._mark_task(db_path, task_id, "failed", { - "reason": outcome, - "stderr_preview": (stderr_text or "")[:500], - }) - # 注意: cooldown 期间任务状态仍为 working,但 counter 已释放。 - # DB 中的 working 是"假 working"——ticker 不会重新分配,_check_timeouts 会 - # 在 cooldown 结束后回收。如果 ticker 在此期间给同一 agent 分配新任务,属正常行为。 - # 进程退出 → on_complete release counter - # 任务状态由各 outcome 自行处理(或等 ticker) - await self._do_on_complete_async(on_complete, agent_id, outcome) - - async def _handle_monitor_timeout(self, session_id, agent_id, task_id, proc, - on_complete, db_path, stderr_chunks, - monitor_timeout_count): - """情况 B:monitor timeout""" - # 读已缓冲的 stderr - try: - remaining = await asyncio.wait_for(proc.stderr.read(), timeout=2.0) - if remaining: - stderr_chunks.append(remaining) - except Exception: - pass - - # stderr collected but not used in this handler - # (kept for potential future diagnostics) - b"".join(stderr_chunks).decode("utf-8", errors="replace") - - # 检查 session 状态 - state = self._check_session_state(agent_id) - - # B1: 假死 - 先复活,连续假死 ≥2 次再 failed - if state.get("status") == "running" and not state.get( - "lock_pid_alive", True): - # 假死计数 - stuck_count = 
self._stuck_counts.get(task_id, 0) + 1 - self._stuck_counts[task_id] = stuck_count - - if stuck_count >= 2: - # 连续假死 ≥2 次,标 failed - logger.error("Agent %s session stuck %d times (session=%s, lock PID dead)", - agent_id, stuck_count, session_id) - self._mark_task(db_path, task_id, "failed", - {"reason": "session_stuck", "stuck_count": stuck_count, - "diagnostics": state}) - await self._do_on_complete_async(on_complete, agent_id, "session_stuck") - return - - # 第 1 次假死 → 尝试复活 - logger.warning("Agent %s session stuck (attempt %d), reviving (session=%s)", - agent_id, stuck_count, session_id) - revived = self._revive_session(agent_id) - if revived: - logger.info("Agent %s session revived, releasing counter for ticker re-dispatch", - agent_id) - # release counter → 任务保持 working → ticker 下次 re-dispatch - await self._do_on_complete_async(on_complete, agent_id, "session_revived") - else: - # 复活失败 → 标 failed - logger.error( - "Agent %s revive failed, marking failed", agent_id) - self._mark_task(db_path, task_id, "failed", - {"reason": "revive_failed", "stuck_count": stuck_count, - "diagnostics": state}) - await self._do_on_complete_async(on_complete, agent_id, "revive_failed") - return - - # B2/B3/B4: 进程还活着 - # B2: compact 进行中 - 不计入 monitor timeout 计数,继续等 - if state.get("recent_compact"): - logger.info("Agent %s recent compaction detected, extending patience " - "(session=%s, monitor=%d/%d)", - agent_id, session_id, monitor_timeout_count, self.max_monitor_timeouts) - # 不递增 monitor_timeout_count,但最多额外等 max_monitor_timeouts 次 - # 用独立计数器防止无限等待 - compact_wait_count = self._compact_waits.get(task_id, 0) + 1 - self._compact_waits[task_id] = compact_wait_count - if compact_wait_count >= self.max_monitor_timeouts: - # #07.3 ACT-2: compact_hanging 不标 failed,只 release counter - # 进程还活着但不 monitor,等 ticker _check_timeouts 超时回收 → 重新 dispatch - logger.warning("Agent %s compact hanging after %d waits, releasing counter for ticker re-dispatch", - agent_id, compact_wait_count) - 
self._compact_waits.pop(task_id, None) - await self._do_on_complete_async(on_complete, agent_id, "compact_hanging") - return - # 继续等 - asyncio.create_task( - self._monitor_process( - session_id, proc, agent_id, task_id, - on_complete=on_complete, db_path=db_path, - monitor_timeout_count=monitor_timeout_count, - ) - ) - return - - # B3/B4: 无 compact,正常计数 - monitor_timeout_count += 1 - if monitor_timeout_count >= self.max_monitor_timeouts: - logger.error("Agent %s max monitor timeouts (session=%s, count=%d)", - agent_id, session_id, monitor_timeout_count) - self._mark_task(db_path, task_id, "failed", { - "reason": "max_monitor_timeouts", - "count": monitor_timeout_count, - "elapsed_seconds": monitor_timeout_count * int(self.agent_timeout), - "diagnostics": state, - }) - await self._do_on_complete_async(on_complete, agent_id, "max_monitor_timeouts") - return - - # 未超限:继续等(不 release counter) - logger.info("Agent %s continuing monitor (session=%s, count=%d/%d)", - agent_id, session_id, monitor_timeout_count, self.max_monitor_timeouts) - asyncio.create_task( - self._monitor_process( - session_id, proc, agent_id, task_id, - on_complete=on_complete, db_path=db_path, - monitor_timeout_count=monitor_timeout_count, - ) - ) - - async def _do_retry(self, session_id, agent_id, task_id, on_complete, - db_path, retry_field="retry_count"): - """续杯:手动 release counter 后通过 spawn_full_agent 重新 spawn - - v2.7.2: 进程已退出但 wrapped_on_complete 未被调用(只有 should_retry 分支走到这里)。 - 需要手动 release counter,然后 spawn_full_agent 内部会 acquire。 - on_complete(含 counter release)置为 None,避免 double release。 - """ - # v2.8.1 Bug-4 fix: 不再手动 release counter + 置 None on_complete - # counter 从原始 spawn 保持到 retry 完成,避免窗口期 ticker acquire 同一 agent - # on_complete 保留原始 wrapped_on_complete,retry 完成后自然 release counter - - # 续杯前检查任务状态,已终态则跳过 - if db_path and task_id: - try: - conn = get_connection(db_path) - try: - row = conn.execute( - "SELECT status FROM tasks WHERE id=?", (task_id,) - ).fetchone() - # Bug-6 fix: pending 
不是终态 - if row and row["status"] in ( - "done", "failed", "cancelled", "review"): - logger.info("Retry skip: task %s already %s (agent=%s)", - task_id, row["status"], agent_id) - # on_complete = wrapped_on_complete,会 release counter - await self._do_on_complete_async(on_complete, agent_id, "task_already_done") - return - finally: - conn.close() - except Exception: - logger.warning( - "Retry status check failed for %s, proceeding", task_id) - - # 直接读写 tasks 表的 retry_count - if retry_field == "retry_count" and db_path and task_id: - try: - conn = get_connection(db_path) - try: - conn.execute("BEGIN IMMEDIATE") - conn.execute( - "UPDATE tasks SET retry_count = COALESCE(retry_count, 0) + 1 WHERE id=?", - (task_id,), - ) - conn.commit() - row = conn.execute( - "SELECT retry_count FROM tasks WHERE id=?", (task_id,) - ).fetchone() - count = row["retry_count"] if row else 1 - finally: - conn.close() - except Exception: - logger.exception( - "Failed to update retry_count for task %s", task_id) - count = 1 - else: - retry_counts = self._get_retry_counts(db_path, task_id) - count = retry_counts.get(retry_field, 0) + 1 - retry_counts[retry_field] = count - self._update_retry_counts(db_path, task_id, retry_counts) - - if count >= self.max_retries: - logger.error("Agent %s max retries (session=%s, %s=%d)", - agent_id, session_id, retry_field, count) - self._mark_task(db_path, task_id, "failed", { - "reason": f"max_{retry_field}", "count": count, - }) - await self._do_on_complete_async(on_complete, agent_id, "max_retries") - return - - logger.info("Agent %s retry %s=%d/%d (session=%s)", - agent_id, retry_field, count, self.max_retries, session_id) - - # 构建续杯 message(Mail 用专用模板,Task 用标准模板) - task_info = self._get_task_info(db_path, task_id) or {} - project_id = task_info.get("project_id", "") - handler = TaskTypeRegistry.get_by_project(project_id) - is_handler = handler is not None - - if is_handler: - must_haves = task_info.get("must_haves", "{}") - try: - meta = 
json.loads(must_haves) if must_haves else {} - except Exception: - meta = {} - message = MAIL_RETRY_PROMPT.format( - from_agent=meta.get("from", "unknown"), - title=task_info.get("title", ""), - retry_count=count, - max_retries=self.max_retries, - ) - else: - fallback_hint = "\n⚠️ 之前有 fallback 执行,请调 API 检查任务当前状态和已有产出,确认是否已完成。" if retry_field == "retry_count" else "" - message = self.RETRY_PROMPT.format( - project_id=project_id, - task_id=task_id or "", - title=task_info.get("title", ""), - retry_count=count, - max_retries=self.max_retries, - api_host=self.api_host, - api_port=self.api_port, - agent_id=agent_id, - fallback_hint=fallback_hint, - ) - - # v2.7.2: 通过 spawn_full_agent 重新 spawn(内部 can_acquire + acquire) - # on_complete = wrapped_on_complete(含 counter release),作为业务回调传入 - try: - await self.spawn_full_agent( - agent_id=agent_id, - message=message, - task_id=task_id, - on_complete=on_complete, - use_main_session=True, # #02: 续杯走 main session - task_db_path=db_path, - skip_counter=True, # Bug-4 fix: counter 已在原始 spawn 中持有 - ) - except AgentBusyError as e: - # #07.3 ACT-3: session busy(compact/lock/running)= 暂时性阻塞 - # release counter → 任务保持 working → ticker 重新 dispatch - logger.warning("Retry spawn deferred: %s session busy (%s), releasing counter for ticker re-dispatch", - agent_id, e.reason) - await self._do_on_complete_async(on_complete, agent_id, "retry_session_busy") - except Exception: - logger.exception("Retry spawn failed for %s", agent_id) - await self._do_on_complete_async(on_complete, agent_id, "retry_spawn_failed") - - # ── 辅助方法 ── - - @staticmethod - def _parse_stdout_json(stdout_text: str) -> dict: - """解析 openclaw agent --json 的 stdout 输出 - - 返回可直接使用的字段:status, summary, fallback_used, fallback_reason, payloads - 不再提取 meta,直接用顶层字段。 - """ - text = stdout_text.strip() - if not text: - return {"status": None, "summary": None, "fallback_used": False, - "fallback_reason": None, "payloads": []} - try: - data = json.loads(text) - except 
json.JSONDecodeError: - # 多行输出,找最后一个 JSON - for line in reversed(text.splitlines()): - try: - data = json.loads(line) - break - except json.JSONDecodeError: - continue - else: - return {"status": None, "summary": None, "fallback_used": False, - "fallback_reason": None, "payloads": []} - - # 从 data.result.meta.executionTrace 取 fallback 信息 - result = data.get("result", {}) - meta = result.get("meta", {}) - trace = meta.get("executionTrace", {}) - - return { - "status": data.get("status"), - "summary": data.get("summary"), - "fallback_used": trace.get("fallbackUsed", False), - "fallback_reason": trace.get("fallbackReason"), - "payloads": result.get("payloads", []), - } - - @staticmethod - def _get_task_status( - db_path: Optional[Path], task_id: Optional[str]) -> Optional[str]: - """查任务实际 API 状态""" - if not db_path or not task_id: - return None - try: - conn = get_connection(db_path) - try: - row = conn.execute( - "SELECT status FROM tasks WHERE id=?", (task_id,) - ).fetchone() - return row["status"] if row else None - finally: - conn.close() - except Exception: - return None - - @staticmethod - def _get_task_info(db_path: Optional[Path], - task_id: Optional[str]) -> Optional[dict]: - """查任务基本信息""" - if not db_path or not task_id: - return None - try: - conn = get_connection(db_path) - try: - row = conn.execute( - "SELECT id, title, status FROM tasks WHERE id=?", ( - task_id,) - ).fetchone() - if not row: - return None - info = dict(row) - # 从 db_path 推断 project_id: data//blackboard.db - info["project_id"] = db_path.parent.name - return info - finally: - conn.close() - except Exception: - return None - - @staticmethod - def _revive_session(agent_id: str) -> bool: - """假死复活术:修改 sessions.json status 从 running 改为 idle""" - sessions_path = Path(os.environ.get( - "OPENCLAW_HOME", str(Path.home() / ".openclaw") - )) / "agents" / agent_id / "sessions" / "sessions.json" - if not sessions_path.exists(): - return False - try: - with open(sessions_path) as f: - sessions = 
json.load(f) - main_key = f"agent:{agent_id}:main" - main_session = sessions.get(main_key, {}) - if main_session.get("status") != "running": - return False # 不是 running 状态,不需要复活 - main_session["status"] = "idle" - sessions[main_key] = main_session - with open(sessions_path, "w") as f: - json.dump(sessions, f, indent=2) - logger.info( - "Revived %s: sessions.json status changed running→idle", - agent_id) - # #07 O4: 同时清理残留 lock 文件 - sf = main_session.get("sessionFile", "") - if sf: - lock_path = Path(sf + ".lock") - if lock_path.exists(): - try: - lock_path.unlink() - logger.info( - "Cleaned stale lock for %s: %s", - agent_id, - lock_path.name) - except Exception: - pass - return True - except Exception: - logger.exception("Failed to revive %s", agent_id) - return False - - # deprecated: §24 v3, 保留供方案 B 备选 - @staticmethod - def _get_recent_gateway_logs() -> list: - """获取当天和昨天的 gateway 日志路径。 - - 日志路径通过 OPENCLAW_LOG_DIR 环境变量配置,默认 /tmp/openclaw。 - 文件名格式:openclaw-{YYYY-MM-DD}.log - """ - from datetime import timedelta - log_dir = os.environ.get("OPENCLAW_LOG_DIR", "/tmp/openclaw") - now_local = datetime.now() - today = now_local.strftime("%Y-%m-%d") - yesterday = (now_local - timedelta(days=1)).strftime("%Y-%m-%d") - paths = [] - for d in [today, yesterday]: - p = os.path.join(log_dir, f"openclaw-{d}.log") - if os.path.exists(p): - paths.append(p) - return paths - - # deprecated: §24 v3, 保留供方案 B 备选(旧 rotation 结束标记检测,已被 v5 取代) - @staticmethod - def _check_compact_in_progress_gateway( - session_key: str, window_seconds: int = 120) -> bool: - """§24 v3 rotation-only: 检查 gateway 日志,判断指定 session 是否刚完成 compact。 - - 检测逻辑:读日志尾部 2MB,按目标 sessionKey 过滤, - 找最后一个 rotation 事件,如果在窗口内 → compact 可能仍在 retry 循环中。 - """ - from datetime import datetime as _dt, timezone as _tz, timedelta - log_paths = AgentSpawner._get_recent_gateway_logs() - if not log_paths: - return False - - now = _dt.now(_tz.utc) - window_start = now - timedelta(seconds=window_seconds) - - last_rotation_time = None - - 
for log_path in log_paths: - if not os.path.exists(log_path): - continue - try: - with open(log_path, "rb") as f: - f.seek(0, 2) - size = f.tell() - f.seek(max(0, size - 2 * 1024 * 1024)) - tail = f.read().decode("utf-8", errors="replace") - except Exception: - continue - - for line in tail.splitlines(): - if not line.strip(): - continue - try: - obj = json.loads(line) - except (json.JSONDecodeError, ValueError): - continue - - msg = obj.get("message", "") - # 只看包含目标 sessionKey 的事件 - if session_key not in msg: - continue - - # rotation 事件 - if "[compaction] rotated active transcript" in msg: - ts_str = obj.get("time", "") - if ts_str: - try: - event_time = _dt.fromisoformat( - ts_str.replace("Z", "+00:00")) - # timezone-aware: normalize to UTC - if event_time.tzinfo is None: - event_time = event_time.replace(tzinfo=_tz.utc) - if last_rotation_time is None or event_time > last_rotation_time: - last_rotation_time = event_time - except (ValueError, TypeError): - continue - - if last_rotation_time is not None: - return last_rotation_time >= window_start - - return False - - # ─── v5: compact 开始标记检测(gateway log)+ 结束标记检测(jsonl) ─── - - @staticmethod - def _find_compact_start_in_gateway_log( - agent_id: str, window_seconds: int = 900) -> Optional[str]: - """v5: 检查 gateway 日志,找最近的 compact 开始标记。 - - 只检测 precheck 路径:message 含 "[context-overflow-precheck]" 且 - "route=compact_then_truncate"。原因: - - overflow 标记("attempting auto-compaction")不含 sessionKey, - 被 `session_key not in msg` 前置过滤跳过,是死代码。 - - timeout 标记推测同理不含 sessionKey。 - - precheck 标记含 sessionKey 且实测总在 overflow 之前触发(同一 compact - 事件,precheck 先检测到,overflow 是 fallback),所以 precheck 已覆盖 - overflow 场景。 - - threshold/manual 触发的 compact 无开始标记(静默执行),依赖 - counter+lock+status 保护,不需要 gateway 日志检测。 - - 超时兜底:开始标记超过 window_seconds(默认 15 分钟)自动忽略。 - - 返回最近一个开始标记的 UTC ISO 时间字符串(带 Z 后缀),或 None。 - """ - from datetime import datetime as _dt, timezone as _tz, timedelta - log_paths = AgentSpawner._get_recent_gateway_logs() - if not 
log_paths: - return None - - session_key = f"agent:{agent_id}:main" - now = _dt.now(_tz.utc) - window_start = now - timedelta(seconds=window_seconds) - - latest_start_time = None # type: Optional[_dt] - latest_start_str = None # type: Optional[str] - - for log_path in log_paths: - if not os.path.exists(log_path): - continue - try: - with open(log_path, "rb") as f: - f.seek(0, 2) - size = f.tell() - f.seek(max(0, size - 2 * 1024 * 1024)) - tail = f.read().decode("utf-8", errors="replace") - except Exception: - continue - - for line in tail.splitlines(): - if not line.strip(): - continue - try: - obj = json.loads(line) - except (json.JSONDecodeError, ValueError): - continue - - msg = obj.get("message", "") - if session_key not in msg: - continue - - # 只检测 precheck 路径:route=compact_then_truncate - # overflow/timeout 标记不含 sessionKey,被前置过滤跳过(死代码),已删除 - if ("[context-overflow-precheck]" not in msg - or "route=compact_then_truncate" not in msg): - continue - - # 解析时间 - ts_str = obj.get("time", "") - if not ts_str: - continue - try: - event_time = _dt.fromisoformat( - ts_str.replace("Z", "+00:00")) - if event_time.tzinfo is None: - event_time = event_time.replace(tzinfo=_tz.utc) - else: - # 确保 UTC - event_time = event_time.astimezone(_tz.utc) - except (ValueError, TypeError): - continue - - # 超时兜底:超过窗口的忽略 - if event_time < window_start: - continue - - if latest_start_time is None or event_time > latest_start_time: - latest_start_time = event_time - latest_start_str = event_time.strftime( - "%Y-%m-%dT%H:%M:%S.") + f"{event_time.microsecond:06d}" + "Z" - - return latest_start_str - - @staticmethod - def _check_compaction_finished_in_jsonl( - session_file: str, after_time: str) -> bool: - """v5: 检查 jsonl 是否有 after_time 之后的 compaction entry。 - - 有 → compact 已完成 → True - 没有 → compact 可能仍在进行 → False - - after_time 格式:UTC ISO(如 2026-06-12T10:25:27.581Z)。 - jsonl timestamp 格式也是 UTC ISO。 - """ - if not session_file or not Path(session_file).exists(): - return False - try: - from 
datetime import datetime as _dt, timezone as _tz - after_dt = _dt.fromisoformat(after_time.replace("Z", "+00:00")) - if after_dt.tzinfo is None: - after_dt = after_dt.replace(tzinfo=_tz.utc) - - with open(session_file, "rb") as sf: - sf.seek(0, 2) - size = sf.tell() - sf.seek(max(0, size - 1048576)) - tail = sf.read().decode("utf-8", errors="replace") - - for line in reversed(tail.splitlines()): - if not line.strip(): - continue - try: - obj = json.loads(line) - except (json.JSONDecodeError, ValueError): - continue - if obj.get("type") == "compaction": - ts = obj.get("timestamp", "") - if ts: - try: - ct = _dt.fromisoformat(ts.replace("Z", "+00:00")) - if ct.tzinfo is None: - ct = ct.replace(tzinfo=_tz.utc) - if ct >= after_dt: - return True - except (ValueError, TypeError): - pass - # 遇到早于 after_time 的 entry → 不需要继续往前扫 - ts = obj.get("timestamp", "") - if ts: - try: - ct = _dt.fromisoformat(ts.replace("Z", "+00:00")) - if ct.tzinfo is None: - ct = ct.replace(tzinfo=_tz.utc) - if ct < after_dt: - break - except (ValueError, TypeError): - pass - return False - except Exception: - return False - - @staticmethod - def _check_recent_compaction_jsonl( - session_file: str, window_seconds: int = 900) -> bool: - """v2.8.2 Fix-2: 读 session jsonl 末尾,检查是否有 window_seconds 内的 compaction 记录。 - - 比 compactionCheckpoints 更可靠:Gateway 每次完成 compact 必然在 jsonl 末尾追加记录, - 但不保证更新 compactionCheckpoints。 - - v2.8.2: 窗口从 300s→900s(15min), 尾部读取从 50KB→1MB。 - 实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。 - 正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。 - """ - if not session_file or not Path(session_file).exists(): - return False - try: - from datetime import datetime, timezone - now = datetime.now(timezone.utc) - with open(session_file, "rb") as sf: - sf.seek(0, 2) - size = sf.tell() - sf.seek(max(0, size - 1048576)) - tail = sf.read().decode("utf-8", errors="replace") - for line in reversed(tail.splitlines()): - if not line.strip(): - continue - try: - import json as _json - obj = _json.loads(line) - 
except (_json.JSONDecodeError, ValueError): - continue - if obj.get("type") == "compaction": - ts = obj.get("timestamp", "") - if ts: - try: - ct = datetime.fromisoformat( - ts.replace("Z", "+00:00")) - if (now - ct).total_seconds() < window_seconds: - return True - except (ValueError, TypeError): - pass - ts = obj.get("timestamp", "") - if ts: - try: - ct = datetime.fromisoformat(ts.replace("Z", "+00:00")) - if (now - ct).total_seconds() >= window_seconds: - break - except (ValueError, TypeError): - pass - return False - except Exception: - return False - - @staticmethod - def _check_session_state(agent_id: str) -> dict: - """检查 sessions.json 和 lock 状态 - - v2.8.1: compact 检测改用 session jsonl 末尾扫描(Fix-1), - 替代失效的 compactionCheckpoints 检测。 - """ - result = { - "status": "unknown", - "lock_pid": None, - "lock_pid_alive": False, - "recent_compact": False} - sessions_path = Path(os.environ.get( - "OPENCLAW_HOME", str(Path.home() / ".openclaw") - )) / "agents" / agent_id / "sessions" / "sessions.json" - if not sessions_path.exists(): - return result - try: - with open(sessions_path) as f: - sessions = json.load(f) - main_key = f"agent:{agent_id}:main" - main_session = sessions.get(main_key, {}) - result["status"] = main_session.get("status", "unknown") - - # 检查 lock (v3.1: done/timeout 时 lock 视为过期) - sf = main_session.get("sessionFile", "") - if sf: - lock_path = Path(sf + ".lock") - if lock_path.exists(): - try: - lock_data = json.loads(lock_path.read_text()) - pid = lock_data.get("pid") - result["lock_pid"] = pid - if pid: - try: - os.kill(pid, 0) - result["lock_pid_alive"] = True - except ProcessLookupError: - result["lock_pid_alive"] = False - # session 已完成/超时 > lock 是 Gateway 冷却锁,不阻塞新 turn - if result["status"] in ("done", "timeout"): - result["lock_pid_alive"] = False - result["lock_expired"] = True - # running + lock 超时 >30分钟 > 视为 idle,允许 dispatch - elif result["status"] == "running" and result["lock_pid_alive"]: - try: - lock_data = 
json.loads(lock_path.read_text()) - created_at_str = lock_data.get("createdAt", "") - if created_at_str: - from datetime import datetime as _dt, timezone as _tz - created_dt = _dt.fromisoformat( - created_at_str.replace("Z", "+00:00")) - elapsed = (_dt.now(_tz.utc) - - created_dt).total_seconds() - if elapsed > 1800: # 30 minutes - result["lock_pid_alive"] = False - result["lock_expired"] = True - logger.info("Lock expired for %s: running + lock age %.0fs > 1800s", - agent_id, elapsed) - except Exception: - pass - except Exception: - pass - - # §24 v5: compact 检测 = gateway log 开始标记 + jsonl 结束标记配对 - # 旧方法 (_check_compact_in_progress_trajectory, _check_recent_compaction_jsonl) - # 保留为 deprecated 但不再调用。 - # - # 逻辑: - # 1. 查 gateway log 最近的 compact 开始标记(precheck route=compact_then_truncate) - # 2. 有开始标记 → 查 jsonl 是否有对应的 compaction entry(结束标记) - # 3. 有开始无结束 → 阻塞(recent_compact=True) - # 4. 有开始有结束 → 放行 - # 5. 无开始标记 → threshold/manual 静默触发,靠 counter+lock+status 保护 - # 6. 超时兜底:开始标记超过 15 分钟自动忽略 - if result["status"] not in ("idle", "unknown", None) and sf: - compact_start = AgentSpawner._find_compact_start_in_gateway_log(agent_id) - if compact_start: - finished = AgentSpawner._check_compaction_finished_in_jsonl(sf, compact_start) - if not finished: - # 有开始标记且未完成 → 阻塞 - result["recent_compact"] = True - # 如果已完成 → recent_compact 保持 False(放行) - # 没有开始标记 → threshold/manual 静默触发,不阻塞 - except Exception: - pass - return result - - @staticmethod - def _check_compact_in_progress_trajectory( - session_file: str, timeout_minutes: int = 30) -> bool: - """§24 v4: 检查 trajectory jsonl 尾部,判断 session 是否处于非正常状态。 - - 检测逻辑:最后一个完整 turn 没有 prompt.submitted/skipped → 非正常 → skip。 - 覆盖:compact、timeout、hook block、session 结束等所有非正常状态。 - - Returns: - True = 非正常状态(skip ticker) - False = 正常(不 skip)或超时兜底放行 - """ - if not session_file: - return False - traj_path = f"{session_file}.trajectory.jsonl" - if not os.path.exists(traj_path): - return False - - try: - from datetime import datetime as _dt, timezone 
as _tz - - # 读尾部 500KB - with open(traj_path, "rb") as f: - f.seek(0, 2) - size = f.tell() - f.seek(max(0, size - 500 * 1024)) - tail = f.read().decode("utf-8", errors="replace") - - if not tail.strip(): - return False - - # 解析所有有效行 - events = [] - for line in tail.splitlines(): - line = line.strip() - if not line: - continue - try: - obj = json.loads(line) - events.append(obj) - except (json.JSONDecodeError, ValueError): - continue - - if not events: - return False - - # 按 session.started 分组找 turn - # 每个 turn 以 session.started 开始 - turns = [] - current_turn = [] - for evt in events: - if evt.get("type") == "session.started": - if current_turn: - turns.append(current_turn) - current_turn = [evt] - else: - current_turn.append(evt) - if current_turn: - turns.append(current_turn) - - if not turns: - return False - - # 检查最后一个完整 turn(包含 session.started) - last_turn = turns[-1] - turn_types = {evt.get("type") for evt in last_turn} - - # 有 prompt.submitted 或 prompt.skipped → 正常 turn - if "prompt.submitted" in turn_types or "prompt.skipped" in turn_types: - return False - - # 非正常状态 → 检查超时兜底 - # 找最后一个有 ts 的事件 - last_ts = None - for evt in reversed(events): - ts_str = evt.get("ts") - if ts_str: - try: - last_ts = _dt.fromisoformat( - ts_str.replace("Z", "+00:00")) - if last_ts.tzinfo is None: - last_ts = last_ts.replace(tzinfo=_tz.utc) - except (ValueError, TypeError): - continue - break - - if last_ts is None: - # 没有 ts 信息,无法判断超时 → 非正常 → skip - return True - - now = _dt.now(_tz.utc) - elapsed = (now - last_ts).total_seconds() - if elapsed > timeout_minutes * 60: - logger.debug("Trajectory last event %.0fs ago > %dm, fallback pass", - elapsed, timeout_minutes) - return False # 兜底放行 - - return True # 非正常状态且未超时 - - except Exception as e: - logger.debug("_check_compact_in_progress_trajectory error: %s", e) - return False - - @staticmethod - def _classify_outcome(exit_code: int, json_result: dict, stderr_text: str, - task_status: Optional[str], stdout_text: str = "") -> dict: - 
"""分类退出原因,返回处理策略 - - v3.1: A0 拆分为 A14-A17(信号中断/stderr 智能分类)。 - A8/A10 改为可恢复 retry。cooldown 统一 60s。 - """ - status = json_result.get("status") - summary = json_result.get("summary", "") - fallback_used = json_result.get("fallback_used", False) - - # A4: 任务 DB status=failed(Agent 自己标的) - if task_status == "failed": - return {"outcome": "agent_failed", "should_retry": False} - - # A1: status=ok + completed + 非 fallback - if status == "ok" and summary == "completed" and not fallback_used: - return {"outcome": "completed", "should_retry": False} - - # A5/A6: status=ok + fallback - if status == "ok" and fallback_used: - return {"outcome": "fallback_timeout", "should_retry": False} - - # A2/A3: status=timeout → 唯一续杯场景 - # 注意: PM2 restart 时 daemon 自身也收到 SIGTERM,此时 retry spawn 的新进程 - # 会随 daemon 一起被杀。A14 retry 假设 daemon 存活,PM2 级重启不在此场景内。 - if status == "timeout": - return {"outcome": "gateway_timeout", "should_retry": True, - "retry_field": "retry_count"} - - # A0 拆分: 无 JSON 输出 + exit≠0 - if status is None and not stdout_text.strip() and exit_code != 0: - # A14: SIGINT(130) / SIGTERM(143) → 外部中断,可恢复 - if exit_code in (130, 143): - return {"outcome": "interrupted", "should_retry": True, - "retry_field": "retry_count", "cooldown_seconds": 60} - # A15/A16: stderr 含 network/compact 关键字 → 可恢复 - if stderr_text: - stderr_lower = stderr_text.lower() - if any(kw in stderr_lower for kw in [ - "econnrefused", "etimedout", "gateway closed", "econnreset"]): - return {"outcome": "gateway_unreachable", "should_retry": True, - "retry_field": "retry_count", "cooldown_seconds": 60} - if any(kw in stderr_lower for kw in [ - "compaction-diag", "context-overflow"]): - return {"outcome": "compact_interrupted", "should_retry": True, - "retry_field": "retry_count", "cooldown_seconds": 60} - # A17: 真正的 crash → 保持 working,ticker 兜底 - return {"outcome": "crashed", "should_retry": False, - "original": "process_crash"} - - # A13 revised: stdout 为空但 exit=0 → 信任进程退出码,视为正常完成 - # 实测发现 openclaw session=None 
+ exit=0 是正常场景(inform 通知等) - # 旧逻辑按 task_status 区分,非终态判 agent_error → 导致 inform 邮件永不标 done - if status is None and not stdout_text.strip() and exit_code == 0: - return {"outcome": "completed", "should_retry": False} - - # A7-A12: status=error → 不续杯,stderr 辅助分类 - if status == "error": - stderr_lower = stderr_text.lower() - if any(kw in stderr_lower for kw in [ - "401", "403", "unauthorized", "auth"]): - return {"outcome": "auth_failed", "should_retry": False} - if any(kw in stderr_lower for kw in [ - "econnrefused", "etimedout", "gateway closed", "econnreset"]): - return {"outcome": "gateway_unreachable", "should_retry": True, - "retry_field": "retry_count", "cooldown_seconds": 60} - if any(kw in stderr_lower for kw in [ - "rate_limit", "500", "503", "api error"]): - return {"outcome": "api_error", "should_retry": True, - "retry_field": "retry_count", "cooldown_seconds": 60} - if any(kw in stderr_lower for kw in [ - "compaction-diag", "context-overflow"]): - return {"outcome": "compact_failed", "should_retry": False} - if any(kw in stderr_lower for kw in [ - "lock", "busy", "concurrent", "lane task error"]): - return {"outcome": "lock_conflict", "should_retry": True, - "retry_field": "retry_count", "cooldown_seconds": 60} - return {"outcome": "agent_error", "should_retry": False} - - # 兜底:status 未知值 - return {"outcome": "agent_error", - "should_retry": False, "original": "unknown_status"} - - @staticmethod - def _get_retry_counts( - db_path: Optional[Path], task_id: Optional[str]) -> dict: - """从最新 task_attempt 的 metadata 读计数器""" - defaults = {"retry_count": 0, "connect_retry_count": 0, - "api_retry_count": 0, "lock_retry_count": 0, - "monitor_timeout_count": 0} - if not db_path or not task_id: - return defaults - try: - conn = get_connection(db_path) - try: - row = conn.execute( - "SELECT metadata FROM task_attempts WHERE task_id=? 
ORDER BY attempt_number DESC LIMIT 1", - (task_id,) - ).fetchone() - if row and row["metadata"]: - stored = json.loads(row["metadata"]) - for k in defaults: - if k in stored: - defaults[k] = stored[k] - finally: - conn.close() - except Exception: - pass - return defaults - - def _update_retry_counts(self, db_path: Optional[Path], - task_id: Optional[str], counts: dict): - """将 retry counts 写回最新 task_attempt 的 metadata""" - if not db_path or not task_id: - return - try: - conn = get_connection(db_path) - try: - conn.execute("BEGIN IMMEDIATE") - row = conn.execute( - "SELECT rowid, metadata FROM task_attempts " - "WHERE task_id=? ORDER BY attempt_number DESC LIMIT 1", - (task_id,) - ).fetchone() - if row: - meta = json.loads( - row["metadata"]) if row["metadata"] else {} - meta.update(counts) - conn.execute( - "UPDATE task_attempts SET metadata=? WHERE rowid=?", - (json.dumps(meta), row["rowid"]) - ) - conn.commit() - finally: - conn.close() - except Exception: - logger.exception( - "Failed to update retry counts for task %s", task_id) - - def _mark_task(self, db_path: Optional[Path], task_id: Optional[str], - status: str, detail: Optional[dict] = None): - """标记任务状态(用于 failed/escalate)""" - if not db_path or not task_id: - return - try: - conn = get_connection(db_path) - try: - conn.execute("BEGIN IMMEDIATE") - conn.execute( - "UPDATE tasks SET status=?, completed_at=datetime('now') WHERE id=?", - (status, task_id) - ) - if detail: - conn.execute( - "INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)", - (task_id, "daemon", status, json.dumps( - detail, ensure_ascii=False)) - ) - conn.commit() - finally: - conn.close() - # F2: conn 已关闭,Blackboard 内部自己 get_connection - if status == "failed": - reason = (detail or {}).get("reason", "unknown") - try: - from src.daemon.mail_notify import _is_mail_project, notify_mail_failed - if _is_mail_project(db_path): - # Mail 失败:通知发件人,不 @pangtong - notify_mail_failed(db_path, task_id, reason, detail) - else: - 
# Task 失败:@pangtong(F2 原逻辑) - from src.blackboard.operations import Blackboard - bb = Blackboard(db_path) - cid = bb.add_comment(task_id, "daemon", - f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入", - comment_type="system") - bb.record_mentions(cid, task_id, ["pangtong-fujunshi"]) - logger.info( - "Task %s: failure notified pangtong via comment+mention (reason=%s)", - task_id, - reason) - except Exception as e: - logger.warning("Task %s: failed to notify: %s", task_id, e) - except Exception: - logger.exception("Failed to mark task %s as %s", task_id, status) - - @staticmethod - def _do_on_complete(on_complete, agent_id, outcome): - """执行 on_complete 回调(同步+异步兼容)""" - if not on_complete: - return - try: - result = on_complete(agent_id, outcome) - if asyncio.iscoroutine(result): - # 注意:这里是同步调用的,不能 await - # 在 _monitor_process 的 async 上下文中应该用 await - pass - except Exception: - pass - - async def _do_on_complete_async(self, on_complete, agent_id, outcome): - """异步执行 on_complete 回调""" - if not on_complete: - return - try: - result = on_complete(agent_id, outcome) - if asyncio.iscoroutine(result): - await result - except Exception: - logger.warning( - "on_complete callback failed for %s", - agent_id, - exc_info=True) - - def _register_session( - self, - session_id: str, - agent_id: str, - task_id: Optional[str], - pid: Optional[int], - broadcast_task_ids: Optional[List[str]] = None, - ) -> None: - """注册 spawn session""" - self._sessions[session_id] = { - "agent_id": agent_id, - "task_id": task_id, - "pid": pid, - "status": "running", - "started_at": datetime.utcnow().isoformat(), - "completed_at": None, - "broadcast_task_ids": broadcast_task_ids, - } - - def _record_attempt( - self, - task_id: Optional[str], - agent_id: str, - outcome: str, - exit_code: Optional[int] = None, - error: Optional[str] = None, - metadata: Optional[dict] = None, - db_path: Optional[Path] = None, - ) -> None: - """记录 task_attempt""" - # 广播 spawn 产生的 "broadcast" task_id 不记录 attempts,避免脏数据 - if 
task_id == "broadcast": - return - effective_db = db_path or self.db_path - if not task_id or not effective_db: - return - - try: - conn = get_connection(effective_db) - try: - conn.execute("BEGIN IMMEDIATE") - row = conn.execute( - "SELECT MAX(attempt_number) as max_a FROM task_attempts WHERE task_id=?", - (task_id,), - ).fetchone() - attempt_number = (row["max_a"] or 0) + 1 - - meta = metadata or {} - if error: - meta["error"] = error - conn.execute( - "INSERT INTO task_attempts " - "(task_id, attempt_number, agent, outcome, exit_code, metadata, completed_at) " - "VALUES (?,?,?,?,?,?,datetime('now'))", - (task_id, attempt_number, agent_id, outcome, - exit_code, json.dumps(meta)), - ) - conn.execute( - "INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)", - (task_id, agent_id, - "agent_completed" if outcome == "completed" else "daemon_tick", - json.dumps({"outcome": outcome, "attempt": attempt_number})), - ) - conn.commit() - finally: - conn.close() - except Exception: - logger.exception("Failed to record attempt for task %s", task_id) - - def get_session(self, session_id: str) -> Optional[Dict[str, Any]]: - """获取 session 信息""" - return self._sessions.get(session_id) - - def get_session_by_agent(self, agent_id: str) -> Optional[Dict[str, Any]]: - """v2.7.2: 根据 agent_id 获取活跃 session 信息(用于进程存活性检查)""" - for sid, info in self._sessions.items(): - if info.get("agent_id") == agent_id and info.get( - "status") == "running": - return info - return None - - def cleanup_session(self, session_id: str) -> None: - """清理 session""" - if session_id in self._sessions: - session = self._sessions[session_id] - task_id = session.get("task_id") - del self._sessions[session_id] - # 清理 B2 compact 等待计数器 - if task_id and task_id in self._compact_waits: - del self._compact_waits[task_id] diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py deleted file mode 100644 
index 4ecf503..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py +++ /dev/null @@ -1,512 +0,0 @@ -"""toolchain_handler.py - 工具链事件 handler。 - -处理 Gitea Webhook 事件(CI 失败、Review 请求、Issue 指派等)。 -L2 引擎层强约束:输入(结构化步骤)+ 执行(Red Flags)+ 输出(action_report 验证)。 -""" -from __future__ import annotations - -import json -import logging -import urllib.request -from pathlib import Path -from typing import Dict, List - -from src.daemon.base_task_handler import BaseTaskHandler, VerifyResult -from src.daemon.prompt_composer import PromptComposer, PromptContext -from src.daemon.toolchain_templates import render_template, _TEMPLATE_MAP -from src.blackboard.db import get_connection - -logger = logging.getLogger("moziplus-v2.handler.toolchain") - -# --------------------------------------------------------------------------- -# Gitea API 配置 -# --------------------------------------------------------------------------- - -_GITEA_BASE = "http://192.168.2.154:3000/api/v1" -_GITEA_TOKEN = "a6d596b826f4bfeaf983ef4d25ac25dab95bbc4e" - -# 业务失败连续次数阈值,超过则升级为系统失败 -_BUSINESS_FAIL_THRESHOLD = 3 - -# action_type → action_hint 映射 -_ACTION_HINTS: Dict[str, str] = { - "review_result": "你收到一个 Review 结果通知,这是一个需要你执行动作的事件(不是纯通知)。", - "review_request": "你收到一个 Review 请求,这是一个需要你审查并提交 Review 的事件。", - "review_updated": "你收到一个 PR 更新通知,这是一个需要你重新审查修改部分的事件。", - "review_comment": "你收到一个 Review 评论,这是一个需要你查看并响应的事件。", - "ci_failure": "你收到一个 CI 失败通知,这是一个需要你修复失败测试的事件。", - "issue_assigned": "你收到一个 Issue 指派,这是一个需要你编码实现的事件。", - "deploy_failure": "你收到一个部署失败通知,这是一个需要你排查并修复的事件。", - "mention": "你收到一个 @mention 通知,这是一个需要你按指引响应的事件。", - "review_merged": "你收到一个 PR 合并通知。这是一条纯通知,阅读即可。", - "infrastructure_failure": "你收到一个基础设施问题报告,请排查并修复。", -} - - -# --------------------------------------------------------------------------- -# Toolchain PromptSections -# --------------------------------------------------------------------------- - -class ToolchainContextSection: - """事件类型 + 事件详情 + 结构化步骤 + 
action_hint(priority=10)""" - - name: str = "toolchain_context" - priority: int = 10 - - def render(self, context: PromptContext) -> str: - event_type = context.event_type - event_data: Dict = context.event_data or {} - - # Part 1: 事件信息(现有模板引擎) - if event_type in _TEMPLATE_MAP: - variables = {k: str(v) for k, v in event_data.items()} - event_text = render_template(event_type, variables) - else: - lines = ["## 工具链事件", ""] - lines.append(f"- **事件类型**: {event_type or '未知'}") - if event_data: - lines.append("- **事件详情**:") - for key, value in event_data.items(): - lines.append(f" - {key}: {value}") - lines.append("") - event_text = "\n".join(lines) - - # Part 2: 结构化编号步骤(新增,从 action_steps 渲染) - steps: List[str] = context.action_steps or [] - if steps: - step_lines = ["", "### 必须执行的步骤", ""] - for i, step in enumerate(steps, 1): - step_lines.append(f"{i}. {step}") - steps_text = "\n".join(step_lines) - else: - steps_text = "" - - # Part 3: action 指引(新增,按 action_type 选择) - action_hint = _ACTION_HINTS.get( - context.action_type, - "你收到一个工具链事件,这是一个需要你执行动作的事件。", - ) - - return f"{action_hint}\n\n{event_text}{steps_text}" - - def should_include(self, context: PromptContext) -> bool: - return True - - -class ToolchainApiSection: - """API 操作指令(priority=40)-- action_report 提交指引""" - - name: str = "toolchain_api" - priority: int = 40 - - API_HOST = "localhost:8083" - - def render(self, context: PromptContext) -> str: - task_id = context.task_id - project_id = context.project_id - agent_id = context.agent_id - - lines = [ - "## API 操作指令", - "", - f"项目 ID: `{project_id}`", - f"任务 ID: `{task_id}`", - "", - "### 完成后必须提交 action report", - "", - "执行完所有步骤后,必须提交 action report:", - "```bash", - f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/comments" \\', - ' -H "Content-Type: application/json" \\', - f' -d \'{{"author": "{agent_id}", "comment_type": "action_report", "body": "简要描述你执行了什么操作及结果"}}\'', - "```", - "", - "⚠️ 不提交 action report 的任务会被标记为 
failed。", - "", - "### 提交产出", - "", - "如有产出(如 review 结果、修复方案),提交到任务 outputs:", - "```bash", - f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/outputs" \\', - ' -H "Content-Type: application/json" \\', - ' -d \'{"content": "<你的产出内容>", "type": "text"}\'', - "```", - "", - "### 需要其他角色支持时", - "", - "如果在执行过程中需要其他角色协助(如缺数据、需要审批等),在关联的 PR/Issue 上创建 comment @对方:", - "```bash", - f'curl -s -X POST "{_GITEA_BASE}/repos/{{repo}}/issues/{{pr_number}}/comments" \\', - f' -H "Authorization: token " \\', - ' -H "Content-Type: application/json" \\', - ' -d \'{"body": "@{agent-id} 需要你的支持:{描述问题}"}\'', - "```", - "", - "⚠️ 不要使用 Mail API(飞鸽传书)。所有协作通过 Gitea 留痕。", - "", - ] - return "\n".join(lines) - - def should_include(self, context: PromptContext) -> bool: - return True - - -class ToolchainConstraintsSection: - """硬约束 + Red Flags(priority=50)""" - - name: str = "toolchain_constraints" - priority: int = 50 - - def render(self, context: PromptContext) -> str: - lines = [ - "## 硬约束(必须遵守)", - "", - "⚠️ 以下是强制要求,不是建议或参考。违反任何一条都会导致任务失败。", - "", - "### 1. 必须按步骤执行", - '- 检查上方“必须执行的步骤”列表', - '- 逐条执行每个步骤,不可跳过', - '- 不要只读不做——这不是纯通知', - "", - "### 2. 必须提交 action report", - '- 执行完所有步骤后,必须提交 action report', - "- 提交方式:POST comment(comment_type='action_report')", - '- 报告内容:简要描述你执行了什么操作、结果如何', - '- ⚠️ 不提交 action report 的任务会被标记为 failed', - "", - "### 3. 不要执行任何状态转换命令", - '- 不要手动标 working/done/review/failed,系统会自动处理', - "", - "### 4. 不需要回复", - '- action report 就是你的完成凭证', - '- 不要发送 Mail(飞鸽传书),你的所有操作在 toolchain 流程内完成', - "", - "### 5. 
所有协作通过 Gitea 完成", - '- 如果遇到问题需要其他角色支持,在关联的 PR/Issue 上创建 comment @对方', - '- 不要使用 Mail API(飞鸽传书)发送消息', - '- 你的所有操作都在 toolchain 流程内,通过 Gitea 留痕', - "", - "### Red Flags(如果脑海中出现以下想法,说明你错了)", - "", - '| Agent 想法 | Red Flag 驳回 |', - '|------------|--------------|', - '| “这个通知看看就行了” | ❌ 错!这是 action 指令,必须执行步骤列表中的每一项 |', - '| “我不需要做任何事” | ❌ 错!检查“必须执行的步骤”列表,每一步都要执行 |', - '| “先放着等会处理” | ❌ 错!立即执行,不要推迟 |', - '| “我已经知道了” | ❌ 知道不等于执行。执行步骤 + 提交 action report 才算完成 |', - '| “步骤太多了,选几个做就行” | ❌ 错!必须逐条执行,不可跳过 |', - '| “这个步骤不适用于当前情况” | ❌ 如果确实不适用,在 action report 中说明原因,但其他步骤必须执行 |', - "", - ] - return "\n".join(lines) - - def should_include(self, context: PromptContext) -> bool: - return True - - -# --------------------------------------------------------------------------- -# ToolchainHandler -# --------------------------------------------------------------------------- - -class ToolchainHandler(BaseTaskHandler): - """工具链事件 handler。""" - - task_type = "toolchain" - virtual_project = "_toolchain" - display_name = "工具链事件" - - def target_success_status(self) -> str: - return "done" - - def pre_spawn(self, task_id: str, db_path: Path) -> bool: - """auto_working:pending → working""" - return self._auto_mark_working(task_id, db_path) - - def get_sections(self) -> list: - """返回 3 个 Toolchain PromptSection 实例""" - return [ - ToolchainContextSection(), - ToolchainApiSection(), - ToolchainConstraintsSection(), - ] - - def build_prompt(self, context: PromptContext) -> str: - """通过 PromptComposer 拼装 sections 为最终 prompt""" - composer = PromptComposer() - composer.add_many(self.get_sections()) - return composer.compose(context) - - def verify_completion(self, task_id: str, db_path: Path) -> VerifyResult: - """检查 action report(精确验证)+ 三层 fallback""" - try: - conn = get_connection(db_path) - try: - # 特殊处理:infrastructure_failure 始终通过(防递归) - row = conn.execute( - "SELECT must_haves FROM tasks WHERE id=?", (task_id,) - ).fetchone() - if row and row["must_haves"]: - try: - meta = 
json.loads(row["must_haves"]) - except Exception: - meta = {} - if meta.get("action_type") == "infrastructure_failure": - return VerifyResult(True, "infrastructure_passthrough", - "infrastructure_failure auto-pass") - - # 特殊处理:review_merged 始终通过(纯通知) - if meta.get("action_type") == "review_merged": - return VerifyResult(True, "merged_passthrough", - "review_merged auto-pass") - - # 1. 优先检查 action_report comment - report_row = conn.execute( - "SELECT id FROM comments WHERE task_id=? " - "AND comment_type='action_report' LIMIT 1", - (task_id,) - ).fetchone() - if report_row: - return VerifyResult(True, "has_action_report", "action_report found") - - # 2. fallback:检查 output(向后兼容) - output_count = conn.execute( - "SELECT COUNT(*) FROM outputs WHERE task_id=?", (task_id,) - ).fetchone()[0] - if output_count > 0: - return VerifyResult(True, "has_output", f"output_count={output_count}") - - # 3. fallback:检查有实质内容的 comment(向后兼容) - comment_count = conn.execute( - "SELECT COUNT(*) FROM comments WHERE task_id=? 
" - "AND author != 'system' AND LENGTH(body) >= 20", - (task_id,) - ).fetchone()[0] - if comment_count > 0: - return VerifyResult(True, "has_comment", f"comment_count={comment_count}") - - return VerifyResult(False, "no_action", - "no action_report, no output, no valid comment") - finally: - conn.close() - except Exception as e: - logger.error("Toolchain %s: verify error: %s", task_id, e) - return VerifyResult(False, "verify_error", str(e)) - - def on_failure(self, task_id: str, agent_id: str, - db_path: Path, verify: VerifyResult) -> None: - """验证失败 → 三分路处理(业务/系统/基础设施)""" - self._mark_task_status(db_path, task_id, "failed") - logger.info("Toolchain %s: verify failed (%s), marked failed", - task_id, verify.reason) - - # 读取 must_haves 获取事件上下文 - meta = {} - try: - conn = get_connection(db_path) - row = conn.execute( - "SELECT must_haves FROM tasks WHERE id=?", (task_id,) - ).fetchone() - if row and row["must_haves"]: - meta = json.loads(row["must_haves"]) - # 统计该 task 的业务失败次数 - fail_count = conn.execute( - "SELECT COUNT(*) FROM events WHERE task_id=? 
" - "AND event_type='status_change' AND payload LIKE '%failed%'", - (task_id,) - ).fetchone()[0] - conn.close() - except Exception: - fail_count = 0 - - action_type = meta.get("action_type", "") - context_data = meta.get("context", {}) - assignee = meta.get("assignee", "") or meta.get("from", "") - - # 三分路决策 - route = self._classify_failure(verify, fail_count) - - if route == "business": - self._handle_business_failure( - task_id, agent_id, verify, action_type, context_data, assignee, db_path) - elif route == "system": - self._handle_system_failure( - task_id, agent_id, verify, action_type, context_data, db_path) - else: # infrastructure - self._handle_infrastructure_failure( - task_id, agent_id, verify, db_path) - - def _classify_failure(self, verify: VerifyResult, fail_count: int) -> str: - """分类失败类型:business / system / infrastructure""" - # verify_error 或 DB 不可用 → 基础设施失败 - if verify.reason == "verify_error": - return "infrastructure" - # 连续业务失败超过阈值 → 升级为系统失败 - if fail_count >= _BUSINESS_FAIL_THRESHOLD: - return "system" - # 默认:业务失败 - return "business" - - def _handle_business_failure( - self, task_id: str, agent_id: str, verify: VerifyResult, - action_type: str, context_data: dict, assignee: str, - db_path: Path, - ) -> None: - """业务失败 → 在关联 PR/Issue 上创建 comment @原始 assignee""" - repo = context_data.get("repo", "") - pr_number = context_data.get("pr_number") or context_data.get("issue_number", "") - - if repo and pr_number: - comment_body = ( - f"@{assignee or agent_id} 工具链任务执行失败\n\n" - f"任务 ID: {task_id}\n" - f"失败原因: {verify.reason}\n" - f"证据: {verify.evidence}\n\n" - f"请检查黑板任务并处理。" - ) - success = self._create_gitea_comment(repo, pr_number, comment_body) - if success: - logger.info("Toolchain %s: business failure → Gitea comment on %s#%s", - task_id, repo, pr_number) - return - # Gitea API failed → escalate to system failure - logger.warning( - "Toolchain %s: Gitea comment failed, escalating to system failure", - task_id) - self._handle_system_failure( - 
task_id, agent_id, verify, action_type, context_data, db_path) - else: - # 没有 PR/Issue 关联 → fallback 到系统失败 - logger.warning( - "Toolchain %s: no PR/Issue context for business failure, " - "escalating to system failure", task_id) - self._handle_system_failure( - task_id, agent_id, verify, action_type, context_data, db_path) - - def _handle_system_failure( - self, task_id: str, agent_id: str, verify: VerifyResult, - action_type: str, context_data: dict, db_path: Path, - ) -> None: - """系统失败 → 创建 Gitea Issue @pangtong-fujunshi""" - repo = context_data.get("repo", "sanguo/sanguo_moziplus_v2") - title = f"[toolchain-handler] 工具链事件处理失败: {task_id}" - body = ( - f"任务 {task_id} 验证失败\n\n" - f"事件类型: {action_type or '未知'}\n" - f"失败原因: {verify.reason}\n" - f"证据: {verify.evidence}\n\n" - f"@pangtong-fujunshi 请检查黑板任务并手动处理。" - ) - - # 尝试在 Gitea 创建 Issue - created = self._create_gitea_issue(repo, title, body, ["pangtong-fujunshi"]) - if created: - logger.info("Toolchain %s: system failure → Gitea Issue created on %s", - task_id, repo) - else: - # Gitea API 不可用 → 基础设施失败 - logger.error( - "Toolchain %s: Gitea API unavailable, escalating to infrastructure failure", - task_id) - self._handle_infrastructure_failure( - task_id, agent_id, verify, db_path) - - def _handle_infrastructure_failure( - self, task_id: str, agent_id: str, - verify: VerifyResult, db_path: Path, - ) -> None: - """基础设施失败 → _send_toolchain_task @jiangwei-infra(防递归)""" - # 直接在 _toolchain DB 创建 task(不走 Gitea webhook) - try: - from src.api.toolchain_routes import _send_toolchain_task - _send_toolchain_task( - to_agent="jiangwei-infra", - title=f"[基础设施] Gitea API 不可用 - {task_id}", - description=( - f"Gitea API 不可用,原任务 {task_id} 无法通过正常路径处理。\n" - f"请检查 Gitea 服务状态和网络连通性。" - ), - event_type="infrastructure_failure", - action_type="infrastructure_failure", - steps=[ - "检查 Gitea 服务状态(http://192.168.2.154:3000)", - "检查网络连通性", - "恢复后提交 action report", - ], - context_data={"original_task_id": task_id, "verify_reason": 
verify.reason}, - source="toolchain_handler", - ) - logger.info("Toolchain %s: infrastructure failure → task created for jiangwei-infra", - task_id) - except Exception as e: - logger.error( - "Toolchain %s: failed to create infrastructure_failure task: %s", - task_id, e) - - # ----------------------------------------------------------------------- - # Gitea API 辅助 - # ----------------------------------------------------------------------- - - def _create_gitea_comment( - self, repo: str, pr_number: int, body: str, - ) -> bool: - """在 PR/Issue 上创建 comment。返回是否成功。""" - payload = json.dumps({"body": body}, ensure_ascii=False).encode("utf-8") - try: - req = urllib.request.Request( - f"{_GITEA_BASE}/repos/{repo}/issues/{pr_number}/comments", - data=payload, - headers={ - "Authorization": f"token {_GITEA_TOKEN}", - "Content-Type": "application/json", - }, - ) - urllib.request.urlopen(req, timeout=5) - return True - except Exception as e: - logger.warning("Gitea comment failed on %s#%s: %s", repo, pr_number, e) - return False - - def _create_gitea_issue( - self, repo: str, title: str, body: str, - assignees: list = None, - ) -> bool: - """创建 Gitea Issue。返回是否成功。""" - data = {"title": title, "body": body} - if assignees: - data["assignees"] = assignees - payload = json.dumps(data, ensure_ascii=False).encode("utf-8") - try: - req = urllib.request.Request( - f"{_GITEA_BASE}/repos/{repo}/issues", - data=payload, - headers={ - "Authorization": f"token {_GITEA_TOKEN}", - "Content-Type": "application/json", - }, - ) - urllib.request.urlopen(req, timeout=5) - return True - except Exception as e: - logger.warning("Gitea create issue failed on %s: %s", repo, e) - return False - - # ----------------------------------------------------------------------- - # 兼容:保留旧方法签名(但不再被 on_failure 调用) - # ----------------------------------------------------------------------- - - def _build_gitea_links(self, event_type: str, event_data: dict) -> str: - """根据事件类型构建 Gitea 链接。""" - links = [] - repo 
= event_data.get("repo", "") - base_url = "http://192.168.2.154:3000" - - if "pr_number" in event_data: - links.append(f"PR: {base_url}/{repo}/pulls/{event_data['pr_number']}") - if "issue_number" in event_data: - links.append(f"Issue: {base_url}/{repo}/issues/{event_data['issue_number']}") - if "commit" in event_data: - links.append(f"Commit: {base_url}/{repo}/commit/{event_data['commit']}") - if "branch" in event_data and "commit" not in event_data: - links.append(f"分支: {event_data['branch']}") - - return "\n".join(links) if links else "(无法提取链接,请检查黑板任务详情)" diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py deleted file mode 100644 index 44ab599..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_templates.py +++ /dev/null @@ -1,89 +0,0 @@ -"""工具链事件模板引擎(Toolchain Event Hub) - -加载 templates/toolchain/ 下的 Markdown 模板,提供 {variable} 占位符渲染。 -""" - -from __future__ import annotations - -import logging -from collections import defaultdict -from pathlib import Path -from typing import Dict - -logger = logging.getLogger(__name__) - -TEMPLATES_DIR = Path(__file__).parent.parent.parent / "templates" / "toolchain" - -# 模板文件名映射 -_TEMPLATE_MAP: Dict[str, str] = { - "review_request": "review_request.md", - "review_result": "review_result.md", - "issue_assigned": "issue_assigned.md", - "ci_failure": "ci_failure.md", - "deploy_failure": "deploy_failure.md", - "review_updated": "review_updated.md", - "review_comment": "review_comment.md", - "review_merged": "review_merged.md", - "mention": "mention.md", -} - -# 模板缓存 -_template_cache: Dict[str, str] = {} - - -def _load_template(name: str) -> str: - """加载并缓存模板文件内容。 - - Args: - name: 模板名称(不含 .md 后缀) - - Returns: - 模板文本内容 - - Raises: - FileNotFoundError: 模板文件不存在 - """ - if name in _template_cache: - return _template_cache[name] - - filename = _TEMPLATE_MAP.get(name) - if not filename: - raise 
ValueError(f"Unknown template: {name}") - - path = TEMPLATES_DIR / filename - if not path.exists(): - raise FileNotFoundError(f"Template not found: {path}") - - content = path.read_text(encoding="utf-8") - _template_cache[name] = content - logger.debug("Loaded template: %s (%d bytes)", name, len(content)) - return content - - -def _escape_braces(value: str) -> str: - """转义花括号防止 format_map 报错""" - return str(value).replace("{", "{{").replace("}", "}}") - - -def render_template(name: str, variables: Dict[str, str]) -> str: - """渲染模板,将 {variable} 占位符替换为实际值。 - - 使用 defaultdict(str) 确保未提供的变量替换为空字符串而非报错。 - - Args: - name: 模板名称 - variables: 变量字典 - - Returns: - 渲染后的文本 - """ - template_text = _load_template(name) - # 先对所有变量值转义花括号,防止 format_map 报错 - escaped_vars = {k: _escape_braces(v) for k, v in variables.items()} - safe_vars: Dict[str, str] = defaultdict(str, escaped_vars) - return template_text.format_map(safe_vars) - - -def clear_cache() -> None: - """清空模板缓存(用于测试或热更新)""" - _template_cache.clear() diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md b/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md deleted file mode 100644 index 22d2895..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md +++ /dev/null @@ -1,16 +0,0 @@ -{mention_type}通知 - -来源: {source_type} {source_url} -评论者: {commenter} -意图: {intent_hint} -内容: -{content_snippet} - -📋 获取完整上下文: -1. 查看{source_type}详情: GET {gitea_api}/repos/{repo}/{source_detail_api_path} -2. 
查看评论列表: GET {gitea_api}/repos/{repo}/{source_comments_api_path} - -📌 响应指引: -{response_guidance} - -完成后按指引操作。 diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py b/~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py deleted file mode 100644 index 0a32cc7..0000000 --- a/~/.sanguo_projects/sanguo_moziplus_v2/tests/unit/test_mention_utils.py +++ /dev/null @@ -1,129 +0,0 @@ -"""mention_utils 单元测试 — §25.7 覆盖。""" - -import pytest - -from src.api.mention_utils import ( - extract_mentions, - should_suppress_mention, - infer_intent, -) - - -# --------------------------------------------------------------------------- -# extract_mentions -# --------------------------------------------------------------------------- - -class TestExtractMentions: - """测试 @mention 提取逻辑。""" - - def test_exact_match(self): - """@zhangfei-dev 精确匹配。""" - assert extract_mentions("@zhangfei-dev 请看一下", "someone") == ["zhangfei-dev"] - - def test_chinese_alias(self): - """@张飞 中文别名匹配。""" - assert extract_mentions("@张飞 帮忙看看", "someone") == ["zhangfei-dev"] - - def test_english_short_name(self): - """@zhangfei 英文短名匹配。""" - assert extract_mentions("@zhangfei 快来", "someone") == ["zhangfei-dev"] - - def test_prefix_unique(self): - """@zhangf 前缀唯一匹配。""" - assert extract_mentions("@zhangf 来一下", "someone") == ["zhangfei-dev"] - - def test_prefix_ambiguous_no_match(self): - """@z 前缀模糊,多个候选,不匹配。""" - assert extract_mentions("@z 看看", "someone") == [] - - def test_dedup_same_person(self): - """@张飞 @zhangfei-dev 同时出现去重。""" - result = extract_mentions("@张飞 @zhangfei-dev 来一下", "someone") - assert result == ["zhangfei-dev"] - - def test_exclude_self(self): - """@zhangfei-dev 排除自己(sender=zhangfei-dev)。""" - assert extract_mentions("@zhangfei-dev 自己说", "zhangfei-dev") == [] - - def test_unknown_person(self): - """@unknown 不匹配任何 Agent。""" - assert extract_mentions("@unknown 你好", "someone") == [] - - def test_multiple_mentions(self): - """多个 @mention 返回多个 Agent。""" - result 
= set(extract_mentions("@张飞 @关羽 来讨论", "someone")) - assert result == {"zhangfei-dev", "guanyu-dev"} - - def test_mention_with_hyphen_in_middle(self): - """@mention 后面紧跟标点也能识别。""" - result = extract_mentions("@赵云,请看下", "someone") - assert result == ["zhaoyun-data"] - - -# --------------------------------------------------------------------------- -# should_suppress_mention -# --------------------------------------------------------------------------- - -class TestShouldSuppressMention: - """测试 @mention 通知抑制逻辑。""" - - def test_suppress_when_in_list(self): - """被提及者在自动通知列表中 → 抑制。""" - assert should_suppress_mention("zhangfei-dev", ["zhangfei-dev", "guanyu-dev"]) is True - - def test_not_suppress_when_not_in_list(self): - """被提及者不在自动通知列表中 → 不抑制。""" - assert should_suppress_mention("zhangfei-dev", ["guanyu-dev"]) is False - - def test_suppress_empty_list(self): - """自动通知列表为空 → 不抑制。""" - assert should_suppress_mention("zhangfei-dev", []) is False - - -# --------------------------------------------------------------------------- -# infer_intent -# --------------------------------------------------------------------------- - -class TestInferIntent: - """测试意图推断逻辑。 - - 优先级:assign → collaborate → help → notify(默认) - """ - - def test_help_question_mark(self): - """疑问句 → help。""" - assert infer_intent("@赵云 数据格式是什么?") == "help" - - def test_notify_plain_mention(self): - """纯通知(无关键词) → notify。""" - assert infer_intent("@关羽 这个 PR 涉及风控变更") == "notify" - - def test_collaborate_please_help(self): - """'请帮忙' → collaborate(NOT help!)。""" - assert infer_intent("@庞统 请帮忙澄清需求") == "collaborate" - - def test_assign_keywords(self): - """'交给你' → assign。""" - assert infer_intent("@张飞 前端部分交给你") == "assign" - - def test_help_how_to(self): - """'如何' → help。""" - assert infer_intent("@姜维 如何部署这个服务") == "help" - - def test_collaborate_please_review(self): - """'请review' → collaborate。""" - assert infer_intent("@司马懿 请review 这个方案") == "collaborate" - - def test_notify_default(self): - """无任何关键词 → 
notify。""" - assert infer_intent("@赵云 已更新数据") == "notify" - - def test_assign_takes_priority_over_help(self): - """assign 关键词优先于 help 关键词。""" - # "交给" in body → assign, even though "?" also present - assert infer_intent("@张飞 这个模块交给你,有问题?") == "assign" - - def test_collaborate_takes_priority_over_help(self): - """collaborate 关键词优先于 help 关键词。""" - # "请帮忙" in body → collaborate, even though "?" absent - assert infer_intent("@赵云 请帮忙看看数据") == "collaborate" -- 2.45.4