diff --git a/src/api/toolchain_routes.py b/src/api/toolchain_routes.py index 20cc655..e8c1e92 100644 --- a/src/api/toolchain_routes.py +++ b/src/api/toolchain_routes.py @@ -403,7 +403,7 @@ async def _send_mention_mails( action_type="mention", steps=[ "按上方 mention 模板中的 response_guidance 执行", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "source_type": source_type, @@ -475,7 +475,7 @@ async def _handle_pr_opened(payload: Dict[str, Any]) -> None: f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", "按审查清单审查(参考 code-review Skill)", f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)— APPROVE 或 REQUEST_CHANGES", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": pr_number, @@ -603,7 +603,7 @@ async def _handle_pull_request_review(payload: Dict[str, Any]) -> None: steps=[ f"查看评论(Gitea API: GET /repos/{repo}/issues/{pr_number}/comments)", "根据评论内容响应(修改代码或在 PR 上回复 comment)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": pr_number, @@ -637,14 +637,14 @@ async def _handle_pull_request_review(payload: Dict[str, Any]) -> None: if state == "APPROVED": tc_steps = [ f"合并 PR(Gitea API: POST /repos/{repo}/pulls/{pr_number}/merge)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST 
http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ] else: # REQUEST_CHANGES tc_steps = [ "按审查意见逐条修改代码", "push 到原分支 → CI 自动跑", "CI 通过后等重新 Review", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ] _send_toolchain_task( to_agent=pr_author, @@ -740,7 +740,7 @@ async def _handle_pr_synchronize(payload: Dict[str, Any]) -> None: f"读取 PR diff(Gitea API: GET /repos/{repo}/pulls/{pr_number}.diff)", "重点检查上次 Review 意见的修改部分", f"提交 Review(Gitea API: POST /repos/{repo}/pulls/{pr_number}/reviews)", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": pr_number, @@ -772,7 +772,7 @@ def _send_deploy_failure_task(repo: str, pr_number: int, pr_title: str, reason: "检查 deploy 日志", "排查失败原因", "修复并重新部署", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "repo": repo, @@ -1002,7 +1002,7 @@ async def _handle_issues(payload: Dict[str, Any]) -> None: "push → 等 CI", f"CI 通过后创建 PR(Gitea API: POST /repos/{repo}/pulls)", "等 Review", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "issue_number": issue_number, @@ -1037,7 +1037,7 @@ async def _handle_issues(payload: Dict[str, Any]) -> None: "检查 deploy 日志", "排查失败原因", "修复并重新部署", - f"提交 action report(POST 
http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "repo": repo, @@ -1120,7 +1120,7 @@ async def _handle_issue_comment(payload: Dict[str, Any]) -> None: "查看完整 CI 日志(PR 页面或 Gitea Actions 页面)", "修复失败的测试", "push → CI 自动重跑", - f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", ], context_data={ "pr_number": issue_number, diff --git a/src/daemon/toolchain_handler.py b/src/daemon/toolchain_handler.py index 4ecf503..65bf8a6 100644 --- a/src/daemon/toolchain_handler.py +++ b/src/daemon/toolchain_handler.py @@ -137,7 +137,7 @@ class ToolchainApiSection: "如果在执行过程中需要其他角色协助(如缺数据、需要审批等),在关联的 PR/Issue 上创建 comment @对方:", "```bash", f'curl -s -X POST "{_GITEA_BASE}/repos/{{repo}}/issues/{{pr_number}}/comments" \\', - f' -H "Authorization: token " \\', + ' -H "Authorization: token " \\', ' -H "Content-Type: application/json" \\', ' -d \'{"body": "@{agent-id} 需要你的支持:{描述问题}"}\'', "```", diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py new file mode 100644 index 0000000..5922ff0 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/mention_utils.py @@ -0,0 +1,169 @@ +"""@mention 解析工具模块。供所有 toolchain handler 复用。""" + +import re +import logging +from typing import List, Set + +from src.config.agents import AGENT_IDS + +logger = logging.getLogger(__name__) + +# Gitea API 基地址常量(避免硬编码) +GITEA_API_BASE = "http://192.168.2.154:3000/api/v1" +GITEA_WEB_BASE = "http://192.168.2.154:3000" + +# Agent 别名映射 +# 规则: +# 1. 中文名(如"张飞")→ 完整 Agent ID +# 2. 英文短名(如"zhangfei")→ 完整 Agent ID +# 3. 
# Alias table: informal names -> canonical Agent ID.
# Three spellings are accepted for every agent: the Chinese name, the
# courtesy name, and the short English name (the Agent ID without its
# role suffix).
AGENT_ALIAS: dict[str, str] = {
    # Chinese names
    "张飞": "zhangfei-dev",
    "关羽": "guanyu-dev",
    "赵云": "zhaoyun-data",
    "姜维": "jiangwei-infra",
    "司马懿": "simayi-challenger",
    "庞统": "pangtong-fujunshi",
    # Courtesy names (a common way of addressing the agents)
    "翼德": "zhangfei-dev",
    "云长": "guanyu-dev",
    "子龙": "zhaoyun-data",
    "伯约": "jiangwei-infra",
    "仲达": "simayi-challenger",
    "士元": "pangtong-fujunshi",
    # Short English names
    "zhangfei": "zhangfei-dev",
    "guanyu": "guanyu-dev",
    "zhaoyun": "zhaoyun-data",
    "jiangwei": "jiangwei-infra",
    "simayi": "simayi-challenger",
    "pangtong": "pangtong-fujunshi",
}

# Matches "@name" where name starts with an ASCII letter or a CJK ideograph
# and continues with ASCII letters, digits, CJK ideographs or hyphens.
#
# * The first-character class and the continuation class now share the same
#   CJK range (U+4E00-U+9FFF); previously the first character stopped at
#   U+9FA5, so a name could contain such a character but not start with it.
# * The negative look-behind rejects an "@" glued to an ASCII word character,
#   "." or "_", so e-mail addresses such as "bob@guanyu.example.com" are not
#   treated as mentions. A preceding CJK character is still allowed because
#   Chinese text normally has no space before "@".
_MENTION_PATTERN = re.compile(
    r"(?<![A-Za-z0-9._])@([a-zA-Z\u4e00-\u9fff][a-zA-Z0-9\u4e00-\u9fff-]*)"
)
def infer_intent(body: str) -> str:
    """Infer the intent of an @mention from its text.

    Keyword groups are tested in a fixed priority order (assign, then help,
    then collaborate); the first group with a keyword occurring in *body*
    decides the result. Matching is a case-insensitive substring test, so
    "Implement" and "请Review" are recognised as well.

    Args:
        body: Text of the comment / description containing the mention.
            ``None`` or an empty string is treated as "no keywords".

    Returns:
        One of ``"assign"``, ``"help"``, ``"collaborate"`` or ``"notify"``
        (the default when no keyword matches).
    """
    text = (body or "").lower()

    keyword_groups = (
        # Sub-task assignment.
        ("assign", ("交给", "分配", "负责", "认领", "做一下", "帮忙做", "implement")),
        # Questions. Both the ASCII and the full-width question mark are
        # listed; the original list contained the ASCII one twice, so
        # questions ending in a full-width "\uff1f" were missed.
        ("help", ("怎么", "如何", "?", "\uff1f", "什么", "哪个", "能否")),
        # Collaboration requests.
        ("collaborate", ("请帮忙", "请协助", "请澄清", "请review", "请审查", "评估")),
    )
    for intent, keywords in keyword_groups:
        if any(keyword in text for keyword in keywords):
            return intent

    # Nothing matched: plain "for your information" notification.
    return "notify"
在 Gitea 评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments\n" + f' Body: {{"body": "你的回答内容"}}' + ) + elif intent == "notify": + return ( + f"这是一条通知,请查看并知晓。如有意见,可到 Gitea 评论:\n" + f"- 查看 Issue/PR 详情(上方 API)\n" + f"- 如有意见,评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments" + ) + elif intent == "collaborate": + return ( + f"这是一条协作请求,请评估后回复(评论或 Mail):\n" + f"1. 获取详情(上方 API)\n" + f"2. 评估可行性\n" + f"3a. 评论回复: POST {gitea_api}/repos/{repo}/issues/{issue_number}/comments\n" + f' Body: {{"body": "你的回复"}}\n' + f"3b. 或通过 Mail 回复评论者: {commenter}" + ) + elif intent == "assign": + return ( + f"这是一条任务分配,请认领并执行:\n" + f"1. 获取 Issue 详情(上方 API)\n" + f"2. 评估可行性\n" + f"3. 认领 Issue: POST {gitea_api}/repos/{repo}/issues/{issue_number}/assignees\n" + f' Body: {{"assignees": ["{{your_agent_id}}"]}}\n' + f"4. 执行任务\n" + f"5. 完成后更新 Issue 状态: PATCH {gitea_api}/repos/{repo}/issues/{issue_number}\n" + f' Body: {{"state": "closed"}}' + ) + return "请查看详情(上方 API)并按需回复。" diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py new file mode 100644 index 0000000..20cc655 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/api/toolchain_routes.py @@ -0,0 +1,1246 @@ +"""API 路由 — 工具链事件中枢(Toolchain Event Hub) + +接收 Gitea Webhook,翻译成 Mail 通知推送给 Agent。 + +端点: POST /webhook/gitea +支持事件: pull_request, pull_request_review, issues, issue_comment +""" + +from __future__ import annotations + +import asyncio +import hashlib +import hmac +import json +import logging +import os +import re +import time +from datetime import datetime +from pathlib import Path, PurePath +from typing import Any, Dict, List, Optional, Set, Tuple + +import httpx +from fastapi import APIRouter, Header, Request, Response + +from src.blackboard.db import init_db +from src.blackboard.models import Task +from src.blackboard.operations import Blackboard +from src.config.agents import AGENT_IDS +from 
src.api.mention_utils import ( + extract_mentions, + should_suppress_mention, + infer_intent, + _build_response_guidance, + GITEA_API_BASE, +) +from src.daemon.toolchain_templates import render_template +from src.utils import get_data_root + +logger = logging.getLogger(__name__) + +router = APIRouter(tags=["toolchain"]) + +# --------------------------------------------------------------------------- +# 幂等检查:内存 set,保留最近 7 天 +# --------------------------------------------------------------------------- +# 使用内存 set 而非 SQLite(设计文档原计划 SQLite,简化实现:daemon 重启不频繁, +# 重启后丢失可接受,Webhook 重试窗口内不会重复) + +_delivery_cache: Set[str] = set() +_delivery_timestamps: List[Tuple[float, str]] = [] +_TTL_SECONDS = 7 * 24 * 3600 +_idempotency_lock = asyncio.Lock() + + +def _is_duplicate(event: str, delivery: str, + payload: Optional[Dict[str, Any]] = None) -> bool: + """检查 Webhook 是否重复投递,自动清理过期条目。 + + 双重去重策略: + 1. delivery UUID 去重(标准幂等) + 2. payload 内容去重(应对 Gitea v1.23.4 的 webhookNotifier + actionsNotifier + 对同一 review 生成不同 UUID 的双投递问题) + """ + now = time.time() + # 清理过期条目 + while _delivery_timestamps and ( + now - _delivery_timestamps[0][0]) > _TTL_SECONDS: + _, key = _delivery_timestamps.pop(0) + _delivery_cache.discard(key) + + # 检查 delivery UUID 去重 + key = f"{event}-{delivery}" + if key in _delivery_cache: + return True + + # 检查 payload 内容去重(review 事件:同一 PR + 同一用户 + 同一内容) + # 注意:Gitea webhookNotifier 用 review.body,actionsNotifier 用 review.content + # 所以去重 key 需要同时取两个字段,确保两种格式生成相同 key + if payload and "review" in event: + pr_num = payload.get("pull_request", {}).get("number") + sender = payload.get("sender", {}).get("login") + review = payload.get("review", {}) + # 取 body 或 content,优先 body(webhookNotifier 格式) + content = review.get("body", "") or review.get("content", "") + content_hash = hashlib.sha256(content.encode()).hexdigest()[:16] + review_id = review.get("id", "") + content_key = f"content:{event}:{pr_num}:{sender}:{review_id}:{content_hash}" + if content_key in _delivery_cache: + 
# Shared secret configured on the Gitea webhook. Read once at import time;
# when it is unset or empty, signature verification is skipped.
_WEBHOOK_SECRET: Optional[str] = os.environ.get("GITEA_WEBHOOK_SECRET")


def _verify_signature(body: bytes, signature: Optional[str]) -> bool:
    """Verify the HMAC-SHA256 signature of a webhook request body.

    Args:
        body: Raw request body exactly as received.
        signature: Hex digest taken from the signature header, or ``None``
            when the header is missing. Surrounding whitespace, upper-case
            hex digits and a leading ``sha256=`` prefix are tolerated.

    Returns:
        ``True`` when no secret is configured (verification disabled) or
        when the signature matches; ``False`` otherwise. Never raises for a
        malformed signature string.
    """
    if not _WEBHOOK_SECRET:
        return True
    if not signature:
        return False

    expected = hmac.new(
        _WEBHOOK_SECRET.encode(), body, hashlib.sha256
    ).hexdigest()

    provided = signature.strip().lower()
    if provided.startswith("sha256="):
        provided = provided[len("sha256="):]

    # compare_digest() raises TypeError for str arguments containing
    # non-ASCII characters, which would turn a forged header into an
    # unhandled error. Comparing bytes keeps the check constant-time and
    # makes any such input simply fail verification.
    return hmac.compare_digest(
        expected.encode("ascii"),
        provided.encode("utf-8", "replace"),
    )
# Glob patterns naming high-risk files. Only the part after the last "/" is
# significant: a file is high-risk when its basename matches it.
_HIGH_PATTERNS = [
    "**/spawner*", "**/ticker*", "**/dispatcher*",
    "**/router*", "**/guardrails*", "**/strategy*", "**/risk*",
]


def _calc_risk_level(changed_files: List[str]) -> str:
    """Classify a change set as ``"high"`` or ``"standard"`` risk.

    A change set is high-risk as soon as one changed file has a basename
    matching one of ``_HIGH_PATTERNS``, at any directory depth.

    The previous implementation used ``PurePath.match("**/name*")``. That
    match is right-anchored and treats ``**`` like a single ``*``, so it
    required at least one parent directory and silently classified
    repository-root files such as ``router.py`` as standard risk.

    Args:
        changed_files: Paths of the changed files, relative to the repo root.

    Returns:
        ``"high"`` if any file matches, otherwise ``"standard"`` (also for
        an empty list).
    """
    # Local import: fnmatch is not among this module's top-level imports.
    from fnmatch import fnmatchcase

    basename_globs = [pattern.rsplit("/", 1)[-1] for pattern in _HIGH_PATTERNS]
    for filepath in changed_files:
        basename = PurePath(filepath).name
        if any(fnmatchcase(basename, glob) for glob in basename_globs):
            return "high"
    return "standard"
def _send_mail(
    to_agent: str,
    title: str,
    description: str,
    source: str = "webhook",
) -> str:
    """Create a Mail task for *to_agent* and write it to the mail database.

    Args:
        to_agent: Recipient Agent ID; must be one of ``AGENT_IDS``.
        title: Mail subject.
        description: Mail body.
        source: Origin marker stored in the mail metadata.

    Returns:
        The ID of the created mail, or ``""`` when *to_agent* is not a known
        agent (a warning is logged and nothing is written).

    Raises:
        Exception: Whatever the blackboard raises when the write fails.
    """
    if to_agent not in AGENT_IDS:
        logger.warning("Unknown agent: %s, skipping mail", to_agent)
        return ""

    # Local import: uuid is not among this module's top-level imports.
    import uuid

    # A millisecond timestamp alone is not unique: two mails created within
    # the same millisecond would share an ID. The random suffix keeps the
    # "mail-<ms>" prefix while making collisions practically impossible.
    # NOTE(review): assumes no consumer parses the ID as "mail-<int>" --
    # confirm before relying on the new suffix.
    created_ms = int(datetime.now().timestamp() * 1000)
    mail_id = f"mail-{created_ms}-{uuid.uuid4().hex[:8]}"

    notify_meta = {
        "type": "inform",
        "performative": "inform",
        "is_read": False,
        "conversation_id": f"conv-{mail_id}",
        "from": "system",
        "source": source,
    }
    task = Task(
        id=mail_id,
        title=title,
        description=description,
        assignee=to_agent,
        assigned_by="system",
        must_haves=json.dumps(notify_meta, ensure_ascii=False),
        task_type="mail",
        status="pending",
    )
    bb = Blackboard(_mail_db_path())
    bb.create_task(task)
    logger.info("Mail sent: %s → %s [%s]", title[:40], to_agent, mail_id)
    return mail_id
def _repo_fullname(payload: Dict[str, Any]) -> str:
    """Return the repository full name (``owner/repo``) from a webhook payload.

    Args:
        payload: Decoded webhook JSON body.

    Returns:
        ``payload["repository"]["full_name"]`` when it is a string, otherwise
        ``""``. A missing, null or non-dict ``repository`` and a missing or
        null ``full_name`` all yield ``""`` instead of raising or returning
        ``None``, so callers can safely interpolate the result.
    """
    repository = payload.get("repository") if isinstance(payload, dict) else None
    if not isinstance(repository, dict):
        return ""
    full_name = repository.get("full_name")
    return full_name if isinstance(full_name, str) else ""
async def _handle_pull_request(payload: Dict[str, Any]) -> None:
    """Route a ``pull_request`` webhook to the handler for its action.

    ``opened``, ``closed`` and ``synchronize`` are dispatched to their
    dedicated handlers; every other action is ignored.
    """
    action = payload.get("action", "")
    routes = (
        ("opened", _handle_pr_opened),
        ("closed", _handle_pr_closed),
        ("synchronize", _handle_pr_synchronize),
    )
    # Plain equality tests in the original order, stopping at the first hit.
    for expected_action, handler in routes:
        if action == expected_action:
            await handler(payload)
            return
async def _send_review_mentions(
    review_body: str,
    reviewer: str,
    pr_author: str,
    pr: dict,
    repo: str,
    pr_number: int,
) -> None:
    """Notify the agents @mentioned in a review body.

    Used for both COMMENTED and non-COMMENTED reviews. The PR author is
    passed as the already-notified target so that a mention of the author
    is suppressed by the mention sender.
    """
    mentioned_agents = extract_mentions(review_body, reviewer)
    if not mentioned_agents:
        return

    await _send_mention_mails(
        mentions=mentioned_agents,
        auto_targets=[pr_author],
        source_type="Review",
        mention_type="Review @mention",
        source_url=pr.get("html_url", ""),
        commenter=reviewer,
        content=review_body,
        repo=repo,
        issue_number=pr_number,
        is_pr=True,
    )
Any]) -> None: + """处理 pull_request_review 事件:非 COMMENTED → 通知 PR 作者。 + + 支持两种 payload 格式: + - repo webhook: review.state = "APPROVED" / "REQUEST_CHANGES" + - org webhook (Gitea v1.23.4): review.type = "pull_request_review_approved" / "pull_request_review_rejected" + """ + review = payload.get("review") + if not review or not isinstance(review, dict): + logger.warning( + "pull_request_review event missing review field, skipping") + return + pr = payload.get("pull_request") + if not pr or not isinstance(pr, dict): + logger.warning( + "pull_request_review event missing pull_request field, skipping") + return + + # 兼容两种 payload 格式提取 state + state = review.get("state", "") + if not state: + # org webhook 格式:review.type = "pull_request_review_approved" + review_type = review.get("type", "") + type_map = { + "pull_request_review_approved": "APPROVED", + "pull_request_review_rejected": "REQUEST_CHANGES", + "pull_request_review_comment": "COMMENTED", + } + state = type_map.get(review_type, "") + + repo = _repo_fullname(payload) + pr_number = pr.get("number", 0) + pr_title = pr.get("title", "") + pr_author = pr.get("user", {}).get("login", "unknown") + # 兼容:org webhook 的 review 没有 user,从 sender 取 + reviewer = review.get( + "user", + {}).get( + "login", + "") or payload.get( + "sender", + {}).get( + "login", + "unknown") + review_body = review.get("body", "") or review.get("content", "(无评论)") + + if state == "COMMENTED": + # Review 评论 → 通知 PR 作者 + review_body = review.get("body", "") or review.get("content", "(无评论)") + reviewer = review.get("user", {}).get("login", "") or payload.get("sender", {}).get("login", "unknown") + + text = render_template("review_comment", { + "repo": repo, + "pr_number": str(pr_number), + "pr_title": pr_title, + "reviewer": reviewer, + "comment_body": review_body, + }) + + title = f"Review 评论: {pr_title} ({repo}#{pr_number})" + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="review_comment", + 
async def _fetch_latest_reviewer(repo: str, pr_number: int) -> str:
    """Return the login of whoever submitted the most recent review on a PR.

    Reviews are read from the Gitea API and scanned from newest to oldest;
    the first one in a submitted state decides the result. Pending reviews
    and review requests are skipped.

    Args:
        repo: Repository full name (``owner/repo``).
        pr_number: Pull request number.

    Returns:
        The reviewer login, or ``""`` when no token is configured, the
        request fails, the response is not a list, or no submitted review
        exists. Failures are logged, never raised (best-effort lookup).
    """
    if not _GITEA_TOKEN:
        return ""

    url = f"{_GITEA_BASE}/repos/{repo}/pulls/{pr_number}/reviews"
    headers = {"Authorization": f"token {_GITEA_TOKEN}"}
    # The Gitea REST API reports comment reviews as "COMMENT"; "COMMENTED"
    # is kept so payloads using the older spelling keep working.
    submitted_states = ("APPROVED", "REQUEST_CHANGES", "COMMENT", "COMMENTED")

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(url, headers=headers)
            resp.raise_for_status()
            reviews = resp.json()
    except Exception as e:
        logger.warning("Failed to fetch reviews for %s#%d: %s", repo, pr_number, e)
        return ""

    if not isinstance(reviews, list):
        return ""

    # NOTE(review): only the first page of results is inspected -- confirm
    # that PRs with many reviews do not need pagination here.
    for review in reversed(reviews):
        if not isinstance(review, dict):
            continue
        if review.get("state", "") in submitted_states:
            # "user" may be present but null (e.g. deleted account).
            user = review.get("user") or {}
            return user.get("login") or ""

    return ""
def _send_deploy_failure_task(repo: str, pr_number: int, pr_title: str, reason: str) -> None:
    """Send a "deployment failed" toolchain task to the infrastructure agents.

    One task is created for each of ``jiangwei-infra`` and
    ``pangtong-fujunshi``. The description is the rendered ``deploy_failure``
    template followed by the failure reason.

    Args:
        repo: Repository full name (``owner/repo``).
        pr_number: Number of the merged PR that triggered the deployment.
        pr_title: Title of that PR (stored in the task context).
        reason: Human-readable failure reason, e.g. captured stderr.
    """
    text = render_template("deploy_failure", {
        "repo": repo,
        # The template slot is named commit_sha but is filled with the PR
        # reference, since this function receives no commit hash.
        "commit_sha": f"PR #{pr_number}",
    })
    title = f"部署失败: {repo} (auto-deploy, PR #{pr_number})"
    full_text = f"{text}\n\n失败原因: {reason}"

    for agent_id in ("jiangwei-infra", "pangtong-fujunshi"):
        _send_toolchain_task(
            to_agent=agent_id,
            title=title,
            description=full_text,
            event_type="deploy_failure",
            action_type="deploy_failure",
            steps=[
                "检查 deploy 日志",
                "排查失败原因",
                "修复并重新部署",
                # Plain string: the literal has no placeholders, so the "f"
                # prefix was dropped (same text at runtime).
                # NOTE(review): the URL has an empty segment ("tasks//comments");
                # it looks like a task id is missing -- confirm.
                "提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)",
            ],
            context_data={
                "repo": repo,
                "pr_number": pr_number,
                "pr_title": pr_title,
                "reason": reason,
            },
        )
to_agent=pr_author, + title=title, + description=text, + event_type="review_merged", + action_type="review_merged", + steps=[], # 纯通知,无步骤 + context_data={ + "pr_number": pr_number, + "repo": repo, + "pr_title": pr_title, + "pr_author": pr_author, + "merged_by": merged_by, + }, + ) + + # 自动部署:git pull + rsync + 按需 post_deploy + try: + import yaml + + # 加载部署配置 + config_path = Path(__file__).parent.parent.parent / "config" / "deploy-targets.yaml" + if not config_path.exists(): + return + + with open(config_path, "r", encoding="utf-8") as f: + deploy_config = yaml.safe_load(f) or {} + + targets = deploy_config.get("targets", {}) + target = targets.get(repo) + if not target: + return # 该仓库不在部署配置中,跳过 + + dev_dir = os.path.expanduser(target["dev_dir"]) + install_dir = os.path.expanduser(target.get("install_dir", target["dev_dir"])) + rsync_excludes = target.get("rsync_exclude", []) + + # Step 1: git pull in dev dir + proc = await asyncio.create_subprocess_exec( + "git", "pull", "origin", "main", + cwd=dev_dir, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30) + + if proc.returncode != 0: + logger.warning("Auto-deploy: git pull failed for %s: %s", repo, stderr.decode()) + return + + logger.info("Auto-deploy: git pull success for %s", repo) + + # Step 2: rsync to install dir + rsync_args = ["rsync", "-a"] + for exc in rsync_excludes: + rsync_args.extend(["--exclude", exc]) + rsync_args.extend([f"{dev_dir}/", f"{install_dir}/"]) + + rsync_proc = await asyncio.create_subprocess_exec( + *rsync_args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, rsync_err = await asyncio.wait_for(rsync_proc.communicate(), timeout=60) + + if rsync_proc.returncode != 0: + logger.error("Auto-deploy: rsync failed: %s", rsync_err.decode()) + _send_deploy_failure_task(repo, pr_number, pr_title, f"rsync 失败: {rsync_err.decode()}") + return + + # Step 3: 判断是否需要执行 post_deploy + 
files = await _fetch_pr_files(repo, pr_number) + file_list = files[0] + needs_restart = any( + f.startswith("src/") or f.startswith("templates/") or f.startswith("frontend/") or f.endswith(".py") + for f in file_list + ) + + if needs_restart: + post_deploy_cmds = target.get("post_deploy", []) + pm2_name = target.get("pm2_name", "") + for cmd in post_deploy_cmds: + logger.info("Auto-deploy: executing post_deploy: %s", cmd) + + # M2: 检测当前进程是否会被此命令杀掉(而非脆弱的字符串匹配) + # 通过 PM2 环境变量判断:pm2 启动的进程有 PM2_HOME + self_restart = False + if pm2_name and os.environ.get("PM2_HOME") and "pm2 restart" in cmd: + # 检查命令是否包含当前进程名 + if re.search(rf'pm2\s+restart\s+{re.escape(pm2_name)}', cmd): + self_restart = True + + if self_restart: + # M1: 用 asyncio.sleep 延迟而非 nohup,保留子进程输出和错误检测 + # 先 sleep 让 handler 正常返回,再启动 restart 命令 + # restart 的子进程会在父进程死后被 pm2 新进程接管 + logger.info("Auto-deploy: self-restart detected, deferring 2s: %s", cmd) + await asyncio.sleep(2) + deploy_proc = await asyncio.create_subprocess_exec( + "sh", "-c", cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + # restart 会杀掉当前进程,communicate 可能不会完成 + # 但我们至少尝试读取输出 + try: + _, deploy_err = await asyncio.wait_for( + deploy_proc.communicate(), timeout=10) + except (asyncio.TimeoutError, ProcessLookupError): + # 预期行为:进程被 pm2 restart 杀掉 + logger.info("Auto-deploy: process killed by self-restart (expected)") + break + else: + deploy_proc = await asyncio.create_subprocess_exec( + "sh", "-c", cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, deploy_err = await asyncio.wait_for(deploy_proc.communicate(), timeout=30) + + if deploy_proc.returncode != 0: + logger.error("Auto-deploy: post_deploy failed: %s", deploy_err.decode()) + _send_deploy_failure_task(repo, pr_number, pr_title, f"post_deploy 失败 ({cmd}): {deploy_err.decode()}") + break + else: + logger.info("Auto-deploy: all post_deploy commands succeeded (files: %s)", ", ".join(file_list[:5])) + else: + 
logger.info("Auto-deploy: docs-only change for %s, skip post_deploy", repo) + + except asyncio.TimeoutError: + logger.error("Auto-deploy: timeout for %s", repo) + _send_deploy_failure_task(repo, pr_number, pr_title, "部署超时") + except Exception as e: + logger.error("Auto-deploy: unexpected error: %s", e) + + +async def _handle_issues(payload: Dict[str, Any]) -> None: + """处理 issues 事件:assigned → 通知被指派人;opened+部署失败 → 通知运维。""" + action = payload.get("action", "") + issue = payload.get("issue") + if not issue or not isinstance(issue, dict): + logger.warning("issues event missing issue field, skipping") + return + repo = _repo_fullname(payload) + issue_number = issue.get("number", 0) + issue_title = issue.get("title", "") + + if action == "assigned": + assignee = "" + assignees = issue.get("assignees") or [] + if not assignees: + single = issue.get("assignee") + if single and isinstance(single, dict): + assignees = [single] + if assignees: + assignee = assignees[-1].get("login", "") + else: + assignee = "" + if not assignee: + logger.debug("Issue assigned but no assignee found, skipping") + return + + labels_list = [lbl.get("name", "") + for lbl in (issue.get("labels") or [])] + labels = ", ".join(labels_list) if labels_list else "(无标签)" + issue_body = issue.get("body", "(无描述)") + brief = issue_title[:20].replace(" ", "-").lower() + + text = render_template("issue_assigned", { + "repo": repo, + "issue_number": str(issue_number), + "issue_title": issue_title, + "labels": labels, + "issue_body": issue_body or "(无描述)", + "brief": brief, + }) + + title = f"Issue 指派: {issue_title} ({repo}#{issue_number})" + _send_toolchain_task( + to_agent=assignee, + title=title, + description=text, + event_type="issue_assigned", + action_type="issue_assigned", + steps=[ + f"创建分支 fix/{issue_number}-{brief}", + "编码 + 写 UT", + "push → 等 CI", + f"CI 通过后创建 PR(Gitea API: POST /repos/{repo}/pulls)", + "等 Review", + f"提交 action report(POST 
http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "issue_number": issue_number, + "repo": repo, + "issue_title": issue_title, + "labels": labels, + "issue_body": issue_body or "(无描述)", + "brief": brief, + }, + ) + + elif action == "opened": + if "部署失败" in issue_title: + # 从 Issue body 提取 commit hash(Gitea deploy workflow 格式) + sha_match = re.search(r'[0-9a-f]{40}', issue.get("body", "")) + commit_sha = sha_match.group(0) if sha_match else "(未知)" + + text = render_template("deploy_failure", { + "repo": repo, + "commit_sha": commit_sha or "(未知)", + }) + + title = f"部署失败: {repo}" + for agent_id in ("jiangwei-infra", "pangtong-fujunshi"): + _send_toolchain_task( + to_agent=agent_id, + title=title, + description=text, + event_type="deploy_failure", + action_type="deploy_failure", + steps=[ + "检查 deploy 日志", + "排查失败原因", + "修复并重新部署", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "repo": repo, + "commit_sha": commit_sha or "(未知)", + }, + ) + + # Issue body @mention(opened 时检查) + issue_body = issue.get("body", "") or "" + sender = payload.get("sender", {}).get("login", "") + mentions = extract_mentions(issue_body, sender) + if mentions: + # 自动流转已通知 assignee + assignees = issue.get("assignees") or [] + if not assignees: + single = issue.get("assignee") + if single and isinstance(single, dict): + assignees = [single] + auto_targets = [a.get("login", "") for a in assignees if isinstance(a, dict)] + await _send_mention_mails( + mentions=mentions, + auto_targets=auto_targets, + source_type="Issue", + mention_type="Issue @mention", + source_url=issue.get("html_url", ""), + commenter=sender, + content=issue_body, + repo=repo, + issue_number=issue_number, + is_pr=False, + ) + + +async def _handle_issue_comment(payload: Dict[str, Any]) -> None: + """处理 issue_comment 事件:CI 失败关键词 → 通知 PR 作者;@mention → 通知被提及者。""" + comment = 
payload.get("comment") + if not comment or not isinstance(comment, dict): + logger.warning("issue_comment event missing comment field, skipping") + return + body = comment.get("body", "") + sender = comment.get("user", {}).get("login", "") + + issue = payload.get("issue") + if not issue or not isinstance(issue, dict): + logger.warning("issue_comment event missing issue field, skipping") + return + + action = payload.get("action", "") + if action != "created": + return + + # === 路径 1:CI 失败通知(原有逻辑,改为正向 if) === + if ("[CI]" in body or "CI 失败" in body) and issue.get("state") != "closed": + repo = _repo_fullname(payload) + issue_number = issue.get("number", 0) + + # 尝试从关联 PR 获取信息 + pr_author = issue.get("user", {}).get("login", "unknown") + branch_match = re.search(r"分支:\s*(\S+)", body) + branch = branch_match.group(1) if branch_match else "(未知)" + + # 提取错误摘要(取 comment body 前 500 字符) + error_summary = body[:500] if body else "(无错误信息)" + + text = render_template("ci_failure", { + "repo": repo, + "pr_number": str(issue_number), + "branch": branch, + "error_summary": error_summary, + }) + + title = f"CI 失败: {repo}#{issue_number}" + _send_toolchain_task( + to_agent=pr_author, + title=title, + description=text, + event_type="ci_failure", + action_type="ci_failure", + steps=[ + "查看完整 CI 日志(PR 页面或 Gitea Actions 页面)", + "修复失败的测试", + "push → CI 自动重跑", + f"提交 action report(POST http://localhost:8083/api/projects/_toolchain/tasks//comments,comment_type=action_report)", + ], + context_data={ + "pr_number": issue_number, + "repo": repo, + "branch": branch, + "error_summary": error_summary, + }, + ) + # CI 处理完不 return,继续检查 @mention + + # === 路径 2:@mention 通知(新增,独立路径) === + # 注意:@mention 检测与 CI 检测是独立的,同一条评论可同时触发两者 + mentions = extract_mentions(body, sender) + if mentions: + # 判断是 PR 还是 Issue(Gitea 中 PR 本质是特殊的 Issue) + is_pr = issue.get("pull_request") is not None + source_type = "PR" if is_pr else "Issue" + mention_type = "PR @mention" if is_pr else "Issue @mention" + + issue_number = 
issue.get("number", 0) + repo = _repo_fullname(payload) + + # 自动流转已通知的人(CI 失败通知的 PR 作者) + auto_targets: list[str] = [] + if ("[CI]" in body or "CI 失败" in body) and issue.get("state") != "closed": + auto_targets.append(issue.get("user", {}).get("login", "")) + + await _send_mention_mails( + mentions=mentions, + auto_targets=auto_targets, + source_type=source_type, + mention_type=mention_type, + source_url=issue.get("html_url", ""), + commenter=sender, + content=body, + repo=repo, + issue_number=issue_number, + is_pr=is_pr, + ) + + +# --------------------------------------------------------------------------- +# 事件分发 +# --------------------------------------------------------------------------- + +_EVENT_HANDLERS: Dict[str, Any] = { + "pull_request": _handle_pull_request, + "pull_request_sync": _handle_pr_synchronize, # Gitea: PR branch push 是独立事件类型 + "pull_request_review": _handle_pull_request_review, + "pull_request_review_approved": _handle_pull_request_review, + "pull_request_review_rejected": _handle_pull_request_review, + "pull_request_review_comment": _handle_pull_request_review, + "pull_request_comment": _handle_pull_request_review, # Gitea: review comment 独立事件类型 + # Gitea v1.23.4 实际发出的 review 子事件(无 _review_ 中间段) + "pull_request_approved": _handle_pull_request_review, + "pull_request_rejected": _handle_pull_request_review, + "issues": _handle_issues, + "issue_comment": _handle_issue_comment, +} + + +# --------------------------------------------------------------------------- +# Webhook 端点 +# --------------------------------------------------------------------------- + + +@router.post("/webhook/gitea") +async def gitea_webhook( + request: Request, + x_gitea_event: Optional[str] = Header(None, alias="X-Gitea-Event"), + x_gitea_delivery: Optional[str] = Header(None, alias="X-Gitea-Delivery"), + x_gitea_signature: Optional[str] = Header(None, alias="X-Gitea-Signature"), +) -> Response: + """Gitea Webhook 接收端点。 + + 处理流程:签名验证 → 幂等检查 → 事件分发 → Mail 推送。 + + 返回策略: + - 
payload 解析失败 / 未知事件 / 幂等重复 → 200(不触发重试) + - Mail 创建失败 → 500(触发 Gitea 重试) + """ + body = await request.body() + + # 1. 签名验证 + if not _verify_signature(body, x_gitea_signature): + logger.warning("Webhook signature verification failed") + return Response(status_code=403, + content="signature verification failed") + + # 3. 解析 payload(提前解析,用于幂等检查) + try: + payload = await request.json() + except Exception: + logger.warning("Failed to parse webhook payload") + return Response(status_code=200, content="invalid payload") + + # 2. 幂等检查(需要在 payload 解析后,以支持内容去重) + if x_gitea_event and x_gitea_delivery: + async with _idempotency_lock: + if _is_duplicate(x_gitea_event, x_gitea_delivery, payload): + logger.debug( + "Duplicate webhook: %s/%s", + x_gitea_event, + x_gitea_delivery) + return Response(status_code=200, content="duplicate") + + # 4. 查找 handler + action = payload.get("action", "") + logger.info("[WEBHOOK] event=%s action=%s delivery=%s", x_gitea_event, action, x_gitea_delivery) + handler = _EVENT_HANDLERS.get(x_gitea_event or "") + if not handler: + logger.info("[WEBHOOK] Unhandled event type: %s", x_gitea_event) + return Response(status_code=200, + content=f"unhandled event: {x_gitea_event}") + + # 5. 
执行 handler + try: + await handler(payload) + except Exception: + logger.exception("Mail creation failed for %s event", x_gitea_event) + return Response(status_code=500, content="mail creation failed") + + return Response(status_code=200, content="ok") diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py new file mode 100644 index 0000000..bf7908d --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/prompt_composer.py @@ -0,0 +1,129 @@ +""" +prompt_composer.py — PromptSection Protocol + PromptContext + PromptComposer + +拼装器:有序管理 prompt 段落,按优先级排序后合并为最终 prompt。 +""" + +import logging +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Protocol, runtime_checkable + +logger = logging.getLogger("moziplus-v2.prompt_composer") + +# --------------------------------------------------------------------------- +# Section 优先级范围约定 +# --------------------------------------------------------------------------- +PRIORITY_CONTEXT = 10 # 任务上下文 +PRIORITY_PRIOR = 20 # 前序信息 +PRIORITY_ROLE = 30 # 角色规范 +PRIORITY_API = 40 # API 操作指令 +PRIORITY_CONSTRAINTS = 50 # 硬约束 +PRIORITY_EXTENSION = 60 # 扩展段 + + +# --------------------------------------------------------------------------- +# PromptSection Protocol +# --------------------------------------------------------------------------- +@runtime_checkable +class PromptSection(Protocol): + """一个 prompt 段""" + + name: str # 段名(去重用,同名覆盖) + priority: int # 排序优先级(小数字=靠前) + + def render(self, context: "PromptContext") -> str: + """渲染此段的文本内容。返回空字符串表示不注入。""" + ... + + def should_include(self, context: "PromptContext") -> bool: + """是否注入此段(默认 True,条件段可覆盖)。""" + ... 
+ + +# --------------------------------------------------------------------------- +# PromptContext 数据对象 +# --------------------------------------------------------------------------- +@dataclass +class PromptContext: + """Prompt 渲染的统一上下文""" + + task_id: str + title: str + description: str + must_haves: str + project_id: str + agent_id: str + + task: Optional[Dict] = None + role: str = "executor" + spawn_type: str = "executor" + + # mail 专用 + from_agent: str = "" + mail_type: str = "" # inform / request + + # toolchain 专用 + event_type: str = "" # ci_failure / review_request / ... + event_data: Dict = field(default_factory=dict) + action_type: str = "" # 动作分类(review_result / ci_failure / ...) + action_steps: list = field(default_factory=list) # 结构化编号步骤列表 + + # 前序产出 + depends_on_outputs: Optional[List] = None + + +# --------------------------------------------------------------------------- +# PromptComposer 拼装器 +# --------------------------------------------------------------------------- +class PromptComposer: + """有序拼装 prompt sections""" + + SEPARATOR = "\n\n---\n\n" + TOKEN_BUDGET_WARN = 800 # token 预算警告阈值 + CHARS_PER_TOKEN = 3.5 # 估算比率 + + def __init__(self) -> None: + self._sections: List[PromptSection] = [] + + def add(self, section: PromptSection) -> None: + """添加一个 section(同名覆盖)""" + self._sections = [s for s in self._sections if s.name != section.name] + self._sections.append(section) + + def add_many(self, sections: List[PromptSection]) -> None: + """批量添加""" + for s in sections: + self.add(s) + + def compose(self, context: PromptContext) -> str: + """拼装最终 prompt + + 1. 过滤 should_include=False 的段 + 2. 按 priority 排序 + 3. 逐段 render + 4. 过滤空段 + 5. 用分隔符连接 + 6. 
Token 预算警告(不截断) + """ + active = [s for s in self._sections if s.should_include(context)] + active.sort(key=lambda s: s.priority) + + parts = [s.render(context) for s in active] + parts = [p for p in parts if p.strip()] + + result = self.SEPARATOR.join(parts) + + # Token 估算 + tokens = max(1, int(len(result) / self.CHARS_PER_TOKEN)) + logger.debug( + "Composed prompt from %d sections, %d tokens", + len(parts), tokens, + ) + + if tokens > self.TOKEN_BUDGET_WARN: + logger.warning( + "Prompt exceeds %d token budget: %d tokens (task_id=%s)", + self.TOKEN_BUDGET_WARN, tokens, context.task_id, + ) + + return result diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py new file mode 100644 index 0000000..28451bb --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/spawner.py @@ -0,0 +1,2088 @@ +"""Agent Spawner - 异步 spawn Full Agent / Subagent + +Full Agent: asyncio.create_subprocess_exec(异步非阻塞,不 await 完成) +Subagent: 占位(实际通过 OpenClaw Gateway API sessions_spawn,F17 完善) +""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import uuid +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional + +from src.blackboard.db import get_connection +from src.daemon.task_type_registry import TaskTypeRegistry + +logger = logging.getLogger("moziplus-v2.spawner") + + +# ── Prompt 模板 ── + +# Mail 专用模板:inform 类型(纯通知,状态由系统管理) +MAIL_INFORM_TEMPLATE = """你收到一封飞鸽传书(纯通知)。 + +发件者: {from_agent} +主题: {title} +内容: {text} + +已阅即可。如需回复,用 in_reply_to 回复发件者(不需要填 to)。 +⚠️ 不要执行任何状态转换命令。 +""" + +# Mail 专用模板:request 类型(需要处理并回复,状态由系统管理) +MAIL_REQUEST_TEMPLATE = """你收到一封飞鸽传书,需要你处理并回复。 + +发件者: {from_agent} +主题: {title} +内容: {text} + +### 如何回复发件者 + +curl -s -X POST http://localhost:8083/api/mail \\ + -H 'Content-Type: application/json' \\ + -d '{{"from": "{agent_id}", "in_reply_to": "{task_id}", "title": "回复: {title}", 
"text": "你的回复内容"}}' + +⚠️ 不需要填 "to",系统自动回复给发件者。 + +### 如何给其他人发新邮件 + +curl -s -X POST http://localhost:8083/api/mail \\ + -H 'Content-Type: application/json' \\ + -d '{{"from": "{agent_id}", "to": "对方agent-id", "title": "标题", "text": "正文", "type": "inform"}}' + +⚠️ to 必须是有效的 agent id: {valid_agents} +⚠️ 纯通知用 type=inform,需要对方回复不填 type(默认 request) +⚠️ 不能给自己发邮件 +⚠️ 不要执行任何状态转换命令(标 working/done/review/failed 等),系统会自动处理。 +""" + +SPAWN_PROMPT_TEMPLATE = """{identity_section} + +## 任务 +{title} +{description} + +项目: {project_id} | ID: {task_id} +类型: {task_type} | 优先级: {priority} +验收标准: {must_haves} + +{retry_context} + +## 你能做什么 +- 读任务详情(含依赖、讨论、产出): GET {api_base}/projects/{project_id}/tasks/{task_id}?expand=all +- 读所有活跃任务: GET {api_base}/projects/{project_id}/tasks?status=working,claimed,review +- 写产出: POST {api_base}/projects/{project_id}/tasks/{task_id}/outputs +- 写评论/交接: POST {api_base}/projects/{project_id}/tasks/{task_id}/comments +- 更新状态: POST {api_base}/projects/{project_id}/tasks/{task_id}/status +- 创建子任务: POST {api_base}/projects/{project_id}/tasks +- 认领任务: POST {api_base}/projects/{project_id}/tasks/{{{{id}}}}/claim + +## 约束 +- 完成后必须写产出物(output)并标 review,不能无产出就提交 +- 失败了标 failed 并写明原因 +- 产出物 handoff comment ≥ 50 字符(用于系统验证) +- 禁止使用 sessions_send 直接发消息(用 Mail API 或黑板 comment) +- 委托他人做事用黑板 comment @agent-id,系统自动路由(如 @zhaoyun-data 你来获取数据,无需手动传 mentions 数组) +- 安全红线: {guardrails_summary} + +### API 请求体示例 +写产出: POST .../outputs +```json +{{{{"agent": "{agent_id}", "content_type": "code", "title": "产出标题", "content_path": "/path/to/file", "summary": "简要说明"}}}} +``` + +写评论: POST .../comments +```json +{{{{"author": "{agent_id}", "body": "评论内容(≥50字符)", "comment_type": "handoff"}}}} +``` +""" + + +DISCUSSION_PROMPT_TEMPLATE = """你被 spawn 来参与黑板讨论。这是一个 v2.9 四相循环的讨论环节。 + +## 你的任务 + +{goal_snapshot} + +## 约束 + +{constraints} + +## 黑板 API + +你可以随时: +- 读黑板:GET http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}?expand=all(含 comments、outputs) +- 写 comment:POST 
http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/comments + body: {{"author": "{agent_id}", "body": "内容(@agent-id 自动路由)"}} +- 创建 sub task:POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks + body: {{"title": "...", "description": "...", "task_type": "...", "parent_task": "{task_id}", "must_haves": "{{\"capability\": \"...\"}}"}} +- 认领任务:POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{{sub_task_id}}/claim + +## 行为准则 + +1. **你是自主的。**读黑板、思考、行动,不要等指令。 +2. **不重复别人的工作。**动手前先读黑板看谁在做什么(Separation)。 +3. **保持方向对齐。**你的产出方向和 parent goal 对齐,不确定时 @pangtong-fujunshi(Alignment)。 +4. **产出可共享。**产出写入黑板,让其他人能看到你的成果(Cohesion)。 +5. **不越界。**安全红线不要碰,超出能力的 @ 庞统升级(Boundary)。 +6. **随时讨论。**执行过程中需要协作时 @ 对应 Agent,讨论是灵活的不是固定阶段的。 + +## 讨论完成后 + +- 如果讨论收敛到可执行的任务,直接创建 sub task +- 如果有分歧或不确定,在黑板上写 comment @ 庞统裁决 +- 标记完成: +```bash +curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \ + -H 'Content-Type: application/json' \ + -d '{{"status": "done", "agent": "{agent_id}"}}' +``` +""" + + +# Mail 续杯专用模板:不包含状态转换指令(系统自动标 done) +MAIL_RETRY_PROMPT = """你收到一个续杯提醒。你的任务在执行过程中被中断了。 + +发件者: {from_agent} +主题: {title} +续杯次数: 第 {retry_count} 次(上限 {max_retries} 次) + +请检查 session 历史中你之前做了什么,然后继续未完成的工作。 + +⚠️ 不要执行任何状态转换命令(标 working/done/review/failed 等),系统会自动处理。 +⚠️ 如果任务已完成,直接写产出即可,不要调 status API。 +""" + + +class AgentBusyError(Exception): + """Agent 无法 spawn(被占用/冷却/session 锁等) + + #07: reason 字段区分具体原因,便于 dispatcher 层区分处理。 + """ + + def __init__(self, agent_id: str, reason: str = "busy", + detail: Optional[dict] = None): + self.agent_id = agent_id + # counter_blocked / session_locked / session_running / session_compacting / session_stuck + self.reason = reason + self.detail = detail or {} + super().__init__(f"{agent_id}: {reason}") + + +class AgentSpawner: + """Agent spawn 管理""" + + def __init__( + self, + db_path: Optional[Path] = None, + agent_timeout: float = 630.0, + dry_run: bool = False, + api_host: str = "127.0.0.1", 
+ api_port: int = 8083, + bootstrap_builder: Optional[Any] = None, + gateway_timeout: float = 600.0, + max_retries: int = 3, + max_monitor_timeouts: int = 3, + counter: Optional[Any] = None, + ): + """ + Args: + db_path: 项目黑板 DB 路径(用于写 task_attempts) + agent_timeout: Agent 超时秒数 + dry_run: 测试模式,不实际 spawn + api_host: API 地址(供 Agent 回写) + api_port: API 端口(供 Agent 回写) + """ + self.db_path = db_path + self.agent_timeout = agent_timeout + self.dry_run = dry_run + self.api_host = api_host + self.api_port = api_port + self.bootstrap_builder = bootstrap_builder + self.gateway_timeout = gateway_timeout + self.max_retries = max_retries + self.max_monitor_timeouts = max_monitor_timeouts + # v2.7.2: counter 引用(spawn_full_agent 内部 acquire/release) + self.counter = counter + # guardrails: 由 main.py 在初始化后赋值 + self.guardrails = None + + # session 注册表 {session_id: {...}} + self._sessions: Dict[str, Dict[str, Any]] = {} + # B2 compact 等待计数器 {task_id: count} + self._compact_waits: Dict[str, int] = {} + # B1 假死计数器 {task_id: count} + self._stuck_counts: Dict[str, int] = {} + self._valid_agents_cache: Optional[set] = None + + def _load_valid_agents(self) -> set: + """从 config/default.yaml 读取有效 Agent ID 列表(带缓存)""" + if self._valid_agents_cache is not None: + return self._valid_agents_cache + config_path = Path(__file__).parent.parent / "config" / "default.yaml" + if config_path.exists(): + try: + import yaml + with open(config_path) as f: + cfg = yaml.safe_load(f) + profiles = cfg.get("daemon", {}).get("agent_profiles", {}) + if profiles: + self._valid_agents_cache = set(profiles.keys()) + return self._valid_agents_cache + except Exception: + pass + self._valid_agents_cache = { + "zhangfei-dev", "guanyu-dev", "zhaoyun-data", + "jiangwei-infra", "pangtong-fujunshi", "simayi-challenger" + } + return self._valid_agents_cache + + @property + def active_sessions(self) -> Dict[str, Dict[str, Any]]: + """当前活跃的 spawn sessions""" + return {sid: s for sid, s in self._sessions.items() + if 
s.get("status") == "running"} + + def build_spawn_message( + self, + task_id: str, + title: str, + description: str, + task_type: str = "", + priority: int = 5, + must_haves: str = "", + project_id: str = "", + agent_id: str = "", + current_status: str = "claimed", + retry_context: str = "", + task: Optional[Any] = None, + project_config: Optional[Dict[str, Any]] = None, + spawn_type: str = "executor", # executor | discussion | review + ) -> str: + """构建 Agent spawn 的消息(优先用 BootstrapBuilder,fallback 用模板) + + Args: + current_status: 任务当前状态(动态生成状态机提示) + retry_context: 重试上下文(前轮产出摘要 + 审查意见) + task: Task 对象(BootstrapBuilder 用) + project_config: 项目配置(BootstrapBuilder 用) + spawn_type: spawn 类型(executor=执行, discussion=讨论, review=审查) + """ + # discussion 类型直接用模板(不走 BootstrapBuilder) + if spawn_type == "discussion": + return self._build_discussion_prompt( + task_id, title, description, must_haves, + project_id, agent_id) + + # handler 路径:Task/Mail/Toolchain 用各自的 PromptSection 构建 + handler = TaskTypeRegistry.get_by_project(project_id) + if handler: + from src.daemon.prompt_composer import PromptContext + # 从 must_haves 解析 mail 元数据(from / performative) + from_agent = "" + mail_type = "" + action_type = "" + action_steps = [] + try: + meta = json.loads(must_haves) if must_haves else {} + from_agent = meta.get("from", "") + mail_type = meta.get("performative", meta.get("type", "")) + # toolchain 字段提取 + action_type = meta.get("action_type", "") + action_steps = meta.get("steps", []) + except Exception: + pass + ctx = PromptContext( + task_id=task_id, title=title, description=description or "", + must_haves=must_haves or "", project_id=project_id, + agent_id=agent_id, role=spawn_type, + spawn_type=spawn_type, + from_agent=from_agent, mail_type=mail_type, + action_type=action_type, action_steps=action_steps, + ) + return handler.build_prompt(ctx) + + # 旧路径保留:_general 等非 handler 项目 + + # 走 BootstrapBuilder 新路径 + if self.bootstrap_builder and task is not None: + role_map = { + 
"executor": "executor", + "review": "reviewer", + "discussion": "planner"} + role = role_map.get(spawn_type, "executor") + bootstrap_prompt = self.bootstrap_builder.build_for_task( + task=task, + role=role, + ) + api_section = self._build_api_section( + project_id, task_id, agent_id) + return bootstrap_prompt + "\n\n---\n\n" + api_section + + # 无 BootstrapBuilder 或无 task 对象 → 最小 fallback + # 只保留任务上下文 + API 操作指令 + logger.warning( + "No BootstrapBuilder or task object, using minimal fallback") + return self._build_minimal_fallback( + task_id, title, description, must_haves, + project_id, agent_id) + + def _build_minimal_fallback(self, task_id, title, description, must_haves, + project_id, agent_id): + """最小 fallback:只有任务上下文 + API 指令""" + task_section = f"""## 任务 +{title} +{description or "(无描述)"} + +项目: {project_id} | ID: {task_id} +验收标准: {must_haves or "(无)"}""" + api_section = self._build_api_section(project_id, task_id, agent_id) + return task_section + "\n\n---\n\n" + api_section + + def _build_api_section(self, project_id: str, task_id: str, + agent_id: str) -> str: + """构建 API 回写操作指令(BootstrapBuilder 模式下补充)""" + # handler 项目(_mail/_toolchain)的 success_status 由 PromptSection 处理 + # 这里只处理无 handler 的项目(normal task) + handler = TaskTypeRegistry.get_by_project(project_id) + if handler: + success_status = '"done"' if handler.target_success_status == "done" else '"review"' + else: + success_status = '"review"' + return f"""## 操作指令 + +### 状态回写 +开始工作: +```bash +curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/tasks/{task_id}/status \ + -H 'Content-Type: application/json' \ + -d '{{"status": "working", "agent": "{agent_id}"}}' +``` + +### 写入产出 +```bash +curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/tasks/{task_id}/outputs \ + -H 'Content-Type: application/json' \ + -d '{{"agent": "{agent_id}", "type": "<类型>", "title": "<标题>", "content": "<内容>", "summary": "<摘要>"}}' +``` + +### 完成后 +成功:status → {success_status} 
| 失败:status → "failed" +""" + + def _build_discussion_prompt(self, task_id: str, title: str, + description: str, must_haves: str, + project_id: str, agent_id: str) -> str: + """构建讨论类 spawn prompt(§3.3 框架 + Boids)""" + goal_snapshot = description or title + constraints = must_haves or "(无特殊约束)" + + return DISCUSSION_PROMPT_TEMPLATE.format( + goal_snapshot=goal_snapshot, + constraints=constraints, + project_id=project_id, + task_id=task_id, + agent_id=agent_id, + api_host=self.api_host, + api_port=self.api_port, + ) + + def _inject_agent_identity(self, agent_id: str) -> str: + """#03: 注入 Agent 身份+专长""" + caps = "通用" + router = getattr(self, '_router_ref', None) + if router: + profile = router.agent_profiles.get(agent_id) + if profile and getattr(profile, 'capabilities_zh', None): + caps = ", ".join(profile.capabilities_zh) + return f"你是 {agent_id},专长: {caps}。" + + def _get_guardrails_summary(self) -> str: + """#03: 从 GuardrailEngine 提取红线摘要""" + if not self.guardrails: + return "无特殊限制" + try: + return "、".join(r.get("name", r.get("rule_id", "")) + for r in self.guardrails.rules[:6]) + except Exception: + return "无特殊限制" + + def _get_agent_profile(self, agent_id: str): + """获取 Agent 能力画像""" + router = getattr(self, '_router_ref', None) + if router: + return router.agent_profiles.get(agent_id) + return None + + def _build_mail_prompt(self, task_id: str, title: str, description: str, + must_haves: str, agent_id: str) -> str: + """构建 Mail 专用精简模板""" + # 解析 must_haves 获取 from 和 performative + from_agent = agent_id + performative = "request" + try: + meta = json.loads(must_haves) if must_haves else {} + from_agent = meta.get("from", agent_id) + performative = meta.get( + "performative", meta.get( + "type", "request")) + except Exception: + pass + + # 截断 title 和 text 用于模板安全 + safe_title = (title or "").replace('"', '\\"')[:100] + safe_text = (description or "").replace('"', '\\"') + + # 获取有效 Agent 列表(从 config/default.yaml 读取) + valid_agents_list = self._load_valid_agents() + 
async def spawn_full_agent(
    self,
    agent_id: str,
    message: str,
    new_session: bool = False,
    task_id: Optional[str] = None,
    on_complete: Optional[Any] = None,
    use_main_session: bool = False,
    task_db_path: Optional[Path] = None,
    reuse_session_id: Optional[str] = None,
    on_checks_passed: Optional[Any] = None,
    skip_counter: bool = False,
    broadcast_task_ids: Optional[List[str]] = None,
) -> str:
    """Spawn a full agent subprocess without blocking the caller.

    Counter ownership: when ``skip_counter`` is False this call acquires
    the counter slot itself and guarantees its release (on a blocked
    check, on spawn failure, or when the process exits via the wrapped
    completion callback). When ``skip_counter`` is True the slot is owned
    by an earlier spawn whose completion callback is passed in as
    ``on_complete``; this call then never releases the counter itself, so
    the slot is released exactly once by its owner.

    Args:
        agent_id: Agent to run.
        message: Prompt passed to ``openclaw agent --message``.
        new_session: Accepted for interface compatibility; not read here.
        task_id: Task the spawn belongs to, if any.
        on_complete: Business callback ``(agent_id, outcome)``; may be a
            plain function or a coroutine function.
        use_main_session: Deliver to the agent's main session (no
            ``--session-id`` is passed).
        task_db_path: Database path handed to the monitor; falls back to
            ``self.db_path``.
        reuse_session_id: Explicit session id to reuse (deprecated in
            favour of ``use_main_session=True``).
        on_checks_passed: Called after all checks pass and before the
            subprocess is started.
        skip_counter: Skip counter acquire/release (used by retries).
        broadcast_task_ids: Real task ids recorded with the session for
            broadcast spawns.

    Returns:
        The session id; ``None`` for a main-session spawn, or the counter
        key (``"main"`` for the main session) in dry-run mode.

    Raises:
        AgentBusyError: The counter refused the agent or the main session
            is locked, running, compacting or stuck.
    """
    # Step 0: choose the session id (pure computation, no I/O).
    if use_main_session:
        session_id = None
    elif reuse_session_id:
        session_id = reuse_session_id
    else:
        session_id = str(uuid.uuid4())
    _sid_key = session_id or "main"  # key used with the counter

    # True only when THIS call acquires the counter and therefore must
    # release it. Every release below is guarded by this flag so a retry
    # (skip_counter=True) cannot release a slot a second time.
    owns_counter = bool(self.counter) and not skip_counter

    # Phase 0: pre-acquire repair, done without holding the counter.
    # NOTE(review): _revive_session only rewrites a "running" status, so
    # the timeout/failed branch looks like a no-op — confirm intent.
    if use_main_session:
        pre_state = self._check_session_state(agent_id)
        if pre_state.get("status") in ("timeout", "failed"):
            logger.info("Phase 0: %s status=%s, reviving before acquire",
                        agent_id, pre_state["status"])
            self._revive_session(agent_id)
        elif pre_state.get("status") == "running" and not pre_state.get("lock_pid_alive"):
            # status=running but the lock PID is dead: stale session.
            logger.warning(
                "Phase 0: %s status=running but lock PID dead, reviving",
                agent_id)
            self._revive_session(agent_id)

    # Phase 1: acquire the counter (mutual exclusion per agent).
    if owns_counter:
        acquired = await self.counter.acquire(agent_id, _sid_key)
        if not acquired:
            raise AgentBusyError(agent_id, reason="counter_blocked")

    # Phase 2: session check under counter protection, right before spawn.
    # All blocking reasons are collected and reported together.
    if use_main_session:
        session_state = self._check_session_state(agent_id)
        logger.info("Phase 2 session check for %s: status=%s lock_pid=%s lock_pid_alive=%s compact=%s",
                    agent_id, session_state.get('status'),
                    session_state.get('lock_pid'),
                    session_state.get('lock_pid_alive'),
                    session_state.get('recent_compact'))

        blockers = []
        if session_state.get("lock_pid_alive") and not session_state.get("lock_expired"):
            blockers.append(
                ("session_locked", session_state.get("lock_pid")))
        if session_state.get("status") == "running":
            if session_state.get("lock_pid_alive"):
                # Genuinely running: held by a live external process.
                blockers.append(("session_running", None))
            # Otherwise: stale "running" with a dead lock PID, handled
            # by Phase 2.5 below.
        if session_state.get("recent_compact"):
            blockers.append(("session_compacting", None))

        if blockers:
            if owns_counter:
                self.counter.release(agent_id, _sid_key)
            primary_reason, primary_detail = blockers[0]
            logger.info("Phase 2 blocked %s: %s (all=%s)",
                        agent_id, primary_reason, blockers)
            raise AgentBusyError(agent_id, reason=primary_reason,
                                 detail={"blockers": blockers})

        # Phase 2.5: fallback repair of a stale "running" session that
        # Phase 0 should already have fixed; revive and re-check.
        if session_state.get("status") == "running" and not session_state.get(
                "lock_pid_alive"):
            logger.warning("Phase 2.5: %s status=running + lock dead (should be caught in Phase 0), reviving",
                           agent_id)
            self._revive_session(agent_id)
            session_state = self._check_session_state(agent_id)
            if session_state.get("status") == "running":
                if owns_counter:
                    self.counter.release(agent_id, _sid_key)
                raise AgentBusyError(agent_id, reason="session_stuck",
                                     detail={"status": "running after revive"})

    # Phase 3: on_checks_passed callback. If it raises, no subprocess has
    # been started, so the completion wrapper will never run and the
    # counter must be released here.
    if on_checks_passed:
        try:
            on_checks_passed()
        except Exception:
            if owns_counter:
                self.counter.release(agent_id, _sid_key)
            raise

    if self.dry_run:
        # NOTE(review): the counter acquired in Phase 1 is not released
        # on this path — confirm that is intended for dry runs.
        logger.info(
            "[DRY RUN] Would spawn agent %s (session=%s)",
            agent_id,
            _sid_key)
        self._register_session(_sid_key, agent_id, task_id, pid=None)
        return _sid_key

    # Completion wrapper: releases the counter slot this call owns, then
    # runs the business callback (sync or async), logging its failures.
    async def _wrapped_on_complete(aid, outcome):
        try:
            if owns_counter:
                self.counter.release(aid, _sid_key)
        finally:
            if on_complete:
                try:
                    result = on_complete(aid, outcome)
                    if asyncio.iscoroutine(result):
                        await result
                except Exception:
                    logger.warning(
                        "Business on_complete failed for %s", aid, exc_info=True)

    cmd = [
        "openclaw", "agent",
        "--agent", agent_id,
    ]
    if session_id:
        cmd.extend(["--session-id", session_id])
    cmd.extend([
        "--message", message,
        "--json",
        "--timeout", str(int(self.gateway_timeout)),
    ])

    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        self._register_session(session_id, agent_id, task_id, proc.pid,
                               broadcast_task_ids=broadcast_task_ids)
        logger.info("Spawned agent %s (session=%s, pid=%d)",
                    agent_id, session_id, proc.pid)

        # Monitor the process in the background; the wrapper is passed so
        # the counter is released when the process finishes.
        asyncio.create_task(
            self._monitor_process(session_id, proc, agent_id, task_id,
                                  on_complete=_wrapped_on_complete,
                                  db_path=task_db_path or self.db_path)
        )

        return session_id

    except Exception as e:
        # A failed spawn never reaches the monitor, so release here — but
        # only the slot this call acquired. With skip_counter=True the
        # caller's on_complete performs the release.
        if owns_counter:
            self.counter.release(agent_id, _sid_key)
        logger.exception("Failed to spawn agent %s", agent_id)
        self._record_attempt(
            task_id,
            agent_id,
            "spawn_failed",
            error=str(e))
        raise
"subagent", task_id, pid=None) + return session_id + + # ── 续杯 Prompt 模板 ── + + RETRY_PROMPT = """你收到一个续杯提醒。你的任务在执行过程中被中断了。 + +## 任务信息 + +- 项目: {project_id} +- 任务ID: {task_id} +- 标题: {title} +- 续杯次数: 第 {retry_count} 次(上限 {max_retries} 次) + +请检查 session 历史中你之前做了什么,然后继续未完成的工作。 + +## 操作指令 + +### 查看任务当前状态 +```bash +curl http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}?expand=all +``` + +### 如果已经完成,标记 review +```bash +curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \\ + -H 'Content-Type: application/json' \\ + -d '{{"status": "review", "agent": "{agent_id}"}}' +``` + +### 写入产出(如果之前没写) +```bash +curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/outputs \\ + -H 'Content-Type: application/json' \\ + -d '{{"agent": "{agent_id}", "type": "<类型>", "title": "<标题>", "content": "<内容>", "summary": "<摘要>"}}' +``` + +### 如果无法解决,标记失败 +```bash +curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_id}/status \\ + -H 'Content-Type: application/json' \\ + -d '{{"status": "failed", "agent": "{agent_id}", "detail": "<失败原因>"}}' +``` + +{fallback_hint}""" + + async def _monitor_process( + self, + session_id: Optional[str], + proc: asyncio.subprocess.Process, + agent_id: str, + task_id: Optional[str], + on_complete: Optional[Any] = None, + db_path: Optional[Path] = None, + monitor_timeout_count: int = 0, + ) -> None: + """监控子进程全生命周期(设计文档 spawner-monitor-design.md)""" + stdout_chunks: list = [] + stderr_chunks: list = [] + + try: + # ── 等待进程退出 + 流式读取 ── + async def _read_streams(): + async def _read_out(): + while True: + chunk = await proc.stdout.read(4096) + if not chunk: + break + stdout_chunks.append(chunk) + + async def _read_err(): + while True: + chunk = await proc.stderr.read(4096) + if not chunk: + break + stderr_chunks.append(chunk) + + await asyncio.gather(_read_out(), _read_err(), proc.wait()) + + await asyncio.wait_for(_read_streams(), 
timeout=self.agent_timeout) + # ── 情况 A:进程退出 ── + exit_code = proc.returncode + await self._handle_exit( + session_id, agent_id, task_id, exit_code, + stdout_chunks, stderr_chunks, on_complete, db_path + ) + + except asyncio.TimeoutError: + # ── 情况 B:monitor timeout(进程没退出)── + logger.warning("Agent %s monitor timeout (session=%s, count=%d/%d)", + agent_id, session_id, monitor_timeout_count + 1, + self.max_monitor_timeouts) + await self._handle_monitor_timeout( + session_id, agent_id, task_id, proc, + on_complete, db_path, stderr_chunks, monitor_timeout_count + ) + + async def _handle_exit(self, session_id, agent_id, task_id, exit_code, + stdout_chunks, stderr_chunks, on_complete, db_path): + """情况 A:进程退出后的处理 + + v2.7.2: 进程退出 = counter release(由 on_complete = wrapped_on_complete 保证)。 + 只有 A2/A3(gateway_timeout)触发续杯,其他都不 retry。 + A9(api_error/429)额外推回 pending + 设冷却。 + """ + stdout_text = b"".join(stdout_chunks).decode("utf-8", errors="replace") + stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace") + + # 解析 stdout JSON + json_result = self._parse_stdout_json(stdout_text) + logger.info("Parsed JSON result for agent=%s session=%s: %s", + agent_id, session_id, json_result) + + # 查任务实际状态 + task_status = self._get_task_status( + db_path, task_id) if task_id else None + + # 分类 + cls = self._classify_outcome( + exit_code, + json_result, + stderr_text, + task_status, + stdout_text) + outcome = cls["outcome"] + + # 更新 session 状态 + sid = session_id or "main" + if sid in self._sessions: + self._sessions[sid]["status"] = outcome + self._sessions[sid]["completed_at"] = datetime.utcnow().isoformat() + self._sessions[sid]["exit_code"] = exit_code + if json_result: + self._sessions[sid]["meta"] = json_result + + # 记录 attempt + self._record_attempt( + task_id, agent_id, outcome, exit_code=exit_code, + db_path=db_path, + metadata={ + "status": json_result.get("status"), + "summary": json_result.get("summary"), + "fallback_used": json_result.get("fallback_used"), + 
"fallback_reason": json_result.get("fallback_reason"), + "task_status_at_exit": task_status, + } + ) + + logger.info("Agent %s finished (session=%s, outcome=%s, exit=%d, task_status=%s)", + agent_id, session_id, outcome, exit_code, task_status) + + # 广播反馈追踪(Phase 1 bug fix) + if task_id == "broadcast" and hasattr( + self, '_ticker') and self._ticker: + # 广播任务:从 session 信息取真实 task_id 列表,逐一回调 tracker + sess_info = self._sessions.get(session_id or "main", {}) + bt_ids = sess_info.get("broadcast_task_ids") or [] + # 广播场景一律标 no_reply:Agent 只 claim 一个任务, + # 其余任务的 tracker 不能被 claimed 清除 + for real_task_id in bt_ids: + self._ticker.record_broadcast_response( + real_task_id, agent_id, "no_reply") + elif task_id and hasattr(self, '_ticker') and self._ticker: + outcome_str = "claimed" if cls.get( + "status") == "ok" else "no_reply" + self._ticker.record_broadcast_response( + task_id, agent_id, outcome_str) + + if cls["should_retry"]: + # cooldown: 新增的可恢复场景(A14/A15/A16/A8/A10) + cooldown_seconds = cls.get("cooldown_seconds", 0) + if cooldown_seconds and self.counter: + self.counter.set_cooldown(agent_id, seconds=cooldown_seconds) + # A2/A3: gateway_timeout → 续杯(on_complete 会 release counter) + await self._do_retry( + session_id, agent_id, task_id, on_complete, db_path, + cls.get("retry_field", "retry_count") + ) + elif outcome == "api_error": + # A9: [DEPRECATED] api_error 已改为 should_retry=True 走续杯路径。 + # 此分支理论上不再命中,保留作为安全兜底。 + # A9: 429/API 错误 → release counter(on_complete)+ 推回 pending + 冷却 + # 有上限:api_retry_count 累计达 max_retries 则标 failed + await self._do_on_complete_async(on_complete, agent_id, outcome) + if self.counter: + self.counter.set_cooldown(agent_id) + if db_path and task_id: + retry_counts = self._get_retry_counts(db_path, task_id) + api_count = retry_counts.get("api_retry_count", 0) + 1 + retry_counts["api_retry_count"] = api_count + self._update_retry_counts(db_path, task_id, retry_counts) + if api_count >= self.max_retries: + logger.error("Task %s 
api_retry_count=%d >= max_retries, marking failed", + task_id, api_count) + self._mark_task(db_path, task_id, "failed", { + "reason": "max_api_retry_count", "count": api_count, + }) + else: + self._mark_task(db_path, task_id, "pending", { + "reason": "api_error_retry", + "api_retry_count": api_count, + }) + logger.info("Task %s pushed back to pending (api_error, api_retry=%d/%d)", + task_id, api_count, self.max_retries) + elif outcome == "fallback_timeout" and not cls["should_retry"]: + # A3/A3b: fallback 分级处理 + # fallback_count 从 task_attempts.metadata 读取, + # 达 max_retries 标 failed(A3),否则 retry + cooldown(A3b) + fallback_count = 0 + if db_path and task_id: + retry_counts = self._get_retry_counts(db_path, task_id) + fallback_count = retry_counts.get("fallback_count", 0) + 1 + retry_counts["fallback_count"] = fallback_count + self._update_retry_counts(db_path, task_id, retry_counts) + + if fallback_count >= self.max_retries: + # A3: 连续 fallback 达上限,标 failed + logger.error("A3 fallback exhausted: agent=%s session=%s task=%s " + "fallback_count=%d reason=%s", + agent_id, session_id, task_id, fallback_count, + json_result.get("fallback_reason")) + await self._do_on_complete_async(on_complete, agent_id, outcome) + if db_path and task_id: + self._mark_task(db_path, task_id, "failed", { + "reason": "fallback_exhausted", + "fallback_count": fallback_count, + "fallback_reason": json_result.get("fallback_reason"), + }) + else: + # A3b: fallback 未达上限,retry + cooldown + logger.warning("A3b fallback retry: agent=%s session=%s task=%s " + "fallback_count=%d/%d reason=%s", + agent_id, session_id, task_id, fallback_count, + self.max_retries, json_result.get("fallback_reason")) + if self.counter: + self.counter.set_cooldown(agent_id, seconds=60) + await self._do_retry( + session_id, agent_id, task_id, on_complete, db_path, + "fallback_retry_count" # 独立计数,不与 gateway_timeout 的 retry_count 共用 + ) + else: + # 其他:A1(completed), A4(agent_failed), A7(auth_failed), + # 
async def _handle_monitor_timeout(self, session_id, agent_id, task_id, proc,
                                  on_complete, db_path, stderr_chunks,
                                  monitor_timeout_count):
    """Case B: the monitor wait timed out while the process is still running.

    Decision order, based on ``_check_session_state``:

    * B1 - status is ``running`` but the lock PID is dead: revive the
      session on the first occurrence; on the second occurrence for the
      same task, or when revival fails, mark the task failed. Every B1
      path ends by invoking ``on_complete``.
    * B2 - a compaction is in progress: keep monitoring without
      increasing ``monitor_timeout_count``; after
      ``self.max_monitor_timeouts`` such waits, stop monitoring and
      invoke ``on_complete`` with ``compact_hanging``.
    * B3/B4 - otherwise count the timeout; at the limit mark the task
      failed, else schedule another monitoring round.

    Args:
        stderr_chunks: stderr collected so far; any stderr readable
            within two seconds is appended to it.
        monitor_timeout_count: Timeouts already counted for this process.
    """
    # Best-effort read of whatever stderr is available. The process is
    # normally still alive, so this read is expected to time out.
    try:
        remaining = await asyncio.wait_for(proc.stderr.read(), timeout=2.0)
        if remaining:
            stderr_chunks.append(remaining)
    except Exception:
        pass

    # The collected stderr is not consumed by this handler; the former
    # join-and-decode whose result was discarded has been removed.

    state = self._check_session_state(agent_id)

    # B1: stale session (running + dead lock PID).
    if state.get("status") == "running" and not state.get(
            "lock_pid_alive", True):
        stuck_count = self._stuck_counts.get(task_id, 0) + 1
        self._stuck_counts[task_id] = stuck_count

        if stuck_count >= 2:
            # Stuck at least twice for this task: give up.
            logger.error("Agent %s session stuck %d times (session=%s, lock PID dead)",
                         agent_id, stuck_count, session_id)
            self._mark_task(db_path, task_id, "failed",
                            {"reason": "session_stuck", "stuck_count": stuck_count,
                             "diagnostics": state})
            await self._do_on_complete_async(on_complete, agent_id, "session_stuck")
            return

        # First occurrence: try to revive the session.
        logger.warning("Agent %s session stuck (attempt %d), reviving (session=%s)",
                       agent_id, stuck_count, session_id)
        revived = self._revive_session(agent_id)
        if revived:
            logger.info("Agent %s session revived, releasing counter for ticker re-dispatch",
                        agent_id)
            # The task status is left untouched here; only the completion
            # callback runs.
            await self._do_on_complete_async(on_complete, agent_id, "session_revived")
        else:
            logger.error(
                "Agent %s revive failed, marking failed", agent_id)
            self._mark_task(db_path, task_id, "failed",
                            {"reason": "revive_failed", "stuck_count": stuck_count,
                             "diagnostics": state})
            await self._do_on_complete_async(on_complete, agent_id, "revive_failed")
        return

    # B2: compaction in progress - wait without counting a timeout, but
    # bound the number of such waits with a separate per-task counter.
    if state.get("recent_compact"):
        logger.info("Agent %s recent compaction detected, extending patience "
                    "(session=%s, monitor=%d/%d)",
                    agent_id, session_id, monitor_timeout_count, self.max_monitor_timeouts)
        compact_wait_count = self._compact_waits.get(task_id, 0) + 1
        self._compact_waits[task_id] = compact_wait_count
        if compact_wait_count >= self.max_monitor_timeouts:
            # Do not mark the task failed: stop monitoring and run the
            # completion callback only.
            logger.warning("Agent %s compact hanging after %d waits, releasing counter for ticker re-dispatch",
                           agent_id, compact_wait_count)
            self._compact_waits.pop(task_id, None)
            await self._do_on_complete_async(on_complete, agent_id, "compact_hanging")
            return
        # Keep waiting with an unchanged timeout count.
        asyncio.create_task(
            self._monitor_process(
                session_id, proc, agent_id, task_id,
                on_complete=on_complete, db_path=db_path,
                monitor_timeout_count=monitor_timeout_count,
            )
        )
        return

    # B3/B4: no compaction - count this timeout.
    monitor_timeout_count += 1
    if monitor_timeout_count >= self.max_monitor_timeouts:
        logger.error("Agent %s max monitor timeouts (session=%s, count=%d)",
                     agent_id, session_id, monitor_timeout_count)
        self._mark_task(db_path, task_id, "failed", {
            "reason": "max_monitor_timeouts",
            "count": monitor_timeout_count,
            "elapsed_seconds": monitor_timeout_count * int(self.agent_timeout),
            "diagnostics": state,
        })
        await self._do_on_complete_async(on_complete, agent_id, "max_monitor_timeouts")
        return

    # Below the limit: keep monitoring; on_complete is not invoked.
    logger.info("Agent %s continuing monitor (session=%s, count=%d/%d)",
                agent_id, session_id, monitor_timeout_count, self.max_monitor_timeouts)
    asyncio.create_task(
        self._monitor_process(
            session_id, proc, agent_id, task_id,
            on_complete=on_complete, db_path=db_path,
            monitor_timeout_count=monitor_timeout_count,
        )
    )
不是终态 + if row and row["status"] in ( + "done", "failed", "cancelled", "review"): + logger.info("Retry skip: task %s already %s (agent=%s)", + task_id, row["status"], agent_id) + # on_complete = wrapped_on_complete,会 release counter + await self._do_on_complete_async(on_complete, agent_id, "task_already_done") + return + finally: + conn.close() + except Exception: + logger.warning( + "Retry status check failed for %s, proceeding", task_id) + + # 直接读写 tasks 表的 retry_count + if retry_field == "retry_count" and db_path and task_id: + try: + conn = get_connection(db_path) + try: + conn.execute("BEGIN IMMEDIATE") + conn.execute( + "UPDATE tasks SET retry_count = COALESCE(retry_count, 0) + 1 WHERE id=?", + (task_id,), + ) + conn.commit() + row = conn.execute( + "SELECT retry_count FROM tasks WHERE id=?", (task_id,) + ).fetchone() + count = row["retry_count"] if row else 1 + finally: + conn.close() + except Exception: + logger.exception( + "Failed to update retry_count for task %s", task_id) + count = 1 + else: + retry_counts = self._get_retry_counts(db_path, task_id) + count = retry_counts.get(retry_field, 0) + 1 + retry_counts[retry_field] = count + self._update_retry_counts(db_path, task_id, retry_counts) + + if count >= self.max_retries: + logger.error("Agent %s max retries (session=%s, %s=%d)", + agent_id, session_id, retry_field, count) + self._mark_task(db_path, task_id, "failed", { + "reason": f"max_{retry_field}", "count": count, + }) + await self._do_on_complete_async(on_complete, agent_id, "max_retries") + return + + logger.info("Agent %s retry %s=%d/%d (session=%s)", + agent_id, retry_field, count, self.max_retries, session_id) + + # 构建续杯 message(Mail 用专用模板,Task 用标准模板) + task_info = self._get_task_info(db_path, task_id) or {} + project_id = task_info.get("project_id", "") + handler = TaskTypeRegistry.get_by_project(project_id) + is_handler = handler is not None + + if is_handler: + must_haves = task_info.get("must_haves", "{}") + try: + meta = 
json.loads(must_haves) if must_haves else {} + except Exception: + meta = {} + message = MAIL_RETRY_PROMPT.format( + from_agent=meta.get("from", "unknown"), + title=task_info.get("title", ""), + retry_count=count, + max_retries=self.max_retries, + ) + else: + fallback_hint = "\n⚠️ 之前有 fallback 执行,请调 API 检查任务当前状态和已有产出,确认是否已完成。" if retry_field == "retry_count" else "" + message = self.RETRY_PROMPT.format( + project_id=project_id, + task_id=task_id or "", + title=task_info.get("title", ""), + retry_count=count, + max_retries=self.max_retries, + api_host=self.api_host, + api_port=self.api_port, + agent_id=agent_id, + fallback_hint=fallback_hint, + ) + + # v2.7.2: 通过 spawn_full_agent 重新 spawn(内部 can_acquire + acquire) + # on_complete = wrapped_on_complete(含 counter release),作为业务回调传入 + try: + await self.spawn_full_agent( + agent_id=agent_id, + message=message, + task_id=task_id, + on_complete=on_complete, + use_main_session=True, # #02: 续杯走 main session + task_db_path=db_path, + skip_counter=True, # Bug-4 fix: counter 已在原始 spawn 中持有 + ) + except AgentBusyError as e: + # #07.3 ACT-3: session busy(compact/lock/running)= 暂时性阻塞 + # release counter → 任务保持 working → ticker 重新 dispatch + logger.warning("Retry spawn deferred: %s session busy (%s), releasing counter for ticker re-dispatch", + agent_id, e.reason) + await self._do_on_complete_async(on_complete, agent_id, "retry_session_busy") + except Exception: + logger.exception("Retry spawn failed for %s", agent_id) + await self._do_on_complete_async(on_complete, agent_id, "retry_spawn_failed") + + # ── 辅助方法 ── + + @staticmethod + def _parse_stdout_json(stdout_text: str) -> dict: + """解析 openclaw agent --json 的 stdout 输出 + + 返回可直接使用的字段:status, summary, fallback_used, fallback_reason, payloads + 不再提取 meta,直接用顶层字段。 + """ + text = stdout_text.strip() + if not text: + return {"status": None, "summary": None, "fallback_used": False, + "fallback_reason": None, "payloads": []} + try: + data = json.loads(text) + except 
json.JSONDecodeError: + # 多行输出,找最后一个 JSON + for line in reversed(text.splitlines()): + try: + data = json.loads(line) + break + except json.JSONDecodeError: + continue + else: + return {"status": None, "summary": None, "fallback_used": False, + "fallback_reason": None, "payloads": []} + + # 从 data.result.meta.executionTrace 取 fallback 信息 + result = data.get("result", {}) + meta = result.get("meta", {}) + trace = meta.get("executionTrace", {}) + + return { + "status": data.get("status"), + "summary": data.get("summary"), + "fallback_used": trace.get("fallbackUsed", False), + "fallback_reason": trace.get("fallbackReason"), + "payloads": result.get("payloads", []), + } + + @staticmethod + def _get_task_status( + db_path: Optional[Path], task_id: Optional[str]) -> Optional[str]: + """查任务实际 API 状态""" + if not db_path or not task_id: + return None + try: + conn = get_connection(db_path) + try: + row = conn.execute( + "SELECT status FROM tasks WHERE id=?", (task_id,) + ).fetchone() + return row["status"] if row else None + finally: + conn.close() + except Exception: + return None + + @staticmethod + def _get_task_info(db_path: Optional[Path], + task_id: Optional[str]) -> Optional[dict]: + """查任务基本信息""" + if not db_path or not task_id: + return None + try: + conn = get_connection(db_path) + try: + row = conn.execute( + "SELECT id, title, status FROM tasks WHERE id=?", ( + task_id,) + ).fetchone() + if not row: + return None + info = dict(row) + # 从 db_path 推断 project_id: data//blackboard.db + info["project_id"] = db_path.parent.name + return info + finally: + conn.close() + except Exception: + return None + + @staticmethod + def _revive_session(agent_id: str) -> bool: + """假死复活术:修改 sessions.json status 从 running 改为 idle""" + sessions_path = Path(os.environ.get( + "OPENCLAW_HOME", str(Path.home() / ".openclaw") + )) / "agents" / agent_id / "sessions" / "sessions.json" + if not sessions_path.exists(): + return False + try: + with open(sessions_path) as f: + sessions = 
json.load(f) + main_key = f"agent:{agent_id}:main" + main_session = sessions.get(main_key, {}) + if main_session.get("status") != "running": + return False # 不是 running 状态,不需要复活 + main_session["status"] = "idle" + sessions[main_key] = main_session + with open(sessions_path, "w") as f: + json.dump(sessions, f, indent=2) + logger.info( + "Revived %s: sessions.json status changed running→idle", + agent_id) + # #07 O4: 同时清理残留 lock 文件 + sf = main_session.get("sessionFile", "") + if sf: + lock_path = Path(sf + ".lock") + if lock_path.exists(): + try: + lock_path.unlink() + logger.info( + "Cleaned stale lock for %s: %s", + agent_id, + lock_path.name) + except Exception: + pass + return True + except Exception: + logger.exception("Failed to revive %s", agent_id) + return False + + # deprecated: §24 v3, 保留供方案 B 备选 + @staticmethod + def _get_recent_gateway_logs() -> list: + """获取当天和昨天的 gateway 日志路径。 + + 日志路径通过 OPENCLAW_LOG_DIR 环境变量配置,默认 /tmp/openclaw。 + 文件名格式:openclaw-{YYYY-MM-DD}.log + """ + from datetime import timedelta + log_dir = os.environ.get("OPENCLAW_LOG_DIR", "/tmp/openclaw") + now_local = datetime.now() + today = now_local.strftime("%Y-%m-%d") + yesterday = (now_local - timedelta(days=1)).strftime("%Y-%m-%d") + paths = [] + for d in [today, yesterday]: + p = os.path.join(log_dir, f"openclaw-{d}.log") + if os.path.exists(p): + paths.append(p) + return paths + + # deprecated: §24 v3, 保留供方案 B 备选(旧 rotation 结束标记检测,已被 v5 取代) + @staticmethod + def _check_compact_in_progress_gateway( + session_key: str, window_seconds: int = 120) -> bool: + """§24 v3 rotation-only: 检查 gateway 日志,判断指定 session 是否刚完成 compact。 + + 检测逻辑:读日志尾部 2MB,按目标 sessionKey 过滤, + 找最后一个 rotation 事件,如果在窗口内 → compact 可能仍在 retry 循环中。 + """ + from datetime import datetime as _dt, timezone as _tz, timedelta + log_paths = AgentSpawner._get_recent_gateway_logs() + if not log_paths: + return False + + now = _dt.now(_tz.utc) + window_start = now - timedelta(seconds=window_seconds) + + last_rotation_time = None + + 
for log_path in log_paths: + if not os.path.exists(log_path): + continue + try: + with open(log_path, "rb") as f: + f.seek(0, 2) + size = f.tell() + f.seek(max(0, size - 2 * 1024 * 1024)) + tail = f.read().decode("utf-8", errors="replace") + except Exception: + continue + + for line in tail.splitlines(): + if not line.strip(): + continue + try: + obj = json.loads(line) + except (json.JSONDecodeError, ValueError): + continue + + msg = obj.get("message", "") + # 只看包含目标 sessionKey 的事件 + if session_key not in msg: + continue + + # rotation 事件 + if "[compaction] rotated active transcript" in msg: + ts_str = obj.get("time", "") + if ts_str: + try: + event_time = _dt.fromisoformat( + ts_str.replace("Z", "+00:00")) + # timezone-aware: normalize to UTC + if event_time.tzinfo is None: + event_time = event_time.replace(tzinfo=_tz.utc) + if last_rotation_time is None or event_time > last_rotation_time: + last_rotation_time = event_time + except (ValueError, TypeError): + continue + + if last_rotation_time is not None: + return last_rotation_time >= window_start + + return False + + # ─── v5: compact 开始标记检测(gateway log)+ 结束标记检测(jsonl) ─── + + @staticmethod + def _find_compact_start_in_gateway_log( + agent_id: str, window_seconds: int = 900) -> Optional[str]: + """v5: 检查 gateway 日志,找最近的 compact 开始标记。 + + 只检测 precheck 路径:message 含 "[context-overflow-precheck]" 且 + "route=compact_then_truncate"。原因: + - overflow 标记("attempting auto-compaction")不含 sessionKey, + 被 `session_key not in msg` 前置过滤跳过,是死代码。 + - timeout 标记推测同理不含 sessionKey。 + - precheck 标记含 sessionKey 且实测总在 overflow 之前触发(同一 compact + 事件,precheck 先检测到,overflow 是 fallback),所以 precheck 已覆盖 + overflow 场景。 + - threshold/manual 触发的 compact 无开始标记(静默执行),依赖 + counter+lock+status 保护,不需要 gateway 日志检测。 + + 超时兜底:开始标记超过 window_seconds(默认 15 分钟)自动忽略。 + + 返回最近一个开始标记的 UTC ISO 时间字符串(带 Z 后缀),或 None。 + """ + from datetime import datetime as _dt, timezone as _tz, timedelta + log_paths = AgentSpawner._get_recent_gateway_logs() + if not 
log_paths: + return None + + session_key = f"agent:{agent_id}:main" + now = _dt.now(_tz.utc) + window_start = now - timedelta(seconds=window_seconds) + + latest_start_time = None # type: Optional[_dt] + latest_start_str = None # type: Optional[str] + + for log_path in log_paths: + if not os.path.exists(log_path): + continue + try: + with open(log_path, "rb") as f: + f.seek(0, 2) + size = f.tell() + f.seek(max(0, size - 2 * 1024 * 1024)) + tail = f.read().decode("utf-8", errors="replace") + except Exception: + continue + + for line in tail.splitlines(): + if not line.strip(): + continue + try: + obj = json.loads(line) + except (json.JSONDecodeError, ValueError): + continue + + msg = obj.get("message", "") + if session_key not in msg: + continue + + # 只检测 precheck 路径:route=compact_then_truncate + # overflow/timeout 标记不含 sessionKey,被前置过滤跳过(死代码),已删除 + if ("[context-overflow-precheck]" not in msg + or "route=compact_then_truncate" not in msg): + continue + + # 解析时间 + ts_str = obj.get("time", "") + if not ts_str: + continue + try: + event_time = _dt.fromisoformat( + ts_str.replace("Z", "+00:00")) + if event_time.tzinfo is None: + event_time = event_time.replace(tzinfo=_tz.utc) + else: + # 确保 UTC + event_time = event_time.astimezone(_tz.utc) + except (ValueError, TypeError): + continue + + # 超时兜底:超过窗口的忽略 + if event_time < window_start: + continue + + if latest_start_time is None or event_time > latest_start_time: + latest_start_time = event_time + latest_start_str = event_time.strftime( + "%Y-%m-%dT%H:%M:%S.") + f"{event_time.microsecond:06d}" + "Z" + + return latest_start_str + + @staticmethod + def _check_compaction_finished_in_jsonl( + session_file: str, after_time: str) -> bool: + """v5: 检查 jsonl 是否有 after_time 之后的 compaction entry。 + + 有 → compact 已完成 → True + 没有 → compact 可能仍在进行 → False + + after_time 格式:UTC ISO(如 2026-06-12T10:25:27.581Z)。 + jsonl timestamp 格式也是 UTC ISO。 + """ + if not session_file or not Path(session_file).exists(): + return False + try: + from 
datetime import datetime as _dt, timezone as _tz + after_dt = _dt.fromisoformat(after_time.replace("Z", "+00:00")) + if after_dt.tzinfo is None: + after_dt = after_dt.replace(tzinfo=_tz.utc) + + with open(session_file, "rb") as sf: + sf.seek(0, 2) + size = sf.tell() + sf.seek(max(0, size - 1048576)) + tail = sf.read().decode("utf-8", errors="replace") + + for line in reversed(tail.splitlines()): + if not line.strip(): + continue + try: + obj = json.loads(line) + except (json.JSONDecodeError, ValueError): + continue + if obj.get("type") == "compaction": + ts = obj.get("timestamp", "") + if ts: + try: + ct = _dt.fromisoformat(ts.replace("Z", "+00:00")) + if ct.tzinfo is None: + ct = ct.replace(tzinfo=_tz.utc) + if ct >= after_dt: + return True + except (ValueError, TypeError): + pass + # 遇到早于 after_time 的 entry → 不需要继续往前扫 + ts = obj.get("timestamp", "") + if ts: + try: + ct = _dt.fromisoformat(ts.replace("Z", "+00:00")) + if ct.tzinfo is None: + ct = ct.replace(tzinfo=_tz.utc) + if ct < after_dt: + break + except (ValueError, TypeError): + pass + return False + except Exception: + return False + + @staticmethod + def _check_recent_compaction_jsonl( + session_file: str, window_seconds: int = 900) -> bool: + """v2.8.2 Fix-2: 读 session jsonl 末尾,检查是否有 window_seconds 内的 compaction 记录。 + + 比 compactionCheckpoints 更可靠:Gateway 每次完成 compact 必然在 jsonl 末尾追加记录, + 但不保证更新 compactionCheckpoints。 + + v2.8.2: 窗口从 300s→900s(15min), 尾部读取从 50KB→1MB。 + 实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。 + 正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。 + """ + if not session_file or not Path(session_file).exists(): + return False + try: + from datetime import datetime, timezone + now = datetime.now(timezone.utc) + with open(session_file, "rb") as sf: + sf.seek(0, 2) + size = sf.tell() + sf.seek(max(0, size - 1048576)) + tail = sf.read().decode("utf-8", errors="replace") + for line in reversed(tail.splitlines()): + if not line.strip(): + continue + try: + import json as _json + obj = _json.loads(line) + 
@staticmethod
def _check_session_state(agent_id: str) -> dict:
    """Report the main session's status, lock state and compaction state.

    Reads ``sessions.json`` under ``$OPENCLAW_HOME/agents/<agent_id>/sessions/``
    (``~/.openclaw`` when the variable is unset) and inspects the
    ``agent:<agent_id>:main`` entry.

    v2.8.1: compaction detection no longer relies on ``compactionCheckpoints``.
    v3.1: a lock seen while the session is ``done``/``timeout`` counts as expired.
    §24 v5: compaction = gateway-log start marker paired with the jsonl
    ``compaction`` entry; a start without a finish blocks dispatch.

    Args:
        agent_id: Agent whose main session is inspected.

    Returns:
        A dict with ``status`` (``"unknown"`` when not recorded), ``lock_pid``
        (pid stored in ``<sessionFile>.lock`` or ``None``), ``lock_pid_alive``
        (whether that lock should still block a new turn) and
        ``recent_compact`` (a compaction started and has not finished).
        ``lock_expired`` is added, set to ``True``, only when a lock was
        deliberately overridden. Failures never propagate: whatever was
        gathered before the failure is returned.
    """
    result = {
        "status": "unknown",
        "lock_pid": None,
        "lock_pid_alive": False,
        "recent_compact": False}
    sessions_path = Path(os.environ.get(
        "OPENCLAW_HOME", str(Path.home() / ".openclaw")
    )) / "agents" / agent_id / "sessions" / "sessions.json"
    if not sessions_path.exists():
        return result
    try:
        with open(sessions_path) as f:
            sessions = json.load(f)
        main_session = sessions.get(f"agent:{agent_id}:main", {})
        result["status"] = main_session.get("status", "unknown")

        # Lock inspection (v3.1: done/timeout means the lock is stale).
        sf = main_session.get("sessionFile", "")
        lock_path = Path(sf + ".lock") if sf else None
        if lock_path is not None and lock_path.exists():
            try:
                # Parsed once and reused for both the pid and createdAt.
                lock_data = json.loads(lock_path.read_text())
                pid = lock_data.get("pid")
                result["lock_pid"] = pid
                if pid:
                    try:
                        os.kill(pid, 0)  # signal 0 = existence probe, nothing is delivered
                        result["lock_pid_alive"] = True
                    except ProcessLookupError:
                        result["lock_pid_alive"] = False
                    except PermissionError:
                        # The process exists but belongs to another user.
                        # Previously this fell into the blanket handler below,
                        # which reported the lock as dead and skipped the
                        # status-based checks.
                        result["lock_pid_alive"] = True

                if result["status"] in ("done", "timeout"):
                    # Session finished: the lock is only the gateway's
                    # cool-down lock and must not block a new turn.
                    result["lock_pid_alive"] = False
                    result["lock_expired"] = True
                elif result["status"] == "running" and result["lock_pid_alive"]:
                    # running + lock older than 30 minutes -> treat as idle.
                    try:
                        created_at_str = lock_data.get("createdAt", "")
                        if created_at_str:
                            from datetime import datetime as _dt, timezone as _tz
                            created_dt = _dt.fromisoformat(
                                created_at_str.replace("Z", "+00:00"))
                            if created_dt.tzinfo is None:
                                # Same convention as the jsonl timestamp
                                # parsers: naive values are taken as UTC, so
                                # the subtraction below cannot fail on them.
                                created_dt = created_dt.replace(tzinfo=_tz.utc)
                            elapsed = (_dt.now(_tz.utc) -
                                       created_dt).total_seconds()
                            if elapsed > 1800:  # 30 minutes
                                result["lock_pid_alive"] = False
                                result["lock_expired"] = True
                                logger.info("Lock expired for %s: running + lock age %.0fs > 1800s",
                                            agent_id, elapsed)
                    except Exception:
                        logger.debug("Could not evaluate lock age for %s",
                                     agent_id, exc_info=True)
            except Exception:
                logger.debug("Could not inspect lock file for %s",
                             agent_id, exc_info=True)

        # §24 v5 compaction pairing:
        #   start marker and no finish -> block (recent_compact=True)
        #   start marker and finish    -> pass
        #   no start marker            -> silent threshold/manual compaction,
        #                                 left to counter + lock + status
        # The older trajectory/recent-jsonl helpers are kept but not called.
        if result["status"] not in ("idle", "unknown", None) and sf:
            compact_start = AgentSpawner._find_compact_start_in_gateway_log(agent_id)
            if compact_start:
                finished = AgentSpawner._check_compaction_finished_in_jsonl(sf, compact_start)
                if not finished:
                    result["recent_compact"] = True
    except Exception:
        logger.debug("Session state check failed for %s", agent_id, exc_info=True)
    return result
@staticmethod
def _classify_outcome(exit_code: int, json_result: dict, stderr_text: str,
                      task_status: Optional[str], stdout_text: str = "") -> dict:
    """Classify why an agent process ended and decide the handling strategy.

    v3.1: the old "A0" case is split into A14-A17 (signal interruption and
    stderr-based classification); A8/A10 became recoverable retries; every
    cooldown is 60 seconds.

    Args:
        exit_code: Exit code of the agent process.
        json_result: Parsed JSON result of the run; ``None`` is treated as
            an empty result.
        stderr_text: Captured stderr; ``None`` is treated as empty.
        task_status: Task status currently stored in the database, if known.
        stdout_text: Captured stdout; ``None`` is treated as empty.

    Returns:
        A dict with at least ``outcome`` and ``should_retry``. Retryable
        outcomes also carry ``retry_field`` and, except for
        ``gateway_timeout``, ``cooldown_seconds``. ``crashed`` and the
        unknown-status fallback carry ``original``.
    """
    network_keywords = ("econnrefused", "etimedout", "gateway closed", "econnreset")
    compact_keywords = ("compaction-diag", "context-overflow")

    # Normalise once so every branch tolerates missing inputs; before, only
    # the crash branch guarded stderr and the error branch raised on None.
    json_result = json_result or {}
    stderr_lower = (stderr_text or "").lower()
    stdout_empty = not (stdout_text or "").strip()

    def stderr_mentions(keywords) -> bool:
        return any(kw in stderr_lower for kw in keywords)

    def retry_after_cooldown(outcome: str) -> dict:
        return {"outcome": outcome, "should_retry": True,
                "retry_field": "retry_count", "cooldown_seconds": 60}

    status = json_result.get("status")
    summary = json_result.get("summary", "")
    fallback_used = json_result.get("fallback_used", False)

    # A4: the agent itself marked the task failed in the DB.
    if task_status == "failed":
        return {"outcome": "agent_failed", "should_retry": False}

    # A1: ok + completed + no fallback.
    if status == "ok" and summary == "completed" and not fallback_used:
        return {"outcome": "completed", "should_retry": False}

    # A5/A6: ok, but a fallback was used.
    if status == "ok" and fallback_used:
        return {"outcome": "fallback_timeout", "should_retry": False}

    # A2/A3: gateway timeout is the plain "continue" case.
    # Note: on a PM2 restart the daemon receives SIGTERM too, so a retried
    # process would die with it; the A14 retry assumes the daemon survives.
    if status == "timeout":
        return {"outcome": "gateway_timeout", "should_retry": True,
                "retry_field": "retry_count"}

    no_json_no_stdout = status is None and stdout_empty

    # Former A0: no JSON output and a non-zero exit.
    if no_json_no_stdout and exit_code != 0:
        # A14: SIGINT (130) / SIGTERM (143) -> interrupted from outside.
        if exit_code in (130, 143):
            return retry_after_cooldown("interrupted")
        # A15/A16: stderr names a network or compaction problem.
        if stderr_mentions(network_keywords):
            return retry_after_cooldown("gateway_unreachable")
        if stderr_mentions(compact_keywords):
            return retry_after_cooldown("compact_interrupted")
        # A17: a real crash; the task stays "working" for the ticker.
        return {"outcome": "crashed", "should_retry": False,
                "original": "process_crash"}

    # A13 (revised): empty stdout with exit 0 is trusted as a normal
    # completion (e.g. inform notifications); judging it by task_status
    # used to keep inform mails from ever being marked done.
    if no_json_no_stdout and exit_code == 0:
        return {"outcome": "completed", "should_retry": False}

    # A7-A12: explicit error status; stderr refines the classification.
    # The order of these checks is significant.
    if status == "error":
        if stderr_mentions(("401", "403", "unauthorized", "auth")):
            return {"outcome": "auth_failed", "should_retry": False}
        if stderr_mentions(network_keywords):
            return retry_after_cooldown("gateway_unreachable")
        if stderr_mentions(("rate_limit", "500", "503", "api error")):
            return retry_after_cooldown("api_error")
        if stderr_mentions(compact_keywords):
            return {"outcome": "compact_failed", "should_retry": False}
        if stderr_mentions(("lock", "busy", "concurrent", "lane task error")):
            return retry_after_cooldown("lock_conflict")
        return {"outcome": "agent_error", "should_retry": False}

    # Fallback: a status value none of the rules above recognise.
    return {"outcome": "agent_error",
            "should_retry": False, "original": "unknown_status"}
ORDER BY attempt_number DESC LIMIT 1", + (task_id,) + ).fetchone() + if row and row["metadata"]: + stored = json.loads(row["metadata"]) + for k in defaults: + if k in stored: + defaults[k] = stored[k] + finally: + conn.close() + except Exception: + pass + return defaults + + def _update_retry_counts(self, db_path: Optional[Path], + task_id: Optional[str], counts: dict): + """将 retry counts 写回最新 task_attempt 的 metadata""" + if not db_path or not task_id: + return + try: + conn = get_connection(db_path) + try: + conn.execute("BEGIN IMMEDIATE") + row = conn.execute( + "SELECT rowid, metadata FROM task_attempts " + "WHERE task_id=? ORDER BY attempt_number DESC LIMIT 1", + (task_id,) + ).fetchone() + if row: + meta = json.loads( + row["metadata"]) if row["metadata"] else {} + meta.update(counts) + conn.execute( + "UPDATE task_attempts SET metadata=? WHERE rowid=?", + (json.dumps(meta), row["rowid"]) + ) + conn.commit() + finally: + conn.close() + except Exception: + logger.exception( + "Failed to update retry counts for task %s", task_id) + + def _mark_task(self, db_path: Optional[Path], task_id: Optional[str], + status: str, detail: Optional[dict] = None): + """标记任务状态(用于 failed/escalate)""" + if not db_path or not task_id: + return + try: + conn = get_connection(db_path) + try: + conn.execute("BEGIN IMMEDIATE") + conn.execute( + "UPDATE tasks SET status=?, completed_at=datetime('now') WHERE id=?", + (status, task_id) + ) + if detail: + conn.execute( + "INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)", + (task_id, "daemon", status, json.dumps( + detail, ensure_ascii=False)) + ) + conn.commit() + finally: + conn.close() + # F2: conn 已关闭,Blackboard 内部自己 get_connection + if status == "failed": + reason = (detail or {}).get("reason", "unknown") + try: + from src.daemon.mail_notify import _is_mail_project, notify_mail_failed + if _is_mail_project(db_path): + # Mail 失败:通知发件人,不 @pangtong + notify_mail_failed(db_path, task_id, reason, detail) + else: + 
@staticmethod
def _do_on_complete(on_complete, agent_id, outcome):
    """Run the ``on_complete`` callback from synchronous code.

    Args:
        on_complete: Callable taking ``(agent_id, outcome)``, or a falsy
            value to do nothing.
        agent_id: Passed through to the callback.
        outcome: Passed through to the callback.

    A callback that returns a coroutine cannot be awaited here. The
    coroutine is closed so it does not linger as "never awaited", and the
    skip is logged; async callers should use ``_do_on_complete_async``.
    Callback exceptions are logged and never propagate.
    """
    if not on_complete:
        return
    try:
        result = on_complete(agent_id, outcome)
    except Exception:
        # Same reporting as the async variant instead of a silent pass.
        logger.warning(
            "on_complete callback failed for %s",
            agent_id,
            exc_info=True)
        return
    if asyncio.iscoroutine(result):
        # No event loop to hand this to from sync code: release it
        # explicitly rather than dropping it un-awaited.
        result.close()
        logger.warning(
            "on_complete for %s returned a coroutine in a sync context; "
            "it was not executed",
            agent_id)
task_id == "broadcast": + return + effective_db = db_path or self.db_path + if not task_id or not effective_db: + return + + try: + conn = get_connection(effective_db) + try: + conn.execute("BEGIN IMMEDIATE") + row = conn.execute( + "SELECT MAX(attempt_number) as max_a FROM task_attempts WHERE task_id=?", + (task_id,), + ).fetchone() + attempt_number = (row["max_a"] or 0) + 1 + + meta = metadata or {} + if error: + meta["error"] = error + conn.execute( + "INSERT INTO task_attempts " + "(task_id, attempt_number, agent, outcome, exit_code, metadata, completed_at) " + "VALUES (?,?,?,?,?,?,datetime('now'))", + (task_id, attempt_number, agent_id, outcome, + exit_code, json.dumps(meta)), + ) + conn.execute( + "INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)", + (task_id, agent_id, + "agent_completed" if outcome == "completed" else "daemon_tick", + json.dumps({"outcome": outcome, "attempt": attempt_number})), + ) + conn.commit() + finally: + conn.close() + except Exception: + logger.exception("Failed to record attempt for task %s", task_id) + + def get_session(self, session_id: str) -> Optional[Dict[str, Any]]: + """获取 session 信息""" + return self._sessions.get(session_id) + + def get_session_by_agent(self, agent_id: str) -> Optional[Dict[str, Any]]: + """v2.7.2: 根据 agent_id 获取活跃 session 信息(用于进程存活性检查)""" + for sid, info in self._sessions.items(): + if info.get("agent_id") == agent_id and info.get( + "status") == "running": + return info + return None + + def cleanup_session(self, session_id: str) -> None: + """清理 session""" + if session_id in self._sessions: + session = self._sessions[session_id] + task_id = session.get("task_id") + del self._sessions[session_id] + # 清理 B2 compact 等待计数器 + if task_id and task_id in self._compact_waits: + del self._compact_waits[task_id] diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py b/~/.sanguo_projects/sanguo_moziplus_v2/src/daemon/toolchain_handler.py new file mode 100644 index 
import os  # local to this configuration block; used only for the overrides below

# ---------------------------------------------------------------------------
# Gitea API configuration
# ---------------------------------------------------------------------------

# Both values can be overridden per deployment through the environment. The
# literals remain as fallbacks so an unconfigured deployment behaves as before.
_GITEA_BASE = os.environ.get("GITEA_API_BASE", "http://192.168.2.154:3000/api/v1")
# SECURITY NOTE(review): the fallback below is an API token committed to
# source control. Rotate it, provide the replacement via GITEA_TOKEN, and
# then drop the literal.
_GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "a6d596b826f4bfeaf983ef4d25ac25dab95bbc4e")

# Number of business failures after which a failure is escalated to a
# system failure.
_BUSINESS_FAIL_THRESHOLD = 3

# action_type -> opening hint shown to the agent at the top of the prompt.
_ACTION_HINTS: Dict[str, str] = {
    "review_result": "你收到一个 Review 结果通知,这是一个需要你执行动作的事件(不是纯通知)。",
    "review_request": "你收到一个 Review 请求,这是一个需要你审查并提交 Review 的事件。",
    "review_updated": "你收到一个 PR 更新通知,这是一个需要你重新审查修改部分的事件。",
    "review_comment": "你收到一个 Review 评论,这是一个需要你查看并响应的事件。",
    "ci_failure": "你收到一个 CI 失败通知,这是一个需要你修复失败测试的事件。",
    "issue_assigned": "你收到一个 Issue 指派,这是一个需要你编码实现的事件。",
    "deploy_failure": "你收到一个部署失败通知,这是一个需要你排查并修复的事件。",
    "mention": "你收到一个 @mention 通知,这是一个需要你按指引响应的事件。",
    "review_merged": "你收到一个 PR 合并通知。这是一条纯通知,阅读即可。",
    "infrastructure_failure": "你收到一个基础设施问题报告,请排查并修复。",
}
action_hint(priority=10)""" + + name: str = "toolchain_context" + priority: int = 10 + + def render(self, context: PromptContext) -> str: + event_type = context.event_type + event_data: Dict = context.event_data or {} + + # Part 1: 事件信息(现有模板引擎) + if event_type in _TEMPLATE_MAP: + variables = {k: str(v) for k, v in event_data.items()} + event_text = render_template(event_type, variables) + else: + lines = ["## 工具链事件", ""] + lines.append(f"- **事件类型**: {event_type or '未知'}") + if event_data: + lines.append("- **事件详情**:") + for key, value in event_data.items(): + lines.append(f" - {key}: {value}") + lines.append("") + event_text = "\n".join(lines) + + # Part 2: 结构化编号步骤(新增,从 action_steps 渲染) + steps: List[str] = context.action_steps or [] + if steps: + step_lines = ["", "### 必须执行的步骤", ""] + for i, step in enumerate(steps, 1): + step_lines.append(f"{i}. {step}") + steps_text = "\n".join(step_lines) + else: + steps_text = "" + + # Part 3: action 指引(新增,按 action_type 选择) + action_hint = _ACTION_HINTS.get( + context.action_type, + "你收到一个工具链事件,这是一个需要你执行动作的事件。", + ) + + return f"{action_hint}\n\n{event_text}{steps_text}" + + def should_include(self, context: PromptContext) -> bool: + return True + + +class ToolchainApiSection: + """API 操作指令(priority=40)-- action_report 提交指引""" + + name: str = "toolchain_api" + priority: int = 40 + + API_HOST = "localhost:8083" + + def render(self, context: PromptContext) -> str: + task_id = context.task_id + project_id = context.project_id + agent_id = context.agent_id + + lines = [ + "## API 操作指令", + "", + f"项目 ID: `{project_id}`", + f"任务 ID: `{task_id}`", + "", + "### 完成后必须提交 action report", + "", + "执行完所有步骤后,必须提交 action report:", + "```bash", + f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/comments" \\', + ' -H "Content-Type: application/json" \\', + f' -d \'{{"author": "{agent_id}", "comment_type": "action_report", "body": "简要描述你执行了什么操作及结果"}}\'', + "```", + "", + "⚠️ 不提交 action report 的任务会被标记为 
failed。", + "", + "### 提交产出", + "", + "如有产出(如 review 结果、修复方案),提交到任务 outputs:", + "```bash", + f'curl -s -X POST "http://{self.API_HOST}/api/projects/{project_id}/tasks/{task_id}/outputs" \\', + ' -H "Content-Type: application/json" \\', + ' -d \'{"content": "<你的产出内容>", "type": "text"}\'', + "```", + "", + "### 需要其他角色支持时", + "", + "如果在执行过程中需要其他角色协助(如缺数据、需要审批等),在关联的 PR/Issue 上创建 comment @对方:", + "```bash", + f'curl -s -X POST "{_GITEA_BASE}/repos/{{repo}}/issues/{{pr_number}}/comments" \\', + f' -H "Authorization: token " \\', + ' -H "Content-Type: application/json" \\', + ' -d \'{"body": "@{agent-id} 需要你的支持:{描述问题}"}\'', + "```", + "", + "⚠️ 不要使用 Mail API(飞鸽传书)。所有协作通过 Gitea 留痕。", + "", + ] + return "\n".join(lines) + + def should_include(self, context: PromptContext) -> bool: + return True + + +class ToolchainConstraintsSection: + """硬约束 + Red Flags(priority=50)""" + + name: str = "toolchain_constraints" + priority: int = 50 + + def render(self, context: PromptContext) -> str: + lines = [ + "## 硬约束(必须遵守)", + "", + "⚠️ 以下是强制要求,不是建议或参考。违反任何一条都会导致任务失败。", + "", + "### 1. 必须按步骤执行", + '- 检查上方“必须执行的步骤”列表', + '- 逐条执行每个步骤,不可跳过', + '- 不要只读不做——这不是纯通知', + "", + "### 2. 必须提交 action report", + '- 执行完所有步骤后,必须提交 action report', + "- 提交方式:POST comment(comment_type='action_report')", + '- 报告内容:简要描述你执行了什么操作、结果如何', + '- ⚠️ 不提交 action report 的任务会被标记为 failed', + "", + "### 3. 不要执行任何状态转换命令", + '- 不要手动标 working/done/review/failed,系统会自动处理', + "", + "### 4. 不需要回复", + '- action report 就是你的完成凭证', + '- 不要发送 Mail(飞鸽传书),你的所有操作在 toolchain 流程内完成', + "", + "### 5. 
class ToolchainHandler(BaseTaskHandler):
    """Handler for toolchain events.

    Builds the agent prompt from the three toolchain prompt sections,
    verifies that the agent left evidence of having acted, and on a failed
    verification routes the failure to one of three paths: a comment on the
    related PR/Issue (business), a new Gitea issue (system), or a task
    created directly for the infrastructure owner (infrastructure).
    """

    task_type = "toolchain"
    virtual_project = "_toolchain"
    display_name = "工具链事件"

    def target_success_status(self) -> str:
        """Return the status a successfully verified task is moved to."""
        return "done"

    def pre_spawn(self, task_id: str, db_path: Path) -> bool:
        """Auto-mark the task as working (pending -> working) before spawn."""
        return self._auto_mark_working(task_id, db_path)

    def get_sections(self) -> list:
        """Return one instance of each of the three toolchain prompt sections."""
        return [
            ToolchainContextSection(),
            ToolchainApiSection(),
            ToolchainConstraintsSection(),
        ]

    def build_prompt(self, context: PromptContext) -> str:
        """Compose the sections into the final prompt via ``PromptComposer``."""
        composer = PromptComposer()
        composer.add_many(self.get_sections())
        return composer.compose(context)

    def verify_completion(self, task_id: str, db_path: Path) -> VerifyResult:
        """Check for an action report, with two backward-compatible fallbacks.

        ``infrastructure_failure`` (to prevent recursion) and
        ``review_merged`` (pure notification) tasks always pass. Otherwise
        the task passes when it has, in this order of preference, an
        ``action_report`` comment, any output, or a non-system comment of
        at least 20 characters.

        Returns:
            The verification result; database errors are reported as a
            failed result with reason ``verify_error`` instead of raising.
        """
        try:
            conn = get_connection(db_path)
            try:
                meta = {}
                row = conn.execute(
                    "SELECT must_haves FROM tasks WHERE id=?", (task_id,)
                ).fetchone()
                if row and row["must_haves"]:
                    try:
                        meta = json.loads(row["must_haves"])
                    except Exception:
                        meta = {}
                    if not isinstance(meta, dict):
                        # Valid JSON that is not an object carries no action_type.
                        meta = {}

                action_type = meta.get("action_type")
                if action_type == "infrastructure_failure":
                    return VerifyResult(True, "infrastructure_passthrough",
                                        "infrastructure_failure auto-pass")
                if action_type == "review_merged":
                    return VerifyResult(True, "merged_passthrough",
                                        "review_merged auto-pass")

                # 1. Preferred evidence: an action_report comment.
                report_row = conn.execute(
                    "SELECT id FROM comments WHERE task_id=? "
                    "AND comment_type='action_report' LIMIT 1",
                    (task_id,)
                ).fetchone()
                if report_row:
                    return VerifyResult(True, "has_action_report", "action_report found")

                # 2. Fallback (backward compatibility): any output.
                output_count = conn.execute(
                    "SELECT COUNT(*) FROM outputs WHERE task_id=?", (task_id,)
                ).fetchone()[0]
                if output_count > 0:
                    return VerifyResult(True, "has_output", f"output_count={output_count}")

                # 3. Fallback (backward compatibility): a substantive comment.
                comment_count = conn.execute(
                    "SELECT COUNT(*) FROM comments WHERE task_id=? "
                    "AND author != 'system' AND LENGTH(body) >= 20",
                    (task_id,)
                ).fetchone()[0]
                if comment_count > 0:
                    return VerifyResult(True, "has_comment", f"comment_count={comment_count}")

                return VerifyResult(False, "no_action",
                                    "no action_report, no output, no valid comment")
            finally:
                conn.close()
        except Exception as e:
            logger.error("Toolchain %s: verify error: %s", task_id, e)
            return VerifyResult(False, "verify_error", str(e))

    def on_failure(self, task_id: str, agent_id: str,
                   db_path: Path, verify: VerifyResult) -> None:
        """Mark the task failed and route the failure (business/system/infrastructure)."""
        self._mark_task_status(db_path, task_id, "failed")
        logger.info("Toolchain %s: verify failed (%s), marked failed",
                    task_id, verify.reason)

        # Event context stored with the task, plus the failure count.
        meta = {}
        fail_count = 0
        try:
            conn = get_connection(db_path)
            try:
                row = conn.execute(
                    "SELECT must_haves FROM tasks WHERE id=?", (task_id,)
                ).fetchone()
                if row and row["must_haves"]:
                    meta = json.loads(row["must_haves"])
                # NOTE(review): the spawner code in this same change inserts
                # into ``events`` with a ``detail`` column and uses the status
                # itself (or ``daemon_tick``) as event_type. Confirm that
                # ``payload`` and 'status_change' exist in this schema; if
                # they do not, this query always fails, fail_count stays 0
                # and the escalation threshold can never be reached.
                fail_count = conn.execute(
                    "SELECT COUNT(*) FROM events WHERE task_id=? "
                    "AND event_type='status_change' AND payload LIKE '%failed%'",
                    (task_id,)
                ).fetchone()[0]
            finally:
                # Previously the connection leaked whenever a query raised.
                conn.close()
        except Exception as e:
            fail_count = 0
            logger.warning("Toolchain %s: could not load failure context: %s",
                           task_id, e)
        if not isinstance(meta, dict):
            meta = {}

        action_type = meta.get("action_type", "")
        context_data = meta.get("context", {})
        assignee = meta.get("assignee", "") or meta.get("from", "")

        route = self._classify_failure(verify, fail_count)

        if route == "business":
            self._handle_business_failure(
                task_id, agent_id, verify, action_type, context_data, assignee, db_path)
        elif route == "system":
            self._handle_system_failure(
                task_id, agent_id, verify, action_type, context_data, db_path)
        else:  # infrastructure
            self._handle_infrastructure_failure(
                task_id, agent_id, verify, db_path)

    def _classify_failure(self, verify: VerifyResult, fail_count: int) -> str:
        """Return the failure route: ``business``, ``system`` or ``infrastructure``."""
        # A verification that could not run (e.g. DB unavailable).
        if verify.reason == "verify_error":
            return "infrastructure"
        # Repeated business failures are escalated.
        if fail_count >= _BUSINESS_FAIL_THRESHOLD:
            return "system"
        return "business"

    def _handle_business_failure(
        self, task_id: str, agent_id: str, verify: VerifyResult,
        action_type: str, context_data: dict, assignee: str,
        db_path: Path,
    ) -> None:
        """Comment on the related PR/Issue, mentioning the original assignee.

        Escalates to the system-failure path when there is no PR/Issue
        context or the comment cannot be created.
        """
        repo = context_data.get("repo", "")
        pr_number = context_data.get("pr_number") or context_data.get("issue_number", "")

        if not (repo and pr_number):
            logger.warning(
                "Toolchain %s: no PR/Issue context for business failure, "
                "escalating to system failure", task_id)
            self._handle_system_failure(
                task_id, agent_id, verify, action_type, context_data, db_path)
            return

        comment_body = (
            f"@{assignee or agent_id} 工具链任务执行失败\n\n"
            f"任务 ID: {task_id}\n"
            f"失败原因: {verify.reason}\n"
            f"证据: {verify.evidence}\n\n"
            "请检查黑板任务并处理。"
        )
        if self._create_gitea_comment(repo, pr_number, comment_body):
            logger.info("Toolchain %s: business failure → Gitea comment on %s#%s",
                        task_id, repo, pr_number)
            return
        logger.warning(
            "Toolchain %s: Gitea comment failed, escalating to system failure",
            task_id)
        self._handle_system_failure(
            task_id, agent_id, verify, action_type, context_data, db_path)

    def _handle_system_failure(
        self, task_id: str, agent_id: str, verify: VerifyResult,
        action_type: str, context_data: dict, db_path: Path,
    ) -> None:
        """Create a Gitea issue assigned to pangtong-fujunshi.

        Falls through to the infrastructure path when the issue cannot be
        created.
        """
        repo = context_data.get("repo", "sanguo/sanguo_moziplus_v2")
        title = f"[toolchain-handler] 工具链事件处理失败: {task_id}"
        body = (
            f"任务 {task_id} 验证失败\n\n"
            f"事件类型: {action_type or '未知'}\n"
            f"失败原因: {verify.reason}\n"
            f"证据: {verify.evidence}\n\n"
            "@pangtong-fujunshi 请检查黑板任务并手动处理。"
        )

        if self._create_gitea_issue(repo, title, body, ["pangtong-fujunshi"]):
            logger.info("Toolchain %s: system failure → Gitea Issue created on %s",
                        task_id, repo)
            return
        logger.error(
            "Toolchain %s: Gitea API unavailable, escalating to infrastructure failure",
            task_id)
        self._handle_infrastructure_failure(
            task_id, agent_id, verify, db_path)

    def _handle_infrastructure_failure(
        self, task_id: str, agent_id: str,
        verify: VerifyResult, db_path: Path,
    ) -> None:
        """Create an ``infrastructure_failure`` task for jiangwei-infra.

        The task is created through ``_send_toolchain_task`` rather than
        Gitea. ``verify_completion`` auto-passes that action type, which is
        what prevents recursion. Errors are logged, never raised.
        """
        try:
            # Imported lazily, as in the original code.
            from src.api.toolchain_routes import _send_toolchain_task
            _send_toolchain_task(
                to_agent="jiangwei-infra",
                title=f"[基础设施] Gitea API 不可用 - {task_id}",
                description=(
                    f"Gitea API 不可用,原任务 {task_id} 无法通过正常路径处理。\n"
                    "请检查 Gitea 服务状态和网络连通性。"
                ),
                event_type="infrastructure_failure",
                action_type="infrastructure_failure",
                steps=[
                    "检查 Gitea 服务状态(http://192.168.2.154:3000)",
                    "检查网络连通性",
                    "恢复后提交 action report",
                ],
                context_data={"original_task_id": task_id, "verify_reason": verify.reason},
                source="toolchain_handler",
            )
            logger.info("Toolchain %s: infrastructure failure → task created for jiangwei-infra",
                        task_id)
        except Exception as e:
            logger.error(
                "Toolchain %s: failed to create infrastructure_failure task: %s",
                task_id, e)

    # -----------------------------------------------------------------------
    # Gitea API helpers
    # -----------------------------------------------------------------------

    def _post_gitea_json(self, path: str, data: dict) -> None:
        """POST *data* as JSON to ``_GITEA_BASE + path``; raise on any failure."""
        payload = json.dumps(data, ensure_ascii=False).encode("utf-8")
        req = urllib.request.Request(
            f"{_GITEA_BASE}{path}",
            data=payload,
            headers={
                "Authorization": f"token {_GITEA_TOKEN}",
                "Content-Type": "application/json",
            },
        )
        # The response body is not needed, but the response must be closed;
        # it used to be left open until garbage collection.
        with urllib.request.urlopen(req, timeout=5):
            pass

    def _create_gitea_comment(
        self, repo: str, pr_number: int, body: str,
    ) -> bool:
        """Create a comment on a PR/Issue. Return whether it succeeded."""
        try:
            self._post_gitea_json(
                f"/repos/{repo}/issues/{pr_number}/comments", {"body": body})
            return True
        except Exception as e:
            logger.warning("Gitea comment failed on %s#%s: %s", repo, pr_number, e)
            return False

    def _create_gitea_issue(
        self, repo: str, title: str, body: str,
        assignees: list | None = None,
    ) -> bool:
        """Create a Gitea issue, optionally assigned. Return whether it succeeded."""
        data = {"title": title, "body": body}
        if assignees:
            data["assignees"] = assignees
        try:
            self._post_gitea_json(f"/repos/{repo}/issues", data)
            return True
        except Exception as e:
            logger.warning("Gitea create issue failed on %s: %s", repo, e)
            return False

    # -----------------------------------------------------------------------
    # Compatibility: old method kept, no longer called by on_failure
    # -----------------------------------------------------------------------

    def _build_gitea_links(self, event_type: str, event_data: dict) -> str:
        """Build human-readable Gitea links from the event data.

        ``event_type`` is accepted for signature compatibility and not used.
        A branch line is only added when no commit is present.
        """
        links = []
        repo = event_data.get("repo", "")
        base_url = "http://192.168.2.154:3000"

        if "pr_number" in event_data:
            links.append(f"PR: {base_url}/{repo}/pulls/{event_data['pr_number']}")
        if "issue_number" in event_data:
            links.append(f"Issue: {base_url}/{repo}/issues/{event_data['issue_number']}")
        if "commit" in event_data:
            links.append(f"Commit: {base_url}/{repo}/commit/{event_data['commit']}")
        if "branch" in event_data and "commit" not in event_data:
            links.append(f"分支: {event_data['branch']}")

        return "\n".join(links) if links else "(无法提取链接,请检查黑板任务详情)"
ValueError(f"Unknown template: {name}") + + path = TEMPLATES_DIR / filename + if not path.exists(): + raise FileNotFoundError(f"Template not found: {path}") + + content = path.read_text(encoding="utf-8") + _template_cache[name] = content + logger.debug("Loaded template: %s (%d bytes)", name, len(content)) + return content + + +def _escape_braces(value: str) -> str: + """转义花括号防止 format_map 报错""" + return str(value).replace("{", "{{").replace("}", "}}") + + +def render_template(name: str, variables: Dict[str, str]) -> str: + """渲染模板,将 {variable} 占位符替换为实际值。 + + 使用 defaultdict(str) 确保未提供的变量替换为空字符串而非报错。 + + Args: + name: 模板名称 + variables: 变量字典 + + Returns: + 渲染后的文本 + """ + template_text = _load_template(name) + # 先对所有变量值转义花括号,防止 format_map 报错 + escaped_vars = {k: _escape_braces(v) for k, v in variables.items()} + safe_vars: Dict[str, str] = defaultdict(str, escaped_vars) + return template_text.format_map(safe_vars) + + +def clear_cache() -> None: + """清空模板缓存(用于测试或热更新)""" + _template_cache.clear() diff --git a/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md b/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md new file mode 100644 index 0000000..22d2895 --- /dev/null +++ b/~/.sanguo_projects/sanguo_moziplus_v2/templates/toolchain/mention.md @@ -0,0 +1,16 @@ +{mention_type}通知 + +来源: {source_type} {source_url} +评论者: {commenter} +意图: {intent_hint} +内容: +{content_snippet} + +📋 获取完整上下文: +1. 查看{source_type}详情: GET {gitea_api}/repos/{repo}/{source_detail_api_path} +2. 
"""Unit tests for mention_utils — covers §25.7."""

import pytest

from src.api.mention_utils import (
    extract_mentions,
    should_suppress_mention,
    infer_intent,
)


# ---------------------------------------------------------------------------
# extract_mentions
# ---------------------------------------------------------------------------

class TestExtractMentions:
    """Tests for @mention extraction."""

    def test_exact_match(self):
        """@zhangfei-dev matches exactly."""
        assert extract_mentions("@zhangfei-dev 请看一下", "someone") == ["zhangfei-dev"]

    def test_chinese_alias(self):
        """The Chinese alias @张飞 resolves to the agent id."""
        assert extract_mentions("@张飞 帮忙看看", "someone") == ["zhangfei-dev"]

    def test_english_short_name(self):
        """The English short name @zhangfei resolves to the agent id."""
        assert extract_mentions("@zhangfei 快来", "someone") == ["zhangfei-dev"]

    def test_prefix_unique(self):
        """The prefix @zhangf resolves to the agent id."""
        assert extract_mentions("@zhangf 来一下", "someone") == ["zhangfei-dev"]

    def test_prefix_ambiguous_no_match(self):
        """The prefix @z yields no match."""
        assert extract_mentions("@z 看看", "someone") == []

    def test_dedup_same_person(self):
        """@张飞 and @zhangfei-dev in one text are de-duplicated."""
        result = extract_mentions("@张飞 @zhangfei-dev 来一下", "someone")
        assert result == ["zhangfei-dev"]

    def test_exclude_self(self):
        """A sender mentioning themselves (sender=zhangfei-dev) is excluded."""
        assert extract_mentions("@zhangfei-dev 自己说", "zhangfei-dev") == []

    def test_unknown_person(self):
        """@unknown matches no agent."""
        assert extract_mentions("@unknown 你好", "someone") == []

    def test_multiple_mentions(self):
        """Several @mentions return several agents (order not asserted)."""
        result = set(extract_mentions("@张飞 @关羽 来讨论", "someone"))
        assert result == {"zhangfei-dev", "guanyu-dev"}

    def test_mention_followed_by_punctuation(self):
        """A mention immediately followed by punctuation is still recognised."""
        # Renamed from test_mention_with_hyphen_in_middle: the input has no
        # hyphen; it exercises a mention directly followed by a comma.
        result = extract_mentions("@赵云,请看下", "someone")
        assert result == ["zhaoyun-data"]


# ---------------------------------------------------------------------------
# should_suppress_mention
# ---------------------------------------------------------------------------

class TestShouldSuppressMention:
    """Tests for suppressing redundant @mention notifications."""

    def test_suppress_when_in_list(self):
        """Mentioned agent is in the auto-notify list -> suppressed."""
        assert should_suppress_mention("zhangfei-dev", ["zhangfei-dev", "guanyu-dev"]) is True

    def test_not_suppress_when_not_in_list(self):
        """Mentioned agent is not in the auto-notify list -> not suppressed."""
        assert should_suppress_mention("zhangfei-dev", ["guanyu-dev"]) is False

    def test_not_suppress_when_list_empty(self):
        """Empty auto-notify list -> not suppressed."""
        # Renamed from test_suppress_empty_list, which read as the opposite of
        # the asserted result (False).
        assert should_suppress_mention("zhangfei-dev", []) is False


# ---------------------------------------------------------------------------
# infer_intent
# ---------------------------------------------------------------------------

class TestInferIntent:
    """Tests for intent inference.

    Priority: assign -> collaborate -> help -> notify (default).
    """

    def test_help_question_mark(self):
        """A question -> help."""
        assert infer_intent("@赵云 数据格式是什么?") == "help"

    def test_notify_plain_mention(self):
        """Plain notification without keywords -> notify."""
        assert infer_intent("@关羽 这个 PR 涉及风控变更") == "notify"

    def test_collaborate_please_help(self):
        """'请帮忙' -> collaborate (NOT help)."""
        assert infer_intent("@庞统 请帮忙澄清需求") == "collaborate"

    def test_assign_keywords(self):
        """'交给你' -> assign."""
        assert infer_intent("@张飞 前端部分交给你") == "assign"

    def test_help_how_to(self):
        """'如何' -> help."""
        assert infer_intent("@姜维 如何部署这个服务") == "help"

    def test_collaborate_please_review(self):
        """'请review' -> collaborate."""
        assert infer_intent("@司马懿 请review 这个方案") == "collaborate"

    def test_notify_default(self):
        """No keyword at all -> notify."""
        assert infer_intent("@赵云 已更新数据") == "notify"

    def test_assign_takes_priority_over_help(self):
        """An assign keyword wins over a help keyword."""
        # "交给" in body -> assign, even though "?" is also present.
        assert infer_intent("@张飞 这个模块交给你,有问题?") == "assign"

    def test_collaborate_takes_priority_over_help(self):
        """'请帮忙' in the body -> collaborate."""
        # NOTE(review): this input appears to contain no help trigger (no "?"
        # and no "如何", the only help triggers visible in this file), so it
        # pins the collaborate result but does not exercise the
        # collaborate-over-help ordering its name advertises. Consider adding
        # a help keyword once the keyword list in mention_utils is confirmed.
        assert infer_intent("@赵云 请帮忙看看数据") == "collaborate"