From 05201d778e646a11a6eb6c40e817d29b4f47d2df Mon Sep 17 00:00:00 2001 From: cfdaily Date: Tue, 9 Jun 2026 22:49:10 +0800 Subject: [PATCH 1/3] =?UTF-8?q?fix(ci):=20=E5=8E=BB=E6=8E=89push=E8=A7=A6?= =?UTF-8?q?=E5=8F=91=E9=81=BF=E5=85=8D=E5=8F=8C=E5=80=8D=E8=A7=A6=E5=8F=91?= =?UTF-8?q?=20+=20=E4=BF=AE=E5=A4=8Dnotify=E8=AF=AF=E6=8A=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 触发器:去掉 push,只保留 pull_request(opened, synchronize) - 每次 push 到 PR 分支不再跑 2 次 CI 2. notify-on-failure:只有明确的 failure 状态才发通知 - 之前:空状态/unknown/pending 都触发通知(误报根因) - 现在:只有 STATUS=failure 才发通知 3. venv 路径:统一用 /tmp/ci-venv-lint 和 /tmp/ci-venv-test - 避免 host 模式下与开发目录 .venv 冲突 --- .gitea/workflows/ci.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 3bb1537..bb73040 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -1,9 +1,10 @@ # CI 管道 — moziplus v2.0 # # 触发条件: -# - push(非 main 分支) # - pull_request(opened, synchronize) # +# 注意:只保留 pull_request 触发,避免 push + pull_request 双倍触发 +# # Gitea v1.23.4 限制注意: # - 不支持 failure() 表达式,用 always() + shell 条件判断替代 # - 不支持 concurrency / continue-on-error / timeout-minutes / permissions @@ -13,10 +14,6 @@ name: CI on: - push: - branches: - - '**' - - '!main' pull_request: types: [opened, synchronize] @@ -54,6 +51,7 @@ jobs: # ── Job 3: CI 失败通知 ─────────────────────────────── # v1.23 不支持 failure(),用 always() + shell 检查 commit status 替代 + # 修复:只有明确的 failure 才发通知,空状态/未知状态不发(避免误报) notify-on-failure: runs-on: macos-arm64 needs: [lint, test] @@ -69,10 +67,11 @@ jobs: "${{ gitea.api_url }}/repos/${{ gitea.repository }}/commits/${{ gitea.sha }}/status" \ | python3 -c "import sys,json; print(json.load(sys.stdin).get('state',''))" 2>/dev/null || echo "") - echo "Commit status: $STATUS" + echo "Commit status: [$STATUS]" - if [ "$STATUS" != "success" ]; then - echo "CI failed or status unknown, sending notification..." + # 只在明确 failure 时发通知(空/unknown/success/pending 都不发) + if [ "$STATUS" = "failure" ]; then + echo "CI explicitly failed, sending notification..." # 如果是 PR 事件,写评论通知 PR_NUMBER="${{ gitea.event.pull_request.number }}" @@ -88,5 +87,5 @@ jobs: echo "Not a PR event, skipping PR comment." fi else - echo "CI passed, no notification needed." + echo "CI status is [$STATUS], no failure notification needed." fi From 20b3b5facb4b6e35faf2a6f0de06405b3ca48349 Mon Sep 17 00:00:00 2001 From: cfdaily Date: Tue, 9 Jun 2026 22:59:16 +0800 Subject: [PATCH 2/3] =?UTF-8?q?fix(ci):=20=E4=BF=AE=E5=A4=8Dnotify?= =?UTF-8?q?=E7=AB=9E=E6=80=81=E6=9D=A1=E4=BB=B6=20-=20=E7=94=A8needs.resul?= =?UTF-8?q?t=E6=9B=BF=E4=BB=A3commit=20status=E6=9F=A5=E8=AF=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 根因:notify-on-failure job 通过 commit status API 查询结果时, 自身的 pending status 会污染查询结果(竞态条件): 1. lint/test 都 success 2. notify 开始运行,自身状态 pending 写入 commit status 3. notify 查询 commit status → 看到 pending(自己的)≠ success 4. 误发 [CI] 失败 评论 + webhook 触发 Mail 通知 修复方案: - 不再查询 commit status API - 直接用 needs.lint.result 和 needs.test.result 判断 - 只有明确的 failure 才发通知 - 同时去掉 push 触发避免双倍运行 --- .gitea/workflows/ci.yml | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index bb73040..4b98af7 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -50,8 +50,8 @@ jobs: /tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -q # ── Job 3: CI 失败通知 ─────────────────────────────── - # v1.23 不支持 failure(),用 always() + shell 检查 commit status 替代 - # 修复:只有明确的 failure 才发通知,空状态/未知状态不发(避免误报) + # 使用 needs..result 直接判断,不查询 commit status API + # 根因:notify 自身的 pending status 会污染 commit status 查询结果(竞态条件) notify-on-failure: runs-on: macos-arm64 needs: [lint, test] @@ -60,32 +60,34 @@ jobs: - name: Check results and notify env: GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }} + LINT_RESULT: ${{ needs.lint.result }} + TEST_RESULT: ${{ needs.test.result }} run: | - # 查询当前 commit 的 status - STATUS=$(curl -sf \ - -H "Authorization: token $GITEA_TOKEN" \ - "${{ gitea.api_url }}/repos/${{ gitea.repository }}/commits/${{ gitea.sha }}/status" \ - | python3 -c "import sys,json; print(json.load(sys.stdin).get('state',''))" 2>/dev/null || echo "") + echo "Lint result: $LINT_RESULT" + echo "Test result: $TEST_RESULT" - echo "Commit status: [$STATUS]" - - # 只在明确 failure 时发通知(空/unknown/success/pending 都不发) - if [ "$STATUS" = "failure" ]; then - echo "CI explicitly failed, sending notification..." + # 只有 lint 或 test 明确失败时才发通知 + if [ "$LINT_RESULT" = "failure" ] || [ "$TEST_RESULT" = "failure" ]; then + echo "CI has failures, sending notification..." # 如果是 PR 事件,写评论通知 PR_NUMBER="${{ gitea.event.pull_request.number }}" if [ -n "$PR_NUMBER" ]; then + # 构建失败摘要 + FAILED_JOBS="" + [ "$LINT_RESULT" = "failure" ] && FAILED_JOBS="${FAILED_JOBS}lint " + [ "$TEST_RESULT" = "failure" ] && FAILED_JOBS="${FAILED_JOBS}test " + curl -sf -X POST \ -H "Authorization: token $GITEA_TOKEN" \ -H "Content-Type: application/json" \ "${{ gitea.api_url }}/repos/${{ gitea.repository }}/issues/${PR_NUMBER}/comments" \ - -d "{\"body\": \"[CI] 失败\\n\\n分支: ${{ gitea.ref_name }}\\n触发 commit: \`${{ gitea.sha }}\`\\n请检查 CI 日志并修复。\"}" \ + -d "{\"body\": \"[CI] 失败\\n\\n分支: ${{ gitea.ref_name }}\\n触发 commit: \`${{ gitea.sha }}\`\\n失败 Job: ${FAILED_JOBS}\\n请检查 CI 日志并修复。\"}" \ || echo "Failed to post PR comment" echo "PR comment posted." else echo "Not a PR event, skipping PR comment." fi else - echo "CI status is [$STATUS], no failure notification needed." + echo "No explicit failures (results: lint=$LINT_RESULT, test=$TEST_RESULT), no notification needed." fi From ee1ef23acea444d0a04378cd8eba4fa62bf0dca0 Mon Sep 17 00:00:00 2001 From: cfdaily Date: Tue, 9 Jun 2026 23:35:02 +0800 Subject: [PATCH 3/3] =?UTF-8?q?fix(spawner):=20crash=20cooldown=E5=88=86?= =?UTF-8?q?=E7=BA=A7=20+=20inform=20mail=20crash=E8=AF=AF=E6=A0=87done?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - crashed outcome cooldown 60s(vs 其他 300s) - import init_db - whitespace/lint fixes --- src/daemon/spawner.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/daemon/spawner.py b/src/daemon/spawner.py index aee09ea..7876435 100644 --- a/src/daemon/spawner.py +++ b/src/daemon/spawner.py @@ -15,7 +15,7 @@ from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional -from src.blackboard.db import get_connection +from src.blackboard.db import get_connection, init_db logger = logging.getLogger("moziplus-v2.spawner") @@ -163,7 +163,6 @@ class AgentBusyError(Exception): #07: reason 字段区分具体原因,便于 dispatcher 层区分处理。 """ - def __init__(self, agent_id: str, reason: str = "busy", detail: Optional[dict] = None): self.agent_id = agent_id self.reason = reason # counter_blocked / session_locked / session_running / session_compacting / session_stuck @@ -300,7 +299,7 @@ class AgentSpawner: project_id, agent_id) def _build_minimal_fallback(self, task_id, title, description, must_haves, - project_id, agent_id): + project_id, agent_id): """最小 fallback:只有任务上下文 + API 指令""" task_section = f"""## 任务 {title} @@ -312,7 +311,7 @@ class AgentSpawner: return task_section + "\n\n---\n\n" + api_section def _build_api_section(self, project_id: str, task_id: str, - agent_id: str) -> str: + agent_id: str) -> str: """构建 API 回写操作指令(BootstrapBuilder 模式下补充)""" # mail 任务直接 done,不走 review success_status = '"done"' if project_id == "_mail" else '"review"' @@ -338,8 +337,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta """ def _build_discussion_prompt(self, task_id: str, title: str, - description: str, must_haves: str, - project_id: str, agent_id: str) -> str: + description: str, must_haves: str, + project_id: str, agent_id: str) -> str: """构建讨论类 spawn prompt(§3.3 框架 + Boids)""" goal_snapshot = description or title constraints = must_haves or "(无特殊约束)" @@ -380,8 +379,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta return router.agent_profiles.get(agent_id) return None + def _build_mail_prompt(self, task_id: str, title: str, description: str, - must_haves: str, agent_id: str) -> str: + must_haves: str, agent_id: str) -> str: """构建 Mail 专用精简模板""" # 解析 must_haves 获取 from 和 performative from_agent = agent_id @@ -575,7 +575,7 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta stderr=asyncio.subprocess.PIPE, ) self._register_session(session_id, agent_id, task_id, proc.pid, - broadcast_task_ids=broadcast_task_ids) + broadcast_task_ids=broadcast_task_ids) logger.info("Spawned agent %s (session=%s, pid=%d)", agent_id, session_id, proc.pid) @@ -848,10 +848,13 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ # A8(gateway_unreachable), A11(lock_conflict), # A10(compact_failed), A12(agent_error) # v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间 - if outcome in ("crashed", "compact_failed", "process_crash", "session_stuck", + if outcome == "crashed" and self.counter: + self.counter.set_cooldown(agent_id, seconds=60) + logger.info("Crash cooldown set for %s: 60s (outcome=%s)", agent_id, outcome) + elif outcome in ("compact_failed", "process_crash", "session_stuck", "compact_hanging", "agent_error", "compact_interrupted") and self.counter: self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟 - logger.info("Crash/error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome) + logger.info("Error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome) # F1: 不可恢复 outcome → 立刻标 failed + 写黑板 if outcome in ("auth_failed", "agent_error") and db_path and task_id: logger.error("Task %s: unrecoverable outcome=%s, marking failed immediately", task_id, outcome) @@ -878,6 +881,9 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ except Exception: pass + stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace") + + # 检查 session 状态 state = self._check_session_state(agent_id) # B1: 假死 - 先复活,连续假死 ≥2 次再 failed @@ -1213,7 +1219,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ 实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。 正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。 """ - if not session_file or not Path(session_file).exists(): + if not session_file or not pathlib.Path(session_file).exists(): return False try: from datetime import datetime, timezone @@ -1422,7 +1428,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ return defaults def _update_retry_counts(self, db_path: Optional[Path], - task_id: Optional[str], counts: dict): + task_id: Optional[str], counts: dict): """将 retry counts 写回最新 task_attempt 的 metadata""" if not db_path or not task_id: return @@ -1482,8 +1488,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ from src.blackboard.operations import Blackboard bb = Blackboard(db_path) cid = bb.add_comment(task_id, "daemon", - f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入", - comment_type="system") + f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入", + comment_type="system") bb.record_mentions(cid, task_id, ["pangtong-fujunshi"]) logger.info("Task %s: failure notified pangtong via comment+mention (reason=%s)", task_id, reason) except Exception as e: