Merge pull request 'fix(CI): notify竞态修复 + 双倍触发去重 (PR #12 rebase, reviewed & approved)' (#14) from fix/ci-dedup-v2 into main
This commit was merged in pull request #14.
This commit is contained in:
+18
-17
@@ -1,9 +1,10 @@
|
|||||||
# CI 管道 — moziplus v2.0
|
# CI 管道 — moziplus v2.0
|
||||||
#
|
#
|
||||||
# 触发条件:
|
# 触发条件:
|
||||||
# - push(非 main 分支)
|
|
||||||
# - pull_request(opened, synchronize)
|
# - pull_request(opened, synchronize)
|
||||||
#
|
#
|
||||||
|
# 注意:只保留 pull_request 触发,避免 push + pull_request 双倍触发
|
||||||
|
#
|
||||||
# Gitea v1.23.4 限制注意:
|
# Gitea v1.23.4 限制注意:
|
||||||
# - 不支持 failure() 表达式,用 always() + shell 条件判断替代
|
# - 不支持 failure() 表达式,用 always() + shell 条件判断替代
|
||||||
# - 不支持 concurrency / continue-on-error / timeout-minutes / permissions
|
# - 不支持 concurrency / continue-on-error / timeout-minutes / permissions
|
||||||
@@ -13,10 +14,6 @@
|
|||||||
name: CI
|
name: CI
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- '**'
|
|
||||||
- '!main'
|
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [opened, synchronize]
|
types: [opened, synchronize]
|
||||||
|
|
||||||
@@ -53,7 +50,8 @@ jobs:
|
|||||||
/tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -q
|
/tmp/ci-venv-test/bin/pytest tests/ -m "not e2e" -x -q
|
||||||
|
|
||||||
# ── Job 3: CI 失败通知 ───────────────────────────────
|
# ── Job 3: CI 失败通知 ───────────────────────────────
|
||||||
# v1.23 不支持 failure(),用 always() + shell 检查 commit status 替代
|
# 使用 needs.<job>.result 直接判断,不查询 commit status API
|
||||||
|
# 根因:notify 自身的 pending status 会污染 commit status 查询结果(竞态条件)
|
||||||
notify-on-failure:
|
notify-on-failure:
|
||||||
runs-on: macos-arm64
|
runs-on: macos-arm64
|
||||||
needs: [lint, test]
|
needs: [lint, test]
|
||||||
@@ -62,31 +60,34 @@ jobs:
|
|||||||
- name: Check results and notify
|
- name: Check results and notify
|
||||||
env:
|
env:
|
||||||
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||||
|
LINT_RESULT: ${{ needs.lint.result }}
|
||||||
|
TEST_RESULT: ${{ needs.test.result }}
|
||||||
run: |
|
run: |
|
||||||
# 查询当前 commit 的 status
|
echo "Lint result: $LINT_RESULT"
|
||||||
STATUS=$(curl -sf \
|
echo "Test result: $TEST_RESULT"
|
||||||
-H "Authorization: token $GITEA_TOKEN" \
|
|
||||||
"${{ gitea.api_url }}/repos/${{ gitea.repository }}/commits/${{ gitea.sha }}/status" \
|
|
||||||
| python3 -c "import sys,json; print(json.load(sys.stdin).get('state',''))" 2>/dev/null || echo "")
|
|
||||||
|
|
||||||
echo "Commit status: $STATUS"
|
# 只有 lint 或 test 明确失败时才发通知
|
||||||
|
if [ "$LINT_RESULT" = "failure" ] || [ "$TEST_RESULT" = "failure" ]; then
|
||||||
if [ "$STATUS" != "success" ]; then
|
echo "CI has failures, sending notification..."
|
||||||
echo "CI failed or status unknown, sending notification..."
|
|
||||||
|
|
||||||
# 如果是 PR 事件,写评论通知
|
# 如果是 PR 事件,写评论通知
|
||||||
PR_NUMBER="${{ gitea.event.pull_request.number }}"
|
PR_NUMBER="${{ gitea.event.pull_request.number }}"
|
||||||
if [ -n "$PR_NUMBER" ]; then
|
if [ -n "$PR_NUMBER" ]; then
|
||||||
|
# 构建失败摘要
|
||||||
|
FAILED_JOBS=""
|
||||||
|
[ "$LINT_RESULT" = "failure" ] && FAILED_JOBS="${FAILED_JOBS}lint "
|
||||||
|
[ "$TEST_RESULT" = "failure" ] && FAILED_JOBS="${FAILED_JOBS}test "
|
||||||
|
|
||||||
curl -sf -X POST \
|
curl -sf -X POST \
|
||||||
-H "Authorization: token $GITEA_TOKEN" \
|
-H "Authorization: token $GITEA_TOKEN" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
"${{ gitea.api_url }}/repos/${{ gitea.repository }}/issues/${PR_NUMBER}/comments" \
|
"${{ gitea.api_url }}/repos/${{ gitea.repository }}/issues/${PR_NUMBER}/comments" \
|
||||||
-d "{\"body\": \"[CI] 失败\\n\\n分支: ${{ gitea.ref_name }}\\n触发 commit: \`${{ gitea.sha }}\`\\n请检查 CI 日志并修复。\"}" \
|
-d "{\"body\": \"[CI] 失败\\n\\n分支: ${{ gitea.ref_name }}\\n触发 commit: \`${{ gitea.sha }}\`\\n失败 Job: ${FAILED_JOBS}\\n请检查 CI 日志并修复。\"}" \
|
||||||
|| echo "Failed to post PR comment"
|
|| echo "Failed to post PR comment"
|
||||||
echo "PR comment posted."
|
echo "PR comment posted."
|
||||||
else
|
else
|
||||||
echo "Not a PR event, skipping PR comment."
|
echo "Not a PR event, skipping PR comment."
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "CI passed, no notification needed."
|
echo "No explicit failures (results: lint=$LINT_RESULT, test=$TEST_RESULT), no notification needed."
|
||||||
fi
|
fi
|
||||||
|
|||||||
+20
-14
@@ -15,7 +15,7 @@ from datetime import datetime
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from src.blackboard.db import get_connection
|
from src.blackboard.db import get_connection, init_db
|
||||||
|
|
||||||
logger = logging.getLogger("moziplus-v2.spawner")
|
logger = logging.getLogger("moziplus-v2.spawner")
|
||||||
|
|
||||||
@@ -163,7 +163,6 @@ class AgentBusyError(Exception):
|
|||||||
|
|
||||||
#07: reason 字段区分具体原因,便于 dispatcher 层区分处理。
|
#07: reason 字段区分具体原因,便于 dispatcher 层区分处理。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, agent_id: str, reason: str = "busy", detail: Optional[dict] = None):
|
def __init__(self, agent_id: str, reason: str = "busy", detail: Optional[dict] = None):
|
||||||
self.agent_id = agent_id
|
self.agent_id = agent_id
|
||||||
self.reason = reason # counter_blocked / session_locked / session_running / session_compacting / session_stuck
|
self.reason = reason # counter_blocked / session_locked / session_running / session_compacting / session_stuck
|
||||||
@@ -300,7 +299,7 @@ class AgentSpawner:
|
|||||||
project_id, agent_id)
|
project_id, agent_id)
|
||||||
|
|
||||||
def _build_minimal_fallback(self, task_id, title, description, must_haves,
|
def _build_minimal_fallback(self, task_id, title, description, must_haves,
|
||||||
project_id, agent_id):
|
project_id, agent_id):
|
||||||
"""最小 fallback:只有任务上下文 + API 指令"""
|
"""最小 fallback:只有任务上下文 + API 指令"""
|
||||||
task_section = f"""## 任务
|
task_section = f"""## 任务
|
||||||
{title}
|
{title}
|
||||||
@@ -312,7 +311,7 @@ class AgentSpawner:
|
|||||||
return task_section + "\n\n---\n\n" + api_section
|
return task_section + "\n\n---\n\n" + api_section
|
||||||
|
|
||||||
def _build_api_section(self, project_id: str, task_id: str,
|
def _build_api_section(self, project_id: str, task_id: str,
|
||||||
agent_id: str) -> str:
|
agent_id: str) -> str:
|
||||||
"""构建 API 回写操作指令(BootstrapBuilder 模式下补充)"""
|
"""构建 API 回写操作指令(BootstrapBuilder 模式下补充)"""
|
||||||
# mail 任务直接 done,不走 review
|
# mail 任务直接 done,不走 review
|
||||||
success_status = '"done"' if project_id == "_mail" else '"review"'
|
success_status = '"done"' if project_id == "_mail" else '"review"'
|
||||||
@@ -338,8 +337,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def _build_discussion_prompt(self, task_id: str, title: str,
|
def _build_discussion_prompt(self, task_id: str, title: str,
|
||||||
description: str, must_haves: str,
|
description: str, must_haves: str,
|
||||||
project_id: str, agent_id: str) -> str:
|
project_id: str, agent_id: str) -> str:
|
||||||
"""构建讨论类 spawn prompt(§3.3 框架 + Boids)"""
|
"""构建讨论类 spawn prompt(§3.3 框架 + Boids)"""
|
||||||
goal_snapshot = description or title
|
goal_snapshot = description or title
|
||||||
constraints = must_haves or "(无特殊约束)"
|
constraints = must_haves or "(无特殊约束)"
|
||||||
@@ -380,8 +379,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
|||||||
return router.agent_profiles.get(agent_id)
|
return router.agent_profiles.get(agent_id)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _build_mail_prompt(self, task_id: str, title: str, description: str,
|
def _build_mail_prompt(self, task_id: str, title: str, description: str,
|
||||||
must_haves: str, agent_id: str) -> str:
|
must_haves: str, agent_id: str) -> str:
|
||||||
"""构建 Mail 专用精简模板"""
|
"""构建 Mail 专用精简模板"""
|
||||||
# 解析 must_haves 获取 from 和 performative
|
# 解析 must_haves 获取 from 和 performative
|
||||||
from_agent = agent_id
|
from_agent = agent_id
|
||||||
@@ -575,7 +575,7 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
|||||||
stderr=asyncio.subprocess.PIPE,
|
stderr=asyncio.subprocess.PIPE,
|
||||||
)
|
)
|
||||||
self._register_session(session_id, agent_id, task_id, proc.pid,
|
self._register_session(session_id, agent_id, task_id, proc.pid,
|
||||||
broadcast_task_ids=broadcast_task_ids)
|
broadcast_task_ids=broadcast_task_ids)
|
||||||
logger.info("Spawned agent %s (session=%s, pid=%d)",
|
logger.info("Spawned agent %s (session=%s, pid=%d)",
|
||||||
agent_id, session_id, proc.pid)
|
agent_id, session_id, proc.pid)
|
||||||
|
|
||||||
@@ -848,10 +848,13 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
|||||||
# A8(gateway_unreachable), A11(lock_conflict),
|
# A8(gateway_unreachable), A11(lock_conflict),
|
||||||
# A10(compact_failed), A12(agent_error)
|
# A10(compact_failed), A12(agent_error)
|
||||||
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
|
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
|
||||||
if outcome in ("crashed", "compact_failed", "process_crash", "session_stuck",
|
if outcome == "crashed" and self.counter:
|
||||||
|
self.counter.set_cooldown(agent_id, seconds=60)
|
||||||
|
logger.info("Crash cooldown set for %s: 60s (outcome=%s)", agent_id, outcome)
|
||||||
|
elif outcome in ("compact_failed", "process_crash", "session_stuck",
|
||||||
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
|
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
|
||||||
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
|
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
|
||||||
logger.info("Crash/error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
|
logger.info("Error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
|
||||||
# F1: 不可恢复 outcome → 立刻标 failed + 写黑板
|
# F1: 不可恢复 outcome → 立刻标 failed + 写黑板
|
||||||
if outcome in ("auth_failed", "agent_error") and db_path and task_id:
|
if outcome in ("auth_failed", "agent_error") and db_path and task_id:
|
||||||
logger.error("Task %s: unrecoverable outcome=%s, marking failed immediately", task_id, outcome)
|
logger.error("Task %s: unrecoverable outcome=%s, marking failed immediately", task_id, outcome)
|
||||||
@@ -878,6 +881,9 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
# 检查 session 状态
|
||||||
state = self._check_session_state(agent_id)
|
state = self._check_session_state(agent_id)
|
||||||
|
|
||||||
# B1: 假死 - 先复活,连续假死 ≥2 次再 failed
|
# B1: 假死 - 先复活,连续假死 ≥2 次再 failed
|
||||||
@@ -1213,7 +1219,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
|||||||
实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。
|
实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。
|
||||||
正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。
|
正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。
|
||||||
"""
|
"""
|
||||||
if not session_file or not Path(session_file).exists():
|
if not session_file or not pathlib.Path(session_file).exists():
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
@@ -1422,7 +1428,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
|||||||
return defaults
|
return defaults
|
||||||
|
|
||||||
def _update_retry_counts(self, db_path: Optional[Path],
|
def _update_retry_counts(self, db_path: Optional[Path],
|
||||||
task_id: Optional[str], counts: dict):
|
task_id: Optional[str], counts: dict):
|
||||||
"""将 retry counts 写回最新 task_attempt 的 metadata"""
|
"""将 retry counts 写回最新 task_attempt 的 metadata"""
|
||||||
if not db_path or not task_id:
|
if not db_path or not task_id:
|
||||||
return
|
return
|
||||||
@@ -1482,8 +1488,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
|||||||
from src.blackboard.operations import Blackboard
|
from src.blackboard.operations import Blackboard
|
||||||
bb = Blackboard(db_path)
|
bb = Blackboard(db_path)
|
||||||
cid = bb.add_comment(task_id, "daemon",
|
cid = bb.add_comment(task_id, "daemon",
|
||||||
f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入",
|
f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入",
|
||||||
comment_type="system")
|
comment_type="system")
|
||||||
bb.record_mentions(cid, task_id, ["pangtong-fujunshi"])
|
bb.record_mentions(cid, task_id, ["pangtong-fujunshi"])
|
||||||
logger.info("Task %s: failure notified pangtong via comment+mention (reason=%s)", task_id, reason)
|
logger.info("Task %s: failure notified pangtong via comment+mention (reason=%s)", task_id, reason)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user