[moz] feat: Runaway Guard per-task dispatch 上限
§15 Runaway Guard — per-task dispatch_count 上限,防止无限循环 dispatch

问题:mail/toolchain task 走 handler auto-working(跳过 claim),不受 claim_timeout 3 次重试兜底保护。如果反复 spawn 但永远到不了 done/failed, 会无限循环消耗资源(实际案例:2026-06-15 mention 重复投递事件)。

设计:
- tasks 表新增 dispatch_count 字段
- 每次 ticker 成功 dispatch 时递增
- dispatch_count >= 10 时自动标 failed(reason=runaway_guard)
- 覆盖所有非终态(pending/working/claimed)
- 参考 Hermes v0.13 §3 Per-Task 重试上限

改动文件:
- src/blackboard/db.py: _safe_add_column dispatch_count
- src/blackboard/models.py: Task dataclass 加 dispatch_count
- src/daemon/ticker.py: dispatch 递增 + _check_timeouts runaway guard
- docs/design/15-runaway-guard.md: 设计文档
- tests/integration/test_ticker_integration.py: E13 测试 3 个

测试:456 passed, 3 skipped
This commit is contained in:
@@ -543,3 +543,94 @@ class TestCheckTimeoutsUnified:
|
||||
reclaimed = ticker._check_timeouts(db_path)
|
||||
|
||||
assert "t-review-dead" not in reclaimed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# E13: §15 Runaway Guard — per-task dispatch_count 上限
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRunawayGuard:
    """E13: runaway guard driven by the per-task ``dispatch_count`` column.

    The tests below seed ``dispatch_count`` directly in the ``tasks`` table,
    run ``Ticker._check_timeouts`` once, and check whether the task was
    reported and moved to ``failed``. A ``working`` task and a ``pending``
    task are exercised at a count of 10; a ``working`` task at a count of 5
    serves as the control case.
    """

    @staticmethod
    def _run_sql(bb, statement, params):
        """Execute one statement on a fresh blackboard connection and commit.

        The connection is always closed, even if the statement raises.
        """
        connection = bb._conn()
        try:
            connection.execute(statement, params)
            connection.commit()
        finally:
            connection.close()

    @pytest.fixture
    def guard_project(self, tmp_path):
        """Create a project under ``tmp_path`` and open its blackboard.

        Returns a ``(registry, db_path, blackboard)`` tuple.
        """
        projects_dir = tmp_path / "projects"
        project_registry = ProjectRegistry(projects_dir)
        project_registry.create_project(
            "guard-proj", "Guard Test", agents=["agent-a"]
        )
        blackboard_path = projects_dir / "guard-proj" / "blackboard.db"
        blackboard = Blackboard(blackboard_path)
        return project_registry, blackboard_path, blackboard

    def test_runaway_guard_triggers_working(self, guard_project):
        """E13.1: a working task with dispatch_count = 10 is marked failed."""
        registry, db_path, bb = guard_project
        task_id = "t-runaway"

        runaway = Task(
            id=task_id,
            title="Runaway Task",
            status="working",
            assigned_by="daemon",
            current_agent="agent-a",
        )
        bb.create_task(runaway)

        # Seed the counter directly so no real dispatch cycles are needed.
        self._run_sql(
            bb,
            "UPDATE tasks SET dispatch_count = 10 WHERE id = ?",
            (task_id,),
        )

        flagged = Ticker(registry, tick_interval=30)._check_timeouts(db_path)

        assert task_id in flagged
        stored = Queries(db_path).task_by_id(task_id)
        assert stored.status == "failed"

    def test_runaway_guard_triggers_pending(self, guard_project):
        """E13.2: a pending task with dispatch_count = 10 is marked failed."""
        registry, db_path, bb = guard_project
        task_id = "t-pending-runaway"

        pending = Task(
            id=task_id,
            title="Pending Runaway",
            status="pending",
            assigned_by="daemon",
        )
        bb.create_task(pending)

        # Seed the counter directly so no real dispatch cycles are needed.
        self._run_sql(
            bb,
            "UPDATE tasks SET dispatch_count = 10 WHERE id = ?",
            (task_id,),
        )

        flagged = Ticker(registry, tick_interval=30)._check_timeouts(db_path)

        assert task_id in flagged
        stored = Queries(db_path).task_by_id(task_id)
        assert stored.status == "failed"

    def test_runaway_guard_not_triggered(self, guard_project):
        """E13.3: with dispatch_count = 5 the task is left in ``working``."""
        registry, db_path, bb = guard_project
        task_id = "t-normal"

        normal = Task(
            id=task_id,
            title="Normal Task",
            status="working",
            assigned_by="daemon",
            current_agent="agent-a",
        )
        bb.create_task(normal)

        # Control case: the seeded counter is lower than in E13.1 / E13.2.
        self._run_sql(
            bb,
            "UPDATE tasks SET dispatch_count = 5 WHERE id = ?",
            (task_id,),
        )

        flagged = Ticker(registry, tick_interval=30)._check_timeouts(db_path)

        assert task_id not in flagged
        stored = Queries(db_path).task_by_id(task_id)
        assert stored.status == "working"
|
||||
|
||||
Reference in New Issue
Block a user