chore: simayi-approved changes - lint fixes, toolchain improvements, healthz

All changes reviewed and APPROVED in PR #12 (Review ID: 40):
- toolchain_routes: webhook repo/org format compat, content dedup (sha256), closed issue filter
- dispatcher: inform mail crash 误标 done 修复
- ticker: cleanup and improvements
- healthz endpoint
- conftest: integration/e2e deselect markers
- docs: design docs, test-guide updates
- various lint/whitespace fixes across 30 files
This commit is contained in:
cfdaily
2026-06-09 23:35:36 +08:00
parent a1a4d7c5a7
commit f7fbdac89c
30 changed files with 362 additions and 125 deletions
+2 -1
View File
@@ -11,7 +11,8 @@ A 类 Skill 由引擎确定性注入全文,不靠 Description 触发。
import logging
import os
from typing import Any, List
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger("moziplus-v2.bootstrap")
+1 -1
View File
@@ -73,7 +73,7 @@ class ActiveAgentCounter:
cd = seconds if seconds is not None else self._default_cooldown_seconds
self._cooldown_until[agent_id] = time.time() + cd
logger.info("Cooldown set for %s: %.0fs (until %.0f)",
agent_id, cd, self._cooldown_until[agent_id])
agent_id, cd, self._cooldown_until[agent_id])
async def can_acquire(self, agent_id: str, session_id: str = "main") -> bool:
"""三层检查:cooldown → global → per agent → per session key"""
+18 -10
View File
@@ -14,6 +14,7 @@ from __future__ import annotations
import json
import logging
import sqlite3
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional
@@ -21,7 +22,7 @@ from typing import Any, Dict, List, Optional
from src.blackboard.models import Task
from src.blackboard.db import get_connection
from src.daemon.spawner import AgentBusyError
from src.daemon.router import AgentRouter
from src.daemon.router import AgentRouter, RouteDecision
logger = logging.getLogger("moziplus-v2.dispatcher")
@@ -193,7 +194,6 @@ class Dispatcher:
_task_id = task.id
_mail_db = db_path
_disp = self
def _mail_on_checks_passed():
nonlocal _mail_marked_working
if not _disp._mail_auto_working(_task_id, _mail_db):
@@ -203,8 +203,8 @@ class Dispatcher:
# 构建 spawn message
message = self._build_spawn_message(task, agent_id, project_config,
mode=decision.get("mode", ""),
spawn_type=action_type or "executor")
mode=decision.get("mode", ""),
spawn_type=action_type or "executor")
# v2.7.2: on_complete 只含业务逻辑,不含 counter.release
# counter.release 由 spawn_full_agent 内部的 wrapped_on_complete 保证
@@ -218,7 +218,7 @@ class Dispatcher:
def _mail_on_complete(aid, outcome):
# 幻觉门控:检查是否有回复,自动标 done/failed
try:
_dispatcher._mail_auto_complete(_task_id, aid, _mail_db, _must_haves)
_dispatcher._mail_auto_complete(_task_id, aid, _mail_db, _must_haves, outcome=outcome)
except Exception as e:
logger.error("Mail %s: on_complete error: %s", _task_id, e)
on_complete = _mail_on_complete
@@ -269,8 +269,8 @@ class Dispatcher:
from src.blackboard.blackboard import Blackboard
bb = Blackboard(_task_db)
bb.add_comment(_task_id, "daemon",
f"@{task_row['assignee']} 审查结论: {verdict_str},请查看详情并决定接受或反驳",
comment_type="review")
f"@{task_row['assignee']} 审查结论: {verdict_str},请查看详情并决定接受或反驳",
comment_type="review")
logger.info("Task %s: review verdict=%s, notified assignee=%s",
_task_id, verdict_str, task_row["assignee"] if task_row else "?")
# 不标 done,保持 review 状态
@@ -576,7 +576,7 @@ class Dispatcher:
def _mail_oc_legacy(aid, outcome):
try:
_disp._mail_auto_complete(_t_id, aid, _m_db, _m_mh)
_disp._mail_auto_complete(_t_id, aid, _m_db, _m_mh, outcome=outcome)
except Exception as e:
logger.error("Mail %s: legacy on_complete error: %s", _t_id, e)
on_complete_legacy = _mail_oc_legacy
@@ -661,7 +661,7 @@ class Dispatcher:
logger.error("Mail %s: failed to revert to pending: %s", task_id, e)
def _mail_auto_complete(self, task_id: str, agent_id: str,
db_path: Path, must_haves: str) -> None:
db_path: Path, must_haves: str, outcome=None) -> None:
"""Mail 任务:on_complete 后自动标 done/failed(含幻觉门控)"""
try:
# 解析 performative
@@ -712,6 +712,14 @@ class Dispatcher:
logger.error("Mail %s: all 3 failed attempts failed, leaving for ticker", task_id)
return
# inform 类型:只对成功 outcome 标 done,失败 outcome 留 working 等 ticker 重投
# Task 路径不受此 bug 影响(走 _task_auto_complete 独立逻辑)
if performative == "inform":
INFORM_DONE_OUTCOMES = {"completed", "claimed", "no_reply"}
if outcome not in INFORM_DONE_OUTCOMES:
logger.info("Mail %s: inform outcome=%s, skip auto-done", task_id, outcome)
return
# 标 done(重试 3 次)
for attempt in range(3):
try:
@@ -858,7 +866,7 @@ class Dispatcher:
logger.error("Task %s: mark status error: %s", task_id, e)
@staticmethod
def _check_crash_limit(task_id: str, db_path: Path, limit: int = 3,
def _check_crash_limit(task_id: str, db_path: pathlib.Path, limit: int = 3,
window_minutes: int = 30) -> bool:
"""v2.8.1 Fix-3c: 检查 task 最近 window_minutes 内的 crash 次数是否超限。
+3 -3
View File
@@ -14,7 +14,7 @@ import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger("moziplus-v2.experience")
@@ -68,7 +68,7 @@ class Experience:
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> Experience:
return cls(**{k: v for k, v in data.items() if k != "id"},
experience_id=data.get("id"))
experience_id=data.get("id"))
class ExperienceStore:
@@ -284,7 +284,7 @@ class ExperienceDistiller:
all_tags.append(task_type)
results = self.store.search(tags=all_tags if all_tags else None,
query=query, limit=limit)
query=query, limit=limit)
# 按置信度排序
results.sort(key=lambda e: e.confidence, reverse=True)
+1 -1
View File
@@ -4,7 +4,7 @@ from __future__ import annotations
import logging
import re
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
+4 -2
View File
@@ -9,9 +9,9 @@ from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any, Dict
from typing import Any, Dict, Optional
from src.blackboard.db import get_connection
from src.blackboard.db import get_connection, init_db
from src.blackboard.queries import Queries
logger = logging.getLogger("moziplus-v2.health")
@@ -41,6 +41,7 @@ class HealthChecker:
{"healthy": bool, "zombie": bool, "stale_ticks": int,
"alert_written": bool, "resolved": bool}
"""
db_key = str(db_path)
result: Dict[str, Any] = {
"healthy": True,
"zombie": False,
@@ -57,6 +58,7 @@ class HealthChecker:
# 用 event count 变化判断是否有真实变更
conn = queries._conn()
try:
total_events = conn.execute("SELECT COUNT(*) FROM events").fetchone()[0]
non_tick_events = conn.execute(
"SELECT COUNT(*) FROM events WHERE event_type != 'daemon_tick' "
"AND event_type != 'agent_zombie_detected'"
+3 -2
View File
@@ -15,6 +15,7 @@ from __future__ import annotations
import asyncio
import json
import logging
import os
from pathlib import Path
from typing import Any, Callable, Coroutine, Dict, List, Optional
@@ -56,7 +57,7 @@ class InboxWatcher:
self._running = True
self._task = asyncio.create_task(self._loop())
logger.info("Inbox watcher started (path=%s, interval=%.1fs)",
self.inbox_path, self.watch_interval)
self.inbox_path, self.watch_interval)
async def stop(self) -> None:
"""停止监听"""
@@ -68,7 +69,7 @@ class InboxWatcher:
except asyncio.CancelledError:
pass
logger.info("Inbox watcher stopped (processed=%d, errors=%d)",
self._total_processed, self._total_errors)
self._total_processed, self._total_errors)
@property
def is_running(self) -> bool:
+1 -1
View File
@@ -108,7 +108,7 @@ def notify_mail_failed(db_path: Path, original_mail_id: str,
)
bb.create_task(notify_task)
logger.info("Mail %s: sent failure notification to %s (original_sender=%s, reason=%s, notify_id=%s)",
original_mail_id, target_agent, from_agent, reason, notify_id)
original_mail_id, target_agent, from_agent, reason, notify_id)
except Exception as e:
logger.warning("notify_mail_failed: failed to send notification for mail %s: %s", original_mail_id, e)
+4 -1
View File
@@ -8,12 +8,15 @@ from __future__ import annotations
import json
import logging
import re
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Callable, Dict, List, Optional, Tuple
from src.blackboard.models import Task
from src.blackboard.operations import Blackboard
from src.blackboard.queries import Queries
logger = logging.getLogger("moziplus-v2.review")
+2 -1
View File
@@ -10,11 +10,12 @@ from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple
logger = logging.getLogger("moziplus-v2.skill")
+4 -6
View File
@@ -1373,13 +1373,11 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# A17: 真正的 crash → 保持 working,ticker 兜底
return {"outcome": "crashed", "should_retry": False, "original": "process_crash"}
# stdout 为空但 exit=0:可能是正常完成但 --json 没输出
# 查任务状态判断
# A13 revised: stdout 为空但 exit=0 → 信任进程退出码,视为正常完成
# 实测发现 openclaw session=None + exit=0 是正常场景(inform 通知等)
# 旧逻辑按 task_status 区分,非终态判 agent_error → 导致 inform 邮件永不标 done
if status is None and not stdout_text.strip() and exit_code == 0:
terminal_statuses = {"done", "review"}
if task_status in terminal_statuses:
return {"outcome": "completed", "should_retry": False}
return {"outcome": "agent_error", "should_retry": False}
return {"outcome": "completed", "should_retry": False}
# A7-A12: status=error → 不续杯,stderr 辅助分类
if status == "error":
+4 -1
View File
@@ -9,11 +9,14 @@ from __future__ import annotations
import asyncio
import json
import logging
import subprocess
import uuid
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set
from src.blackboard.models import Event
logger = logging.getLogger("moziplus-v2.sse")
+26 -25
View File
@@ -21,6 +21,7 @@ from dataclasses import dataclass, field as dc_field
from src.blackboard.operations import Blackboard
from src.blackboard.db import get_connection
from src.blackboard.models import Task
from src.daemon.spawner import AgentBusyError
from src.blackboard.queries import Queries
from src.blackboard.registry import ProjectRegistry
@@ -34,7 +35,6 @@ class BroadcastRound:
responded_agents: set = dc_field(default_factory=set) # 已返回反馈的 Agent(含 NO_REPLY
round_number: int = 0 # 当前第几轮(0=未开始,1=第1轮)
logger = logging.getLogger("moziplus-v2.ticker")
@@ -391,7 +391,7 @@ class Ticker:
MAX_ROUNDS = 5 # §4.5 防无限循环
async def _check_round_complete(self, db_path: Path,
project_id: str) -> List[str]:
project_id: str) -> List[str]:
"""检测 parent task 下所有 sub task 终态 → spawn 庞统 review
流程§4.4
@@ -462,7 +462,7 @@ class Ticker:
"Round %d review spawned for parent %s (subs: %s)",
new_round, parent_id, summary
)
except Exception:
except Exception as e:
logger.exception("Round check error for parent %s", parent_id)
return reviewed
@@ -531,9 +531,9 @@ Parent Task ID: {parent_task.id}
"""
async def _spawn_pangtong_review(self, parent_task,
review_prompt: str,
project_id: str,
new_round: int = 0) -> bool:
review_prompt: str,
project_id: str,
new_round: int = 0) -> bool:
"""Spawn 庞统进行 review
流程
@@ -543,6 +543,7 @@ Parent Task ID: {parent_task.id}
"""
try:
agent_id = "pangtong-fujunshi"
session_id = f"review-{parent_task.id}-r{new_round}"
# 构造 on_complete 回调:解析庞统结论,更新 parent 状态
async def _on_review_complete(aid: str, outcome: str):
@@ -585,7 +586,7 @@ Parent Task ID: {parent_task.id}
self._set_parent_reviewing(parent_task.id, project_id)
return True
return False
except Exception:
except Exception as e:
logger.exception("Failed to spawn pangtong review for %s", parent_task.id)
return False
@@ -602,14 +603,14 @@ Parent Task ID: {parent_task.id}
(parent_id,))
conn.commit()
logger.info("Parent %s → reviewing (round review in progress)",
parent_id)
parent_id)
finally:
conn.close()
except Exception:
logger.exception("Failed to set parent %s to reviewing", parent_id)
def _handle_review_conclusion(self, parent_id: str, project_id: str,
review_text: str, round_num: int):
review_text: str, round_num: int):
"""解析庞统 review 结论,更新 parent 状态
review_text 是庞统回复的文本 spawner session meta payloads 拼接
@@ -664,8 +665,8 @@ Parent Task ID: {parent_task.id}
def _resolve_db_path(self, project_id: str) -> Path:
"""解析项目 DB 路径"""
import src.utils as _utils
return _utils.get_data_root() / project_id / "blackboard.db"
from src.utils import get_data_root
return get_data_root() / project_id / "blackboard.db"
# ------------------------------------------------------------------
# @mention 通知处理 (v2.9 #01)
@@ -674,7 +675,7 @@ Parent Task ID: {parent_task.id}
MENTION_MAX_RETRIES = 5
async def _process_mentions(self, db_path: Path,
project_id: str) -> List[str]:
project_id: str) -> List[str]:
"""扫描 pending mentions → spawn 被 @ 的 Agent
流程§3.4
@@ -766,8 +767,8 @@ Parent Task ID: {parent_task.id}
from src.blackboard.blackboard import Blackboard
bb2 = Blackboard(rdb_path)
bb2.add_comment(_t_id, "daemon",
f"@{t_row['assignee']} 审查结论: {verdict_str},请查看详情并决定接受或反驳",
comment_type="review")
f"@{t_row['assignee']} 审查结论: {verdict_str},请查看详情并决定接受或反驳",
comment_type="review")
logger.info("Rebuttal: task %s still %s after rebuttal", _t_id, verdict_str)
except Exception:
logger.exception("Rebuttal on_complete failed for task %s", _t_id)
@@ -804,7 +805,7 @@ Parent Task ID: {parent_task.id}
# Agent 忙,不递增 retry_count,等下次 tick 自然重试
logger.info("Mention spawn skipped: %s busy, will retry next tick", agent_id)
except Exception:
except Exception as e:
logger.exception("Mention processing error for agent %s", agent_id)
for item in items:
try:
@@ -947,7 +948,7 @@ Parent Task ID: {parent_task.id}
# ------------------------------------------------------------------
async def _dispatch_pending(self, db_path: Path,
project_id: str) -> List[str]:
project_id: str) -> List[str]:
"""扫描 pending 任务并调度
v3.0: 两条路径
@@ -1241,7 +1242,7 @@ Parent Task ID: {parent_task.id}
return [aid for aid in all_agents if active.get(aid, 0) == 0]
async def _dispatch_reviews(self, db_path: Path,
project_id: str) -> List[str]:
project_id: str) -> List[str]:
"""扫描 review 状态任务,检查是否有产出,调度审查 Agent"""
# mail 任务不走 review 流程,直接跳过
if project_id == "_mail":
@@ -1343,7 +1344,7 @@ Parent Task ID: {parent_task.id}
)
reclaimed.append(task.id)
logger.warning("Escalated %s: no taker after %d broadcasts",
task.id, retry_count)
task.id, retry_count)
finally:
conn.close()
else:
@@ -1422,7 +1423,7 @@ Parent Task ID: {parent_task.id}
if ok:
reclaimed.append(task.id)
logger.info("Mail %s: ticker recheck found reply, marked done (%.1fm)",
task.id, elapsed)
task.id, elapsed)
finally:
conn.close()
continue
@@ -1439,7 +1440,7 @@ Parent Task ID: {parent_task.id}
if ok:
reclaimed.append(task.id)
logger.warning("Task %s timed out (working %.1fm > %.1fm)",
task.id, elapsed, timeout_minutes)
task.id, elapsed, timeout_minutes)
finally:
conn.close()
except (ValueError, TypeError):
@@ -1500,7 +1501,7 @@ Parent Task ID: {parent_task.id}
return True # 保守:查询失败假设有回复
def _check_recent_routing(self, db_path: Path, task_id: str,
action_type: str) -> bool:
action_type: str) -> bool:
"""检查最近 5 分钟内是否已 dispatch 过指定类型的路由(防重复)"""
try:
conn = get_connection(db_path)
@@ -1578,11 +1579,11 @@ Parent Task ID: {parent_task.id}
if recovery_report["total_recovered"] > 0:
logger.info("Startup recovery: %d tasks recovered across %d projects",
recovery_report["total_recovered"],
len(recovery_report["projects"]))
recovery_report["total_recovered"],
len(recovery_report["projects"]))
elif recovery_report["total_noop"] > 0:
logger.info("Startup recovery: %d tasks kept as-is (no recovery needed)",
recovery_report["total_noop"])
recovery_report["total_noop"])
else:
logger.info("Startup recovery: no non-terminal tasks found, clean start")
@@ -1628,7 +1629,7 @@ Parent Task ID: {parent_task.id}
return recovered, noop_count
def _determine_recovery_action(self, conn, task, status: str,
db_path: Path) -> Optional[str]:
db_path: Path) -> Optional[str]:
"""根据黑板线索决定恢复动作,返回 None 表示不需要干预"""
task_id = task["id"]