fix(lint): 修复 PR #14 引入的 lint 回退 (119→0)
PR #14 从旧分支复制文件,导致 PR #10 的 lint 修复被回退。

修复内容:
- autoflake 移除未使用导入/变量
- autopep8 修复缩进/空格
- 手动修复 F821(pathlib→Path)、F541(f-string)、F841(未使用变量)
- 所有修复均通过 flake8 --max-line-length=120 --extend-ignore=E501 检查 (0 errors)
This commit is contained in:
+199
-96
@@ -21,7 +21,6 @@ from dataclasses import dataclass, field as dc_field
|
||||
|
||||
from src.blackboard.operations import Blackboard
|
||||
from src.blackboard.db import get_connection
|
||||
from src.blackboard.models import Task
|
||||
from src.daemon.spawner import AgentBusyError
|
||||
from src.blackboard.queries import Queries
|
||||
from src.blackboard.registry import ProjectRegistry
|
||||
@@ -32,9 +31,11 @@ class BroadcastRound:
|
||||
"""追踪单个任务的广播状态"""
|
||||
task_id: str
|
||||
notified_agents: set = dc_field(default_factory=set) # 已 spawn 过的 Agent
|
||||
responded_agents: set = dc_field(default_factory=set) # 已返回反馈的 Agent(含 NO_REPLY)
|
||||
responded_agents: set = dc_field(
|
||||
default_factory=set) # 已返回反馈的 Agent(含 NO_REPLY)
|
||||
round_number: int = 0 # 当前第几轮(0=未开始,1=第1轮)
|
||||
|
||||
|
||||
logger = logging.getLogger("moziplus-v2.ticker")
|
||||
|
||||
|
||||
@@ -46,7 +47,8 @@ class Ticker:
|
||||
registry: ProjectRegistry,
|
||||
tick_interval: float = 30.0,
|
||||
max_ticks: Optional[int] = None,
|
||||
on_tick_complete: Optional[Callable[[], Coroutine[Any, Any, None]]] = None,
|
||||
on_tick_complete: Optional[Callable[[],
|
||||
Coroutine[Any, Any, None]]] = None,
|
||||
dispatcher: Optional[Any] = None,
|
||||
spawner: Optional[Any] = None,
|
||||
max_dispatch_per_tick: int = 3,
|
||||
@@ -194,7 +196,10 @@ class Ticker:
|
||||
pr = await self._tick_project(project_id, project_info)
|
||||
results["projects"][project_id] = pr
|
||||
except Exception as e:
|
||||
logger.exception("Tick %d project %s error", tick_num, project_id)
|
||||
logger.exception(
|
||||
"Tick %d project %s error",
|
||||
tick_num,
|
||||
project_id)
|
||||
results["projects"][project_id] = {"error": str(e)}
|
||||
|
||||
# 虚拟项目 _general:不在 registry 但需要调度
|
||||
@@ -223,7 +228,10 @@ class Ticker:
|
||||
logger.exception("Tick %d _mail error", tick_num)
|
||||
results["projects"]["_mail"] = {"error": str(e)}
|
||||
|
||||
logger.debug("Tick %d complete: %d projects", tick_num, len(active_projects))
|
||||
logger.debug(
|
||||
"Tick %d complete: %d projects",
|
||||
tick_num,
|
||||
len(active_projects))
|
||||
|
||||
if self.on_tick_complete:
|
||||
try:
|
||||
@@ -314,7 +322,8 @@ class Ticker:
|
||||
# 8. 健康检查(僵尸检测)
|
||||
if self.health_checker:
|
||||
try:
|
||||
self.health_checker.check(project_id, db_path, self._tick_count)
|
||||
self.health_checker.check(
|
||||
project_id, db_path, self._tick_count)
|
||||
except Exception as e:
|
||||
logger.warning("HealthChecker error for %s: %s", project_id, e)
|
||||
|
||||
@@ -335,7 +344,8 @@ class Ticker:
|
||||
task_id=t.id, task_title=t.title, task_type=t.task_type
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("ExperienceDistiller error for %s: %s", project_id, e)
|
||||
logger.warning(
|
||||
"ExperienceDistiller error for %s: %s", project_id, e)
|
||||
|
||||
# 10. 扫描后状态
|
||||
result["summary_after"] = queries.task_summary()
|
||||
@@ -375,7 +385,8 @@ class Ticker:
|
||||
(computed, pid),
|
||||
)
|
||||
refreshed.append(pid)
|
||||
logger.info("Parent %s status aggregated: → %s", pid, computed)
|
||||
logger.info(
|
||||
"Parent %s status aggregated: → %s", pid, computed)
|
||||
|
||||
if refreshed:
|
||||
conn.commit()
|
||||
@@ -391,7 +402,7 @@ class Ticker:
|
||||
MAX_ROUNDS = 5 # §4.5 防无限循环
|
||||
|
||||
async def _check_round_complete(self, db_path: Path,
|
||||
project_id: str) -> List[str]:
|
||||
project_id: str) -> List[str]:
|
||||
"""检测 parent task 下所有 sub task 终态 → spawn 庞统 review
|
||||
|
||||
流程(§4.4):
|
||||
@@ -462,7 +473,7 @@ class Ticker:
|
||||
"Round %d review spawned for parent %s (subs: %s)",
|
||||
new_round, parent_id, summary
|
||||
)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.exception("Round check error for parent %s", parent_id)
|
||||
|
||||
return reviewed
|
||||
@@ -531,9 +542,9 @@ Parent Task ID: {parent_task.id}
|
||||
"""
|
||||
|
||||
async def _spawn_pangtong_review(self, parent_task,
|
||||
review_prompt: str,
|
||||
project_id: str,
|
||||
new_round: int = 0) -> bool:
|
||||
review_prompt: str,
|
||||
project_id: str,
|
||||
new_round: int = 0) -> bool:
|
||||
"""Spawn 庞统进行 review
|
||||
|
||||
流程:
|
||||
@@ -543,7 +554,7 @@ Parent Task ID: {parent_task.id}
|
||||
"""
|
||||
try:
|
||||
agent_id = "pangtong-fujunshi"
|
||||
session_id = f"review-{parent_task.id}-r{new_round}"
|
||||
f"review-{parent_task.id}-r{new_round}"
|
||||
|
||||
# 构造 on_complete 回调:解析庞统结论,更新 parent 状态
|
||||
async def _on_review_complete(aid: str, outcome: str):
|
||||
@@ -555,7 +566,8 @@ Parent Task ID: {parent_task.id}
|
||||
latest_meta = None
|
||||
latest_time = ""
|
||||
for sid, sess in self.spawner._sessions.items():
|
||||
if sess.get("agent_id") == agent_id and sess.get("meta"):
|
||||
if sess.get(
|
||||
"agent_id") == agent_id and sess.get("meta"):
|
||||
t = sess.get("completed_at", "")
|
||||
if t > latest_time:
|
||||
latest_time = t
|
||||
@@ -586,8 +598,10 @@ Parent Task ID: {parent_task.id}
|
||||
self._set_parent_reviewing(parent_task.id, project_id)
|
||||
return True
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.exception("Failed to spawn pangtong review for %s", parent_task.id)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Failed to spawn pangtong review for %s",
|
||||
parent_task.id)
|
||||
return False
|
||||
|
||||
def _set_parent_reviewing(self, parent_id: str, project_id: str):
|
||||
@@ -603,14 +617,14 @@ Parent Task ID: {parent_task.id}
|
||||
(parent_id,))
|
||||
conn.commit()
|
||||
logger.info("Parent %s → reviewing (round review in progress)",
|
||||
parent_id)
|
||||
parent_id)
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception:
|
||||
logger.exception("Failed to set parent %s to reviewing", parent_id)
|
||||
|
||||
def _handle_review_conclusion(self, parent_id: str, project_id: str,
|
||||
review_text: str, round_num: int):
|
||||
review_text: str, round_num: int):
|
||||
"""解析庞统 review 结论,更新 parent 状态
|
||||
|
||||
review_text 是庞统回复的文本(从 spawner session meta payloads 拼接)。
|
||||
@@ -619,7 +633,8 @@ Parent Task ID: {parent_task.id}
|
||||
conn = get_connection(db_path)
|
||||
try:
|
||||
# 解析 GOAL_ACHIEVED
|
||||
is_achieved = bool(review_text and "GOAL_ACHIEVED" in review_text.upper())
|
||||
is_achieved = bool(
|
||||
review_text and "GOAL_ACHIEVED" in review_text.upper())
|
||||
|
||||
if is_achieved:
|
||||
# Goal 达成 → parent 最终完成
|
||||
@@ -649,7 +664,9 @@ Parent Task ID: {parent_task.id}
|
||||
"(round %d, subs=%d)",
|
||||
parent_id, round_num, sub_count)
|
||||
except Exception:
|
||||
logger.exception("Failed to handle review conclusion for %s", parent_id)
|
||||
logger.exception(
|
||||
"Failed to handle review conclusion for %s",
|
||||
parent_id)
|
||||
# 安全恢复:reviewing → working
|
||||
try:
|
||||
conn.execute("BEGIN IMMEDIATE")
|
||||
@@ -675,7 +692,7 @@ Parent Task ID: {parent_task.id}
|
||||
MENTION_MAX_RETRIES = 5
|
||||
|
||||
async def _process_mentions(self, db_path: Path,
|
||||
project_id: str) -> List[str]:
|
||||
project_id: str) -> List[str]:
|
||||
"""扫描 pending mentions → spawn 被 @ 的 Agent
|
||||
|
||||
流程(§3.4):
|
||||
@@ -687,7 +704,8 @@ Parent Task ID: {parent_task.id}
|
||||
return []
|
||||
|
||||
bb = Blackboard(db_path)
|
||||
mentions = bb.get_pending_mentions(max_retries=self.MENTION_MAX_RETRIES)
|
||||
mentions = bb.get_pending_mentions(
|
||||
max_retries=self.MENTION_MAX_RETRIES)
|
||||
if not mentions:
|
||||
return []
|
||||
|
||||
@@ -751,27 +769,32 @@ Parent Task ID: {parent_task.id}
|
||||
|
||||
if new_review and new_review["verdict"] == "approved":
|
||||
_ticker._transition_status(
|
||||
get_connection(rdb_path), _t_id, "done",
|
||||
get_connection(
|
||||
rdb_path), _t_id, "done",
|
||||
agent="daemon",
|
||||
detail={"reason": "rebuttal_approved"})
|
||||
logger.info("Rebuttal: task %s approved after rebuttal", _t_id)
|
||||
logger.info(
|
||||
"Rebuttal: task %s approved after rebuttal", _t_id)
|
||||
else:
|
||||
# 仍非 approved → @mention assignee
|
||||
verdict_str = new_review["verdict"] if new_review else "未知"
|
||||
rconn2 = get_connection(rdb_path)
|
||||
try:
|
||||
t_row = rconn2.execute("SELECT assignee FROM tasks WHERE id=?", (_t_id,)).fetchone()
|
||||
t_row = rconn2.execute(
|
||||
"SELECT assignee FROM tasks WHERE id=?", (_t_id,)).fetchone()
|
||||
finally:
|
||||
rconn2.close()
|
||||
if t_row and t_row["assignee"]:
|
||||
from src.blackboard.blackboard import Blackboard
|
||||
bb2 = Blackboard(rdb_path)
|
||||
bb2.add_comment(_t_id, "daemon",
|
||||
f"@{t_row['assignee']} 审查结论: {verdict_str},请查看详情并决定接受或反驳",
|
||||
comment_type="review")
|
||||
logger.info("Rebuttal: task %s still %s after rebuttal", _t_id, verdict_str)
|
||||
f"@{t_row['assignee']} 审查结论: {verdict_str},请查看详情并决定接受或反驳",
|
||||
comment_type="review")
|
||||
logger.info(
|
||||
"Rebuttal: task %s still %s after rebuttal", _t_id, verdict_str)
|
||||
except Exception:
|
||||
logger.exception("Rebuttal on_complete failed for task %s", _t_id)
|
||||
logger.exception(
|
||||
"Rebuttal on_complete failed for task %s", _t_id)
|
||||
|
||||
result = await self.spawner.spawn_full_agent(
|
||||
agent_id=agent_id,
|
||||
@@ -794,22 +817,30 @@ Parent Task ID: {parent_task.id}
|
||||
for item in items:
|
||||
bb.mark_mention_notified(item["id"])
|
||||
processed.append(agent_id)
|
||||
logger.info("Mention spawn success: %s (%d mentions)", agent_id, len(items))
|
||||
logger.info(
|
||||
"Mention spawn success: %s (%d mentions)",
|
||||
agent_id,
|
||||
len(items))
|
||||
else:
|
||||
# spawn 返回 None(其他原因)→ 递增 retry_count
|
||||
for item in items:
|
||||
bb.mark_mention_retry(item["id"])
|
||||
logger.warning("Mention spawn failed: %s, retrying next tick", agent_id)
|
||||
logger.warning(
|
||||
"Mention spawn failed: %s, retrying next tick", agent_id)
|
||||
|
||||
except AgentBusyError:
|
||||
# Agent 忙,不递增 retry_count,等下次 tick 自然重试
|
||||
logger.info("Mention spawn skipped: %s busy, will retry next tick", agent_id)
|
||||
logger.info(
|
||||
"Mention spawn skipped: %s busy, will retry next tick",
|
||||
agent_id)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception("Mention processing error for agent %s", agent_id)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"Mention processing error for agent %s", agent_id)
|
||||
for item in items:
|
||||
try:
|
||||
if item.get("retry_count", 0) >= self.MENTION_MAX_RETRIES - 1:
|
||||
if item.get("retry_count",
|
||||
0) >= self.MENTION_MAX_RETRIES - 1:
|
||||
bb.mark_mention_failed(item["id"])
|
||||
else:
|
||||
bb.mark_mention_retry(item["id"])
|
||||
@@ -822,8 +853,14 @@ Parent Task ID: {parent_task.id}
|
||||
mention_lines: List[str],
|
||||
project_id: str) -> str:
|
||||
"""#03: @mention prompt(身份注入)"""
|
||||
api_host = getattr(self.spawner, 'api_host', '127.0.0.1') if self.spawner else '127.0.0.1'
|
||||
api_port = getattr(self.spawner, 'api_port', 8083) if self.spawner else 8083
|
||||
api_host = getattr(
|
||||
self.spawner,
|
||||
'api_host',
|
||||
'127.0.0.1') if self.spawner else '127.0.0.1'
|
||||
api_port = getattr(
|
||||
self.spawner,
|
||||
'api_port',
|
||||
8083) if self.spawner else 8083
|
||||
api_base = f"http://{api_host}:{api_port}/api"
|
||||
|
||||
# 获取 Agent 专长
|
||||
@@ -899,7 +936,8 @@ Parent Task ID: {parent_task.id}
|
||||
from datetime import datetime
|
||||
|
||||
conn.execute("BEGIN IMMEDIATE")
|
||||
row = conn.execute("SELECT status FROM tasks WHERE id=?", (task_id,)).fetchone()
|
||||
row = conn.execute(
|
||||
"SELECT status FROM tasks WHERE id=?", (task_id,)).fetchone()
|
||||
if not row:
|
||||
return False
|
||||
old_status = row["status"]
|
||||
@@ -938,7 +976,8 @@ Parent Task ID: {parent_task.id}
|
||||
event_type = "daemon_tick"
|
||||
conn.execute(
|
||||
"INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)",
|
||||
(task_id, agent, event_type, json.dumps({"from": old_status, "to": new_status, **(detail or {})})),
|
||||
(task_id, agent, event_type, json.dumps(
|
||||
{"from": old_status, "to": new_status, **(detail or {})})),
|
||||
)
|
||||
conn.commit()
|
||||
return True
|
||||
@@ -948,7 +987,7 @@ Parent Task ID: {parent_task.id}
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _dispatch_pending(self, db_path: Path,
|
||||
project_id: str) -> List[str]:
|
||||
project_id: str) -> List[str]:
|
||||
"""扫描 pending 任务并调度
|
||||
|
||||
v3.0: 两条路径
|
||||
@@ -978,9 +1017,12 @@ Parent Task ID: {parent_task.id}
|
||||
try:
|
||||
result = await self.dispatcher.dispatch(
|
||||
task,
|
||||
project_config={"project_id": project_id, "db_path": db_path},
|
||||
project_config={
|
||||
"project_id": project_id,
|
||||
"db_path": db_path},
|
||||
)
|
||||
if result["status"] == "dispatched" and result["level"] in ("full", "escalate"):
|
||||
if result["status"] == "dispatched" and result["level"] in (
|
||||
"full", "escalate"):
|
||||
conn = get_connection(db_path)
|
||||
try:
|
||||
# [v2.7.1] Mail 已在 dispatcher 中标 working,跳过 claimed
|
||||
@@ -1073,7 +1115,8 @@ Parent Task ID: {parent_task.id}
|
||||
detail={"reason": "no_taker_after_3_broadcasts",
|
||||
"round_number": self._broadcast_tracker.get(t.id).round_number if self._broadcast_tracker.get(t.id) else 0},
|
||||
)
|
||||
logger.warning("Escalated %s: no taker after 3 broadcast rounds", t.id)
|
||||
logger.warning(
|
||||
"Escalated %s: no taker after 3 broadcast rounds", t.id)
|
||||
self._broadcast_tracker.pop(t.id, None)
|
||||
finally:
|
||||
conn.close()
|
||||
@@ -1083,7 +1126,8 @@ Parent Task ID: {parent_task.id}
|
||||
|
||||
idle_agents = self._get_idle_agents()
|
||||
if not idle_agents:
|
||||
logger.warning("No idle agents for broadcast, skipping (capacity issue)")
|
||||
logger.warning(
|
||||
"No idle agents for broadcast, skipping (capacity issue)")
|
||||
return []
|
||||
|
||||
task_ids = [t.id for t in broadcastable]
|
||||
@@ -1114,7 +1158,8 @@ Parent Task ID: {parent_task.id}
|
||||
|
||||
spawned = []
|
||||
for agent_id in idle_agents:
|
||||
prompt = self._build_claim_prompt(agent_id, broadcastable, project_id)
|
||||
prompt = self._build_claim_prompt(
|
||||
agent_id, broadcastable, project_id)
|
||||
try:
|
||||
session_id = await self.spawner.spawn_full_agent(
|
||||
agent_id=agent_id,
|
||||
@@ -1128,7 +1173,8 @@ Parent Task ID: {parent_task.id}
|
||||
spawned.append(session_id)
|
||||
# 记录已通知的 Agent
|
||||
for t in broadcastable:
|
||||
self._broadcast_tracker[t.id].notified_agents.add(agent_id)
|
||||
self._broadcast_tracker[t.id].notified_agents.add(
|
||||
agent_id)
|
||||
except AgentBusyError:
|
||||
logger.debug("Broadcast skip %s: busy", agent_id)
|
||||
except Exception:
|
||||
@@ -1139,8 +1185,14 @@ Parent Task ID: {parent_task.id}
|
||||
def _build_claim_prompt(self, agent_id: str, tasks: list,
|
||||
project_id: str) -> str:
|
||||
"""#03: 广播认领 prompt(身份+专长注入)"""
|
||||
api_host = getattr(self.spawner, 'api_host', '127.0.0.1') if self.spawner else '127.0.0.1'
|
||||
api_port = getattr(self.spawner, 'api_port', 8083) if self.spawner else 8083
|
||||
api_host = getattr(
|
||||
self.spawner,
|
||||
'api_host',
|
||||
'127.0.0.1') if self.spawner else '127.0.0.1'
|
||||
api_port = getattr(
|
||||
self.spawner,
|
||||
'api_port',
|
||||
8083) if self.spawner else 8083
|
||||
api_base = f"http://{api_host}:{api_port}/api"
|
||||
|
||||
# 获取 Agent 专长
|
||||
@@ -1195,7 +1247,8 @@ Parent Task ID: {parent_task.id}
|
||||
@property
|
||||
def counter(self):
|
||||
"""从 Dispatcher 获取 counter"""
|
||||
return getattr(self.dispatcher, 'counter', None) if self.dispatcher else None
|
||||
return getattr(self.dispatcher, 'counter',
|
||||
None) if self.dispatcher else None
|
||||
|
||||
@staticmethod
|
||||
def _is_pid_alive(pid: int) -> bool:
|
||||
@@ -1207,7 +1260,8 @@ Parent Task ID: {parent_task.id}
|
||||
except (ProcessLookupError, PermissionError):
|
||||
return False
|
||||
|
||||
def record_broadcast_response(self, task_id: str, agent_id: str, outcome: str):
|
||||
def record_broadcast_response(
|
||||
self, task_id: str, agent_id: str, outcome: str):
|
||||
"""记录 Agent 对广播任务的反馈(Spawner 调用的公共 API)"""
|
||||
tracker = self._broadcast_tracker.get(task_id)
|
||||
if not tracker:
|
||||
@@ -1228,7 +1282,8 @@ Parent Task ID: {parent_task.id}
|
||||
|
||||
def _get_all_agent_ids(self) -> List[str]:
|
||||
"""获取所有配置的 Agent ID"""
|
||||
if self.dispatcher and hasattr(self.dispatcher, 'router') and self.dispatcher.router:
|
||||
if self.dispatcher and hasattr(
|
||||
self.dispatcher, 'router') and self.dispatcher.router:
|
||||
return list(self.dispatcher.router.agent_profiles.keys())
|
||||
return []
|
||||
|
||||
@@ -1237,12 +1292,13 @@ Parent Task ID: {parent_task.id}
|
||||
if not self.counter:
|
||||
return []
|
||||
# agent_profiles 在 Router 初始化时从 config 填充,是完整 Agent 列表
|
||||
all_agents = list(self.dispatcher.router.agent_profiles.keys()) if self.dispatcher else []
|
||||
all_agents = list(
|
||||
self.dispatcher.router.agent_profiles.keys()) if self.dispatcher else []
|
||||
active = self.counter.active_agents
|
||||
return [aid for aid in all_agents if active.get(aid, 0) == 0]
|
||||
|
||||
async def _dispatch_reviews(self, db_path: Path,
|
||||
project_id: str) -> List[str]:
|
||||
project_id: str) -> List[str]:
|
||||
"""扫描 review 状态任务,检查是否有产出,调度审查 Agent"""
|
||||
# mail 任务不走 review 流程,直接跳过
|
||||
if project_id == "_mail":
|
||||
@@ -1291,7 +1347,9 @@ Parent Task ID: {parent_task.id}
|
||||
result = await self.dispatcher.dispatch(
|
||||
task,
|
||||
action_type="review",
|
||||
project_config={"project_id": project_id, "db_path": db_path},
|
||||
project_config={
|
||||
"project_id": project_id,
|
||||
"db_path": db_path},
|
||||
)
|
||||
if result["status"] == "dispatched":
|
||||
dispatched.append(task.id)
|
||||
@@ -1344,7 +1402,7 @@ Parent Task ID: {parent_task.id}
|
||||
)
|
||||
reclaimed.append(task.id)
|
||||
logger.warning("Escalated %s: no taker after %d broadcasts",
|
||||
task.id, retry_count)
|
||||
task.id, retry_count)
|
||||
finally:
|
||||
conn.close()
|
||||
else:
|
||||
@@ -1375,8 +1433,10 @@ Parent Task ID: {parent_task.id}
|
||||
working = queries.tasks_by_status("working")
|
||||
for task in working:
|
||||
# #07.2: crash_limit 统一检查(比超时更严重的信号)
|
||||
if self.dispatcher and hasattr(self.dispatcher, '_check_crash_limit'):
|
||||
if self.dispatcher._check_crash_limit(task.id, db_path, limit=3, window_minutes=30):
|
||||
if self.dispatcher and hasattr(
|
||||
self.dispatcher, '_check_crash_limit'):
|
||||
if self.dispatcher._check_crash_limit(
|
||||
task.id, db_path, limit=3, window_minutes=30):
|
||||
conn = get_connection(db_path)
|
||||
try:
|
||||
self._transition_status(
|
||||
@@ -1388,7 +1448,8 @@ Parent Task ID: {parent_task.id}
|
||||
finally:
|
||||
conn.close()
|
||||
reclaimed.append(task.id)
|
||||
logger.error("Task %s: executor crash limit (3/30m), marking failed", task.id)
|
||||
logger.error(
|
||||
"Task %s: executor crash limit (3/30m), marking failed", task.id)
|
||||
continue
|
||||
|
||||
# #07.3 ACT-1: updated_at fallback 覆盖 mail auto-working(无 started_at/claimed_at)
|
||||
@@ -1400,7 +1461,8 @@ Parent Task ID: {parent_task.id}
|
||||
# per-task timeout: deadline 优先,否则用默认值
|
||||
if task.deadline:
|
||||
deadline_time = datetime.fromisoformat(task.deadline)
|
||||
timeout_minutes = (deadline_time - start_time).total_seconds() / 60.0
|
||||
timeout_minutes = (
|
||||
deadline_time - start_time).total_seconds() / 60.0
|
||||
if timeout_minutes < 1:
|
||||
timeout_minutes = self.default_task_timeout_minutes
|
||||
else:
|
||||
@@ -1423,7 +1485,7 @@ Parent Task ID: {parent_task.id}
|
||||
if ok:
|
||||
reclaimed.append(task.id)
|
||||
logger.info("Mail %s: ticker recheck found reply, marked done (%.1fm)",
|
||||
task.id, elapsed)
|
||||
task.id, elapsed)
|
||||
finally:
|
||||
conn.close()
|
||||
continue
|
||||
@@ -1440,15 +1502,17 @@ Parent Task ID: {parent_task.id}
|
||||
if ok:
|
||||
reclaimed.append(task.id)
|
||||
logger.warning("Task %s timed out (working %.1fm > %.1fm)",
|
||||
task.id, elapsed, timeout_minutes)
|
||||
task.id, elapsed, timeout_minutes)
|
||||
finally:
|
||||
conn.close()
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
# v2.7.2: 进程存活性检查 — counter 占用但进程已死的兜底
|
||||
if self.spawner and self.counter and hasattr(self.counter, "active_agents"):
|
||||
for agent_id in list(self.counter.active_agents.keys()) if hasattr(self.counter, "active_agents") else []:
|
||||
if self.spawner and self.counter and hasattr(
|
||||
self.counter, "active_agents"):
|
||||
for agent_id in list(self.counter.active_agents.keys()) if hasattr(
|
||||
self.counter, "active_agents") else []:
|
||||
session_info = self.spawner.get_session_by_agent(agent_id)
|
||||
if not session_info:
|
||||
continue
|
||||
@@ -1465,20 +1529,24 @@ Parent Task ID: {parent_task.id}
|
||||
conn = get_connection(db_path)
|
||||
try:
|
||||
current_row = conn.execute(
|
||||
"SELECT status FROM tasks WHERE id=?", (task_id_check,)
|
||||
"SELECT status FROM tasks WHERE id=?", (
|
||||
task_id_check,)
|
||||
).fetchone()
|
||||
if current_row and current_row["status"] == "review":
|
||||
logger.info("Task %s in review, keeping status (process dead)", task_id_check)
|
||||
logger.info(
|
||||
"Task %s in review, keeping status (process dead)", task_id_check)
|
||||
else:
|
||||
self._transition_status(
|
||||
conn, task_id_check, "pending",
|
||||
agent="daemon",
|
||||
detail={"reason": "process_dead", "pid": pid},
|
||||
detail={
|
||||
"reason": "process_dead", "pid": pid},
|
||||
)
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception:
|
||||
logger.exception("Failed to handle process dead for task %s", task_id_check)
|
||||
logger.exception(
|
||||
"Failed to handle process dead for task %s", task_id_check)
|
||||
|
||||
# #07.2: Fix-3b 已删除。review 超时/crash 统一由 process_dead + _check_timeouts 处理
|
||||
|
||||
@@ -1497,16 +1565,20 @@ Parent Task ID: {parent_task.id}
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logger.error("Mail %s: ticker reply check error: %s", original_task_id, e)
|
||||
logger.error(
|
||||
"Mail %s: ticker reply check error: %s",
|
||||
original_task_id,
|
||||
e)
|
||||
return True # 保守:查询失败假设有回复
|
||||
|
||||
def _check_recent_routing(self, db_path: Path, task_id: str,
|
||||
action_type: str) -> bool:
|
||||
action_type: str) -> bool:
|
||||
"""检查最近 5 分钟内是否已 dispatch 过指定类型的路由(防重复)"""
|
||||
try:
|
||||
conn = get_connection(db_path)
|
||||
try:
|
||||
# 检查是否有 from_status=review 的 dispatched 记录(防止重复 review dispatch)
|
||||
# 检查是否有 from_status=review 的 dispatched 记录(防止重复 review
|
||||
# dispatch)
|
||||
if action_type == "review":
|
||||
row = conn.execute(
|
||||
"SELECT COUNT(*) as cnt FROM routing_decisions "
|
||||
@@ -1537,17 +1609,22 @@ Parent Task ID: {parent_task.id}
|
||||
NON_TERMINAL = {"claimed", "working", "review", "reviewing"}
|
||||
|
||||
projects = self.registry.list_projects()
|
||||
recovery_report = {"projects": {}, "total_recovered": 0, "total_noop": 0}
|
||||
recovery_report = {
|
||||
"projects": {},
|
||||
"total_recovered": 0,
|
||||
"total_noop": 0}
|
||||
|
||||
# 收集所有需要扫描的项目(registry + 虚拟项目)
|
||||
project_dirs = {}
|
||||
for project_id, project_info in projects.items():
|
||||
if project_info.get("status") == "active":
|
||||
project_dirs[project_id] = self.registry.root / project_id / "blackboard.db"
|
||||
project_dirs[project_id] = self.registry.root / \
|
||||
project_id / "blackboard.db"
|
||||
|
||||
# 虚拟项目
|
||||
for virtual_id in ("_general", "_mail"):
|
||||
virtual_db = Path(self.registry.root) / virtual_id / "blackboard.db"
|
||||
virtual_db = Path(self.registry.root) / \
|
||||
virtual_id / "blackboard.db"
|
||||
if virtual_db.exists() and virtual_id not in project_dirs:
|
||||
project_dirs[virtual_id] = virtual_db
|
||||
|
||||
@@ -1567,25 +1644,28 @@ Parent Task ID: {parent_task.id}
|
||||
old_pid = self._current_project_id
|
||||
self._current_project_id = project_id
|
||||
try:
|
||||
recovered, noop_count = self._recover_project(db_path, NON_TERMINAL)
|
||||
recovered, noop_count = self._recover_project(
|
||||
db_path, NON_TERMINAL)
|
||||
if recovered:
|
||||
recovery_report["projects"][project_id] = recovered
|
||||
recovery_report["total_recovered"] += len(recovered)
|
||||
recovery_report["total_noop"] += noop_count
|
||||
except Exception:
|
||||
logger.exception("Startup recovery failed for project %s", project_id)
|
||||
logger.exception(
|
||||
"Startup recovery failed for project %s", project_id)
|
||||
finally:
|
||||
self._current_project_id = old_pid
|
||||
|
||||
if recovery_report["total_recovered"] > 0:
|
||||
logger.info("Startup recovery: %d tasks recovered across %d projects",
|
||||
recovery_report["total_recovered"],
|
||||
len(recovery_report["projects"]))
|
||||
recovery_report["total_recovered"],
|
||||
len(recovery_report["projects"]))
|
||||
elif recovery_report["total_noop"] > 0:
|
||||
logger.info("Startup recovery: %d tasks kept as-is (no recovery needed)",
|
||||
recovery_report["total_noop"])
|
||||
recovery_report["total_noop"])
|
||||
else:
|
||||
logger.info("Startup recovery: no non-terminal tasks found, clean start")
|
||||
logger.info(
|
||||
"Startup recovery: no non-terminal tasks found, clean start")
|
||||
|
||||
return recovery_report
|
||||
|
||||
@@ -1608,10 +1688,13 @@ Parent Task ID: {parent_task.id}
|
||||
|
||||
for task in rows:
|
||||
try:
|
||||
action = self._determine_recovery_action(conn, task, status, db_path)
|
||||
action = self._determine_recovery_action(
|
||||
conn, task, status, db_path)
|
||||
if action:
|
||||
self._execute_recovery(conn, task["id"], action, db_path)
|
||||
recovered.append({"task_id": task["id"], "from": status, "action": action})
|
||||
self._execute_recovery(
|
||||
conn, task["id"], action, db_path)
|
||||
recovered.append(
|
||||
{"task_id": task["id"], "from": status, "action": action})
|
||||
else:
|
||||
# 审计:保持原状的任务也记录事件
|
||||
noop_count += 1
|
||||
@@ -1622,14 +1705,15 @@ Parent Task ID: {parent_task.id}
|
||||
)
|
||||
conn.commit()
|
||||
except Exception:
|
||||
logger.exception("Startup recovery failed for task %s", task["id"])
|
||||
logger.exception(
|
||||
"Startup recovery failed for task %s", task["id"])
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
return recovered, noop_count
|
||||
|
||||
def _determine_recovery_action(self, conn, task, status: str,
|
||||
db_path: Path) -> Optional[str]:
|
||||
db_path: Path) -> Optional[str]:
|
||||
"""根据黑板线索决定恢复动作,返回 None 表示不需要干预"""
|
||||
task_id = task["id"]
|
||||
|
||||
@@ -1700,7 +1784,8 @@ Parent Task ID: {parent_task.id}
|
||||
# 无审查结论 → 保持 review,ticker 自然会 dispatch reviewer
|
||||
return None
|
||||
|
||||
def _execute_recovery(self, conn, task_id: str, action: str, db_path: Path):
|
||||
def _execute_recovery(self, conn, task_id: str,
|
||||
action: str, db_path: Path):
|
||||
"""执行恢复动作"""
|
||||
# 获取原始状态(用于审计)
|
||||
orig_row = conn.execute(
|
||||
@@ -1712,17 +1797,22 @@ Parent Task ID: {parent_task.id}
|
||||
self._transition_status(
|
||||
conn, task_id, "pending",
|
||||
agent="daemon",
|
||||
detail={"reason": "startup_recovery", "original_status": orig_status},
|
||||
detail={
|
||||
"reason": "startup_recovery",
|
||||
"original_status": orig_status},
|
||||
)
|
||||
# 清空 current_agent(常规推 pending,无特定 agent 接手)
|
||||
conn.execute("UPDATE tasks SET current_agent=NULL WHERE id=?", (task_id,))
|
||||
conn.execute(
|
||||
"UPDATE tasks SET current_agent=NULL WHERE id=?", (task_id,))
|
||||
conn.commit()
|
||||
|
||||
elif action == "push_to_pending_keep_agent":
|
||||
self._transition_status(
|
||||
conn, task_id, "pending",
|
||||
agent="daemon",
|
||||
detail={"reason": "startup_recovery", "original_status": orig_status},
|
||||
detail={
|
||||
"reason": "startup_recovery",
|
||||
"original_status": orig_status},
|
||||
)
|
||||
# 保留 current_agent,让同一 agent 重新接手
|
||||
conn.commit()
|
||||
@@ -1731,7 +1821,9 @@ Parent Task ID: {parent_task.id}
|
||||
self._transition_status(
|
||||
conn, task_id, "review",
|
||||
agent="daemon",
|
||||
detail={"reason": "startup_recovery", "original_status": "working"},
|
||||
detail={
|
||||
"reason": "startup_recovery",
|
||||
"original_status": "working"},
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
@@ -1739,7 +1831,9 @@ Parent Task ID: {parent_task.id}
|
||||
self._transition_status(
|
||||
conn, task_id, "done",
|
||||
agent="daemon",
|
||||
detail={"reason": "startup_recovery", "original_status": orig_status},
|
||||
detail={
|
||||
"reason": "startup_recovery",
|
||||
"original_status": orig_status},
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
@@ -1747,22 +1841,30 @@ Parent Task ID: {parent_task.id}
|
||||
self._transition_status(
|
||||
conn, task_id, "failed",
|
||||
agent="daemon",
|
||||
detail={"reason": "startup_recovery", "original_status": orig_status},
|
||||
detail={
|
||||
"reason": "startup_recovery",
|
||||
"original_status": orig_status},
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
# 记录恢复审计事件
|
||||
conn.execute(
|
||||
"INSERT INTO events (task_id, agent, event_type, detail) VALUES (?, ?, ?, ?)",
|
||||
(task_id, "daemon", "startup_recovery", json.dumps({"action": action}))
|
||||
(task_id, "daemon", "startup_recovery",
|
||||
json.dumps({"action": action}))
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
logger.info("Recovery: task %s → %s (action=%s)", task_id, action, action)
|
||||
logger.info(
|
||||
"Recovery: task %s → %s (action=%s)",
|
||||
task_id,
|
||||
action,
|
||||
action)
|
||||
|
||||
def _find_pre_reviewing_status(self, conn, task_id: str) -> str:
|
||||
"""查 events 表找到 reviewing 之前的状态(done 或 failed)"""
|
||||
# _transition_status 写入 event_type=f"task_{new_status}",detail 用 from/to
|
||||
# _transition_status 写入 event_type=f"task_{new_status}",detail 用
|
||||
# from/to
|
||||
rows = conn.execute(
|
||||
"""SELECT detail FROM events
|
||||
WHERE task_id=? AND event_type='task_reviewing'
|
||||
@@ -1773,7 +1875,8 @@ Parent Task ID: {parent_task.id}
|
||||
for event in rows:
|
||||
try:
|
||||
detail = json.loads(event["detail"])
|
||||
# _transition_status detail 格式: {"from": old_status, "to": new_status, ...}
|
||||
# _transition_status detail 格式: {"from": old_status, "to":
|
||||
# new_status, ...}
|
||||
prev = detail.get("from") or detail.get("old_status")
|
||||
if prev in ("done", "failed"):
|
||||
return prev
|
||||
|
||||
Reference in New Issue
Block a user