fix(lint): 修复 PR #14 引入的 lint 回退 (119→0)
CI / lint (pull_request) Successful in 6s
CI / test (pull_request) Successful in 9s
CI / notify-on-failure (pull_request) Successful in 0s

PR #14 从旧分支复制文件导致回退了 PR #10 的 lint 修复。
修复内容:
- autoflake 移除未使用导入/变量
- autopep8 修复缩进/空格
- 手动修复 F821(pathlib→Path), F541(f-string), F841(未使用变量)
- 所有修复均通过 flake8 --max-line-length=120 --extend-ignore=E501 检查 (0 errors)
This commit is contained in:
cfdaily
2026-06-09 23:53:29 +08:00
parent 7184079a75
commit d58e38d58f
27 changed files with 863 additions and 417 deletions
+168 -75
View File
@@ -15,7 +15,7 @@ from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from src.blackboard.db import get_connection, init_db
from src.blackboard.db import get_connection
logger = logging.getLogger("moziplus-v2.spawner")
@@ -163,9 +163,12 @@ class AgentBusyError(Exception):
#07: reason 字段区分具体原因,便于 dispatcher 层区分处理。
"""
def __init__(self, agent_id: str, reason: str = "busy", detail: Optional[dict] = None):
def __init__(self, agent_id: str, reason: str = "busy",
detail: Optional[dict] = None):
self.agent_id = agent_id
self.reason = reason # counter_blocked / session_locked / session_running / session_compacting / session_stuck
# counter_blocked / session_locked / session_running / session_compacting / session_stuck
self.reason = reason
self.detail = detail or {}
super().__init__(f"{agent_id}: {reason}")
@@ -277,11 +280,15 @@ class AgentSpawner:
# mail 任务用精简模板
if project_id == "_mail":
return self._build_mail_prompt(task_id, title, description, must_haves, agent_id)
return self._build_mail_prompt(
task_id, title, description, must_haves, agent_id)
# 走 BootstrapBuilder 新路径
if self.bootstrap_builder and task is not None:
role_map = {"executor": "executor", "review": "reviewer", "discussion": "planner"}
role_map = {
"executor": "executor",
"review": "reviewer",
"discussion": "planner"}
role = role_map.get(spawn_type, "executor")
bootstrap_prompt = self.bootstrap_builder.build_for_task(
task=task,
@@ -293,13 +300,14 @@ class AgentSpawner:
# 无 BootstrapBuilder 或无 task 对象 → 最小 fallback
# 只保留任务上下文 + API 操作指令
logger.warning("No BootstrapBuilder or task object, using minimal fallback")
logger.warning(
"No BootstrapBuilder or task object, using minimal fallback")
return self._build_minimal_fallback(
task_id, title, description, must_haves,
project_id, agent_id)
def _build_minimal_fallback(self, task_id, title, description, must_haves,
project_id, agent_id):
project_id, agent_id):
"""最小 fallback:只有任务上下文 + API 指令"""
task_section = f"""## 任务
{title}
@@ -311,7 +319,7 @@ class AgentSpawner:
return task_section + "\n\n---\n\n" + api_section
def _build_api_section(self, project_id: str, task_id: str,
agent_id: str) -> str:
agent_id: str) -> str:
"""构建 API 回写操作指令(BootstrapBuilder 模式下补充)"""
# mail 任务直接 done,不走 review
success_status = '"done"' if project_id == "_mail" else '"review"'
@@ -337,8 +345,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
"""
def _build_discussion_prompt(self, task_id: str, title: str,
description: str, must_haves: str,
project_id: str, agent_id: str) -> str:
description: str, must_haves: str,
project_id: str, agent_id: str) -> str:
"""构建讨论类 spawn prompt(§3.3 框架 + Boids)"""
goal_snapshot = description or title
constraints = must_haves or "(无特殊约束)"
@@ -368,7 +376,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
if not self.guardrails:
return "无特殊限制"
try:
return "".join(r.get("name", r.get("rule_id", "")) for r in self.guardrails.rules[:6])
return "".join(r.get("name", r.get("rule_id", ""))
for r in self.guardrails.rules[:6])
except Exception:
return "无特殊限制"
@@ -379,9 +388,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
return router.agent_profiles.get(agent_id)
return None
def _build_mail_prompt(self, task_id: str, title: str, description: str,
must_haves: str, agent_id: str) -> str:
must_haves: str, agent_id: str) -> str:
"""构建 Mail 专用精简模板"""
# 解析 must_haves 获取 from 和 performative
from_agent = agent_id
@@ -389,7 +397,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
try:
meta = json.loads(must_haves) if must_haves else {}
from_agent = meta.get("from", agent_id)
performative = meta.get("performative", meta.get("type", "request"))
performative = meta.get(
"performative", meta.get(
"type", "request"))
except Exception:
pass
@@ -472,7 +482,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
self._revive_session(agent_id)
elif pre_state.get("status") == "running" and not pre_state.get("lock_pid_alive"):
# status=running 但 lock PID 已死 → 假死,revive
logger.warning("Phase 0: %s status=running but lock PID dead, reviving", agent_id)
logger.warning(
"Phase 0: %s status=running but lock PID dead, reviving",
agent_id)
self._revive_session(agent_id)
# Phase 1: Counter acquire(互斥锁)
@@ -487,12 +499,15 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
if use_main_session:
session_state = self._check_session_state(agent_id)
logger.info("Phase 2 session check for %s: status=%s lock_pid=%s lock_pid_alive=%s compact=%s",
agent_id, session_state.get('status'), session_state.get('lock_pid'),
agent_id, session_state.get(
'status'), session_state.get('lock_pid'),
session_state.get('lock_pid_alive'), session_state.get('recent_compact'))
blockers = []
if session_state.get("lock_pid_alive") and not session_state.get("lock_expired"):
blockers.append(("session_locked", session_state.get("lock_pid")))
if session_state.get(
"lock_pid_alive") and not session_state.get("lock_expired"):
blockers.append(
("session_locked", session_state.get("lock_pid")))
if session_state.get("status") == "running":
if session_state.get("lock_pid_alive"):
# 真 running:外部进程占用
@@ -515,7 +530,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
# Phase 2.5: 假死修复(status=running + lock PID 死 → revive → 重检)
# 此场景应被 Phase 0 提前修复,这里做兜底
if session_state.get("status") == "running" and not session_state.get("lock_pid_alive"):
if session_state.get("status") == "running" and not session_state.get(
"lock_pid_alive"):
logger.warning("Phase 2.5: %s status=running + lock dead (should be caught in Phase 0), reviving",
agent_id)
self._revive_session(agent_id)
@@ -538,7 +554,10 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
raise
if self.dry_run:
logger.info("[DRY RUN] Would spawn agent %s (session=%s)", agent_id, _sid_key)
logger.info(
"[DRY RUN] Would spawn agent %s (session=%s)",
agent_id,
_sid_key)
self._register_session(_sid_key, agent_id, task_id, pid=None)
return _sid_key
@@ -554,7 +573,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
if asyncio.iscoroutine(result):
await result
except Exception:
logger.warning("Business on_complete failed for %s", aid, exc_info=True)
logger.warning(
"Business on_complete failed for %s", aid, exc_info=True)
cmd = [
"openclaw", "agent",
@@ -575,7 +595,7 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
stderr=asyncio.subprocess.PIPE,
)
self._register_session(session_id, agent_id, task_id, proc.pid,
broadcast_task_ids=broadcast_task_ids)
broadcast_task_ids=broadcast_task_ids)
logger.info("Spawned agent %s (session=%s, pid=%d)",
agent_id, session_id, proc.pid)
@@ -593,7 +613,11 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
if self.counter:
self.counter.release(agent_id, _sid_key)
logger.exception("Failed to spawn agent %s", agent_id)
self._record_attempt(task_id, agent_id, "spawn_failed", error=str(e))
self._record_attempt(
task_id,
agent_id,
"spawn_failed",
error=str(e))
raise
async def spawn_subagent(
@@ -609,7 +633,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
session_id = str(uuid.uuid4())
if self.dry_run:
logger.info("[DRY RUN] Would spawn subagent (session=%s)", session_id)
logger.info(
"[DRY RUN] Would spawn subagent (session=%s)",
session_id)
self._register_session(session_id, "subagent", task_id, pid=None)
return session_id
@@ -729,10 +755,16 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
agent_id, session_id, json_result)
# 查任务实际状态
task_status = self._get_task_status(db_path, task_id) if task_id else None
task_status = self._get_task_status(
db_path, task_id) if task_id else None
# 分类
cls = self._classify_outcome(exit_code, json_result, stderr_text, task_status, stdout_text)
cls = self._classify_outcome(
exit_code,
json_result,
stderr_text,
task_status,
stdout_text)
outcome = cls["outcome"]
# 更新 session 状态
@@ -761,17 +793,21 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
agent_id, session_id, outcome, exit_code, task_status)
# 广播反馈追踪(Phase 1 bug fix)
if task_id == "broadcast" and hasattr(self, '_ticker') and self._ticker:
if task_id == "broadcast" and hasattr(
self, '_ticker') and self._ticker:
# 广播任务:从 session 信息取真实 task_id 列表,逐一回调 tracker
sess_info = self._sessions.get(session_id or "main", {})
bt_ids = sess_info.get("broadcast_task_ids") or []
# 广播场景一律标 no_reply:Agent 只 claim 一个任务,
# 其余任务的 tracker 不能被 claimed 清除
for real_task_id in bt_ids:
self._ticker.record_broadcast_response(real_task_id, agent_id, "no_reply")
self._ticker.record_broadcast_response(
real_task_id, agent_id, "no_reply")
elif task_id and hasattr(self, '_ticker') and self._ticker:
outcome_str = "claimed" if cls.get("status") == "ok" else "no_reply"
self._ticker.record_broadcast_response(task_id, agent_id, outcome_str)
outcome_str = "claimed" if cls.get(
"status") == "ok" else "no_reply"
self._ticker.record_broadcast_response(
task_id, agent_id, outcome_str)
if cls["should_retry"]:
# cooldown: 新增的可恢复场景(A14/A15/A16/A8/A10)
@@ -850,14 +886,24 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
if outcome == "crashed" and self.counter:
self.counter.set_cooldown(agent_id, seconds=60)
logger.info("Crash cooldown set for %s: 60s (outcome=%s)", agent_id, outcome)
logger.info(
"Crash cooldown set for %s: 60s (outcome=%s)",
agent_id,
outcome)
elif outcome in ("compact_failed", "process_crash", "session_stuck",
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
logger.info("Error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
logger.info(
"Error cooldown set for %s: 300s (outcome=%s)",
agent_id,
outcome)
# F1: 不可恢复 outcome → 立刻标 failed + 写黑板
if outcome in ("auth_failed", "agent_error") and db_path and task_id:
logger.error("Task %s: unrecoverable outcome=%s, marking failed immediately", task_id, outcome)
if outcome in ("auth_failed",
"agent_error") and db_path and task_id:
logger.error(
"Task %s: unrecoverable outcome=%s, marking failed immediately",
task_id,
outcome)
self._mark_task(db_path, task_id, "failed", {
"reason": outcome,
"stderr_preview": (stderr_text or "")[:500],
@@ -881,13 +927,16 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
except Exception:
pass
stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace")
# stderr collected but not used in this handler
# (kept for potential future diagnostics)
b"".join(stderr_chunks).decode("utf-8", errors="replace")
# 检查 session 状态
state = self._check_session_state(agent_id)
# B1: 假死 - 先复活,连续假死 ≥2 次再 failed
if state.get("status") == "running" and not state.get("lock_pid_alive", True):
if state.get("status") == "running" and not state.get(
"lock_pid_alive", True):
# 假死计数
stuck_count = self._stuck_counts.get(task_id, 0) + 1
self._stuck_counts[task_id] = stuck_count
@@ -913,7 +962,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
await self._do_on_complete_async(on_complete, agent_id, "session_revived")
else:
# 复活失败 → 标 failed
logger.error("Agent %s revive failed, marking failed", agent_id)
logger.error(
"Agent %s revive failed, marking failed", agent_id)
self._mark_task(db_path, task_id, "failed",
{"reason": "revive_failed", "stuck_count": stuck_count,
"diagnostics": state})
@@ -994,7 +1044,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
"SELECT status FROM tasks WHERE id=?", (task_id,)
).fetchone()
# Bug-6 fix: pending 不是终态
if row and row["status"] in ("done", "failed", "cancelled", "review"):
if row and row["status"] in (
"done", "failed", "cancelled", "review"):
logger.info("Retry skip: task %s already %s (agent=%s)",
task_id, row["status"], agent_id)
# on_complete = wrapped_on_complete,会 release counter
@@ -1003,7 +1054,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
finally:
conn.close()
except Exception:
logger.warning("Retry status check failed for %s, proceeding", task_id)
logger.warning(
"Retry status check failed for %s, proceeding", task_id)
# 直接读写 tasks 表的 retry_count
if retry_field == "retry_count" and db_path and task_id:
@@ -1023,7 +1075,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
finally:
conn.close()
except Exception:
logger.exception("Failed to update retry_count for task %s", task_id)
logger.exception(
"Failed to update retry_count for task %s", task_id)
count = 1
else:
retry_counts = self._get_retry_counts(db_path, task_id)
@@ -1107,7 +1160,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
"""
text = stdout_text.strip()
if not text:
return {"status": None, "summary": None, "fallback_used": False, "fallback_reason": None, "payloads": []}
return {"status": None, "summary": None, "fallback_used": False,
"fallback_reason": None, "payloads": []}
try:
data = json.loads(text)
except json.JSONDecodeError:
@@ -1119,7 +1173,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
except json.JSONDecodeError:
continue
else:
return {"status": None, "summary": None, "fallback_used": False, "fallback_reason": None, "payloads": []}
return {"status": None, "summary": None, "fallback_used": False,
"fallback_reason": None, "payloads": []}
# 从 data.result.meta.executionTrace 取 fallback 信息
result = data.get("result", {})
@@ -1135,7 +1190,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
}
@staticmethod
def _get_task_status(db_path: Optional[Path], task_id: Optional[str]) -> Optional[str]:
def _get_task_status(
db_path: Optional[Path], task_id: Optional[str]) -> Optional[str]:
"""查任务实际 API 状态"""
if not db_path or not task_id:
return None
@@ -1152,7 +1208,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
return None
@staticmethod
def _get_task_info(db_path: Optional[Path], task_id: Optional[str]) -> Optional[dict]:
def _get_task_info(db_path: Optional[Path],
task_id: Optional[str]) -> Optional[dict]:
"""查任务基本信息"""
if not db_path or not task_id:
return None
@@ -1160,7 +1217,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
conn = get_connection(db_path)
try:
row = conn.execute(
"SELECT id, title, status FROM tasks WHERE id=?", (task_id,)
"SELECT id, title, status FROM tasks WHERE id=?", (
task_id,)
).fetchone()
if not row:
return None
@@ -1192,7 +1250,9 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
sessions[main_key] = main_session
with open(sessions_path, "w") as f:
json.dump(sessions, f, indent=2)
logger.info("Revived %s: sessions.json status changed running→idle", agent_id)
logger.info(
"Revived %s: sessions.json status changed running→idle",
agent_id)
# #07 O4: 同时清理残留 lock 文件
sf = main_session.get("sessionFile", "")
if sf:
@@ -1200,7 +1260,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
if lock_path.exists():
try:
lock_path.unlink()
logger.info("Cleaned stale lock for %s: %s", agent_id, lock_path.name)
logger.info(
"Cleaned stale lock for %s: %s",
agent_id,
lock_path.name)
except Exception:
pass
return True
@@ -1209,7 +1272,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
return False
@staticmethod
def _check_recent_compaction_jsonl(session_file: str, window_seconds: int = 900) -> bool:
def _check_recent_compaction_jsonl(
session_file: str, window_seconds: int = 900) -> bool:
"""v2.8.2 Fix-2: 读 session jsonl 末尾,检查是否有 window_seconds 内的 compaction 记录。
比 compactionCheckpoints 更可靠:Gateway 每次完成 compact 必然在 jsonl 末尾追加记录,
@@ -1219,7 +1283,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。
正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。
"""
if not session_file or not pathlib.Path(session_file).exists():
if not session_file or not Path(session_file).exists():
return False
try:
from datetime import datetime, timezone
@@ -1241,7 +1305,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
ts = obj.get("timestamp", "")
if ts:
try:
ct = datetime.fromisoformat(ts.replace("Z", "+00:00"))
ct = datetime.fromisoformat(
ts.replace("Z", "+00:00"))
if (now - ct).total_seconds() < window_seconds:
return True
except (ValueError, TypeError):
@@ -1265,7 +1330,11 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
v2.8.1: compact 检测改用 session jsonl 末尾扫描(Fix-1),
替代失效的 compactionCheckpoints 检测。
"""
result = {"status": "unknown", "lock_pid": None, "lock_pid_alive": False, "recent_compact": False}
result = {
"status": "unknown",
"lock_pid": None,
"lock_pid_alive": False,
"recent_compact": False}
sessions_path = Path(os.environ.get(
"OPENCLAW_HOME", str(Path.home() / ".openclaw")
)) / "agents" / agent_id / "sessions" / "sessions.json"
@@ -1304,8 +1373,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
created_at_str = lock_data.get("createdAt", "")
if created_at_str:
from datetime import datetime as _dt, timezone as _tz
created_dt = _dt.fromisoformat(created_at_str.replace("Z", "+00:00"))
elapsed = (_dt.now(_tz.utc) - created_dt).total_seconds()
created_dt = _dt.fromisoformat(
created_at_str.replace("Z", "+00:00"))
elapsed = (_dt.now(_tz.utc) -
created_dt).total_seconds()
if elapsed > 1800: # 30 minutes
result["lock_pid_alive"] = False
result["lock_expired"] = True
@@ -1318,8 +1389,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# v2.8.1 Fix-1: compact 检测改用 session jsonl 末尾扫描
# 只在 agent 非空闲时才扫描(减少不必要 I/O)
if result["status"] not in ("done", "idle", "unknown", None) and sf:
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(sf)
if result["status"] not in (
"done", "idle", "unknown", None) and sf:
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(
sf)
except Exception:
pass
return result
@@ -1364,14 +1437,17 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# A15/A16: stderr 含 network/compact 关键字 → 可恢复
if stderr_text:
stderr_lower = stderr_text.lower()
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
if any(kw in stderr_lower for kw in [
"econnrefused", "etimedout", "gateway closed", "econnreset"]):
return {"outcome": "gateway_unreachable", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
if any(kw in stderr_lower for kw in [
"compaction-diag", "context-overflow"]):
return {"outcome": "compact_interrupted", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
# A17: 真正的 crash → 保持 working,ticker 兜底
return {"outcome": "crashed", "should_retry": False, "original": "process_crash"}
return {"outcome": "crashed", "should_retry": False,
"original": "process_crash"}
# A13 revised: stdout 为空但 exit=0 → 信任进程退出码,视为正常完成
# 实测发现 openclaw session=None + exit=0 是正常场景(inform 通知等)
@@ -1382,25 +1458,32 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# A7-A12: status=error → 不续杯,stderr 辅助分类
if status == "error":
stderr_lower = stderr_text.lower()
if any(kw in stderr_lower for kw in ["401", "403", "unauthorized", "auth"]):
if any(kw in stderr_lower for kw in [
"401", "403", "unauthorized", "auth"]):
return {"outcome": "auth_failed", "should_retry": False}
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
if any(kw in stderr_lower for kw in [
"econnrefused", "etimedout", "gateway closed", "econnreset"]):
return {"outcome": "gateway_unreachable", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
if any(kw in stderr_lower for kw in ["rate_limit", "500", "503", "api error"]):
if any(kw in stderr_lower for kw in [
"rate_limit", "500", "503", "api error"]):
return {"outcome": "api_error", "should_retry": False}
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
if any(kw in stderr_lower for kw in [
"compaction-diag", "context-overflow"]):
return {"outcome": "compact_failed", "should_retry": False}
if any(kw in stderr_lower for kw in ["lock", "busy", "concurrent", "lane task error"]):
if any(kw in stderr_lower for kw in [
"lock", "busy", "concurrent", "lane task error"]):
return {"outcome": "lock_conflict", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
return {"outcome": "agent_error", "should_retry": False}
# 兜底:status 未知值
return {"outcome": "agent_error", "should_retry": False, "original": "unknown_status"}
return {"outcome": "agent_error",
"should_retry": False, "original": "unknown_status"}
@staticmethod
def _get_retry_counts(db_path: Optional[Path], task_id: Optional[str]) -> dict:
def _get_retry_counts(
db_path: Optional[Path], task_id: Optional[str]) -> dict:
"""从最新 task_attempt 的 metadata 读计数器"""
defaults = {"retry_count": 0, "connect_retry_count": 0,
"api_retry_count": 0, "lock_retry_count": 0,
@@ -1426,7 +1509,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
return defaults
def _update_retry_counts(self, db_path: Optional[Path],
task_id: Optional[str], counts: dict):
task_id: Optional[str], counts: dict):
"""将 retry counts 写回最新 task_attempt 的 metadata"""
if not db_path or not task_id:
return
@@ -1440,7 +1523,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
(task_id,)
).fetchone()
if row:
meta = json.loads(row["metadata"]) if row["metadata"] else {}
meta = json.loads(
row["metadata"]) if row["metadata"] else {}
meta.update(counts)
conn.execute(
"UPDATE task_attempts SET metadata=? WHERE rowid=?",
@@ -1450,7 +1534,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
finally:
conn.close()
except Exception:
logger.exception("Failed to update retry counts for task %s", task_id)
logger.exception(
"Failed to update retry counts for task %s", task_id)
def _mark_task(self, db_path: Optional[Path], task_id: Optional[str],
status: str, detail: Optional[dict] = None):
@@ -1468,7 +1553,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
if detail:
conn.execute(
"INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)",
(task_id, "daemon", status, json.dumps(detail, ensure_ascii=False))
(task_id, "daemon", status, json.dumps(
detail, ensure_ascii=False))
)
conn.commit()
finally:
@@ -1486,10 +1572,13 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
from src.blackboard.operations import Blackboard
bb = Blackboard(db_path)
cid = bb.add_comment(task_id, "daemon",
f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入",
comment_type="system")
f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入",
comment_type="system")
bb.record_mentions(cid, task_id, ["pangtong-fujunshi"])
logger.info("Task %s: failure notified pangtong via comment+mention (reason=%s)", task_id, reason)
logger.info(
"Task %s: failure notified pangtong via comment+mention (reason=%s)",
task_id,
reason)
except Exception as e:
logger.warning("Task %s: failed to notify: %s", task_id, e)
except Exception:
@@ -1518,7 +1607,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
if asyncio.iscoroutine(result):
await result
except Exception:
logger.warning("on_complete callback failed for %s", agent_id, exc_info=True)
logger.warning(
"on_complete callback failed for %s",
agent_id,
exc_info=True)
def _register_session(
self,
@@ -1596,7 +1688,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
def get_session_by_agent(self, agent_id: str) -> Optional[Dict[str, Any]]:
"""v2.7.2: 根据 agent_id 获取活跃 session 信息(用于进程存活性检查)"""
for sid, info in self._sessions.items():
if info.get("agent_id") == agent_id and info.get("status") == "running":
if info.get("agent_id") == agent_id and info.get(
"status") == "running":
return info
return None