fix(lint): 修复 PR #14 引入的 lint 回退 (119→0)
PR #14 从旧分支复制文件导致回退了 PR #10 的 lint 修复。

修复内容:
- autoflake 移除未使用导入/变量
- autopep8 修复缩进/空格
- 手动修复 F821(pathlib→Path), F541(f-string), F841(未使用变量)
- 所有修复均通过 flake8 --max-line-length=120 --extend-ignore=E501 检查 (0 errors)
This commit is contained in:
+168
-75
@@ -15,7 +15,7 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from src.blackboard.db import get_connection, init_db
|
||||
from src.blackboard.db import get_connection
|
||||
|
||||
logger = logging.getLogger("moziplus-v2.spawner")
|
||||
|
||||
@@ -163,9 +163,12 @@ class AgentBusyError(Exception):
|
||||
|
||||
#07: reason 字段区分具体原因,便于 dispatcher 层区分处理。
|
||||
"""
|
||||
def __init__(self, agent_id: str, reason: str = "busy", detail: Optional[dict] = None):
|
||||
|
||||
def __init__(self, agent_id: str, reason: str = "busy",
|
||||
detail: Optional[dict] = None):
|
||||
self.agent_id = agent_id
|
||||
self.reason = reason # counter_blocked / session_locked / session_running / session_compacting / session_stuck
|
||||
# counter_blocked / session_locked / session_running / session_compacting / session_stuck
|
||||
self.reason = reason
|
||||
self.detail = detail or {}
|
||||
super().__init__(f"{agent_id}: {reason}")
|
||||
|
||||
@@ -277,11 +280,15 @@ class AgentSpawner:
|
||||
|
||||
# mail 任务用精简模板
|
||||
if project_id == "_mail":
|
||||
return self._build_mail_prompt(task_id, title, description, must_haves, agent_id)
|
||||
return self._build_mail_prompt(
|
||||
task_id, title, description, must_haves, agent_id)
|
||||
|
||||
# 走 BootstrapBuilder 新路径
|
||||
if self.bootstrap_builder and task is not None:
|
||||
role_map = {"executor": "executor", "review": "reviewer", "discussion": "planner"}
|
||||
role_map = {
|
||||
"executor": "executor",
|
||||
"review": "reviewer",
|
||||
"discussion": "planner"}
|
||||
role = role_map.get(spawn_type, "executor")
|
||||
bootstrap_prompt = self.bootstrap_builder.build_for_task(
|
||||
task=task,
|
||||
@@ -293,13 +300,14 @@ class AgentSpawner:
|
||||
|
||||
# 无 BootstrapBuilder 或无 task 对象 → 最小 fallback
|
||||
# 只保留任务上下文 + API 操作指令
|
||||
logger.warning("No BootstrapBuilder or task object, using minimal fallback")
|
||||
logger.warning(
|
||||
"No BootstrapBuilder or task object, using minimal fallback")
|
||||
return self._build_minimal_fallback(
|
||||
task_id, title, description, must_haves,
|
||||
project_id, agent_id)
|
||||
|
||||
def _build_minimal_fallback(self, task_id, title, description, must_haves,
|
||||
project_id, agent_id):
|
||||
project_id, agent_id):
|
||||
"""最小 fallback:只有任务上下文 + API 指令"""
|
||||
task_section = f"""## 任务
|
||||
{title}
|
||||
@@ -311,7 +319,7 @@ class AgentSpawner:
|
||||
return task_section + "\n\n---\n\n" + api_section
|
||||
|
||||
def _build_api_section(self, project_id: str, task_id: str,
|
||||
agent_id: str) -> str:
|
||||
agent_id: str) -> str:
|
||||
"""构建 API 回写操作指令(BootstrapBuilder 模式下补充)"""
|
||||
# mail 任务直接 done,不走 review
|
||||
success_status = '"done"' if project_id == "_mail" else '"review"'
|
||||
@@ -337,8 +345,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
"""
|
||||
|
||||
def _build_discussion_prompt(self, task_id: str, title: str,
|
||||
description: str, must_haves: str,
|
||||
project_id: str, agent_id: str) -> str:
|
||||
description: str, must_haves: str,
|
||||
project_id: str, agent_id: str) -> str:
|
||||
"""构建讨论类 spawn prompt(§3.3 框架 + Boids)"""
|
||||
goal_snapshot = description or title
|
||||
constraints = must_haves or "(无特殊约束)"
|
||||
@@ -368,7 +376,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
if not self.guardrails:
|
||||
return "无特殊限制"
|
||||
try:
|
||||
return "、".join(r.get("name", r.get("rule_id", "")) for r in self.guardrails.rules[:6])
|
||||
return "、".join(r.get("name", r.get("rule_id", ""))
|
||||
for r in self.guardrails.rules[:6])
|
||||
except Exception:
|
||||
return "无特殊限制"
|
||||
|
||||
@@ -379,9 +388,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
return router.agent_profiles.get(agent_id)
|
||||
return None
|
||||
|
||||
|
||||
def _build_mail_prompt(self, task_id: str, title: str, description: str,
|
||||
must_haves: str, agent_id: str) -> str:
|
||||
must_haves: str, agent_id: str) -> str:
|
||||
"""构建 Mail 专用精简模板"""
|
||||
# 解析 must_haves 获取 from 和 performative
|
||||
from_agent = agent_id
|
||||
@@ -389,7 +397,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
try:
|
||||
meta = json.loads(must_haves) if must_haves else {}
|
||||
from_agent = meta.get("from", agent_id)
|
||||
performative = meta.get("performative", meta.get("type", "request"))
|
||||
performative = meta.get(
|
||||
"performative", meta.get(
|
||||
"type", "request"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -472,7 +482,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
self._revive_session(agent_id)
|
||||
elif pre_state.get("status") == "running" and not pre_state.get("lock_pid_alive"):
|
||||
# status=running 但 lock PID 已死 → 假死,revive
|
||||
logger.warning("Phase 0: %s status=running but lock PID dead, reviving", agent_id)
|
||||
logger.warning(
|
||||
"Phase 0: %s status=running but lock PID dead, reviving",
|
||||
agent_id)
|
||||
self._revive_session(agent_id)
|
||||
|
||||
# Phase 1: Counter acquire(互斥锁)
|
||||
@@ -487,12 +499,15 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
if use_main_session:
|
||||
session_state = self._check_session_state(agent_id)
|
||||
logger.info("Phase 2 session check for %s: status=%s lock_pid=%s lock_pid_alive=%s compact=%s",
|
||||
agent_id, session_state.get('status'), session_state.get('lock_pid'),
|
||||
agent_id, session_state.get(
|
||||
'status'), session_state.get('lock_pid'),
|
||||
session_state.get('lock_pid_alive'), session_state.get('recent_compact'))
|
||||
|
||||
blockers = []
|
||||
if session_state.get("lock_pid_alive") and not session_state.get("lock_expired"):
|
||||
blockers.append(("session_locked", session_state.get("lock_pid")))
|
||||
if session_state.get(
|
||||
"lock_pid_alive") and not session_state.get("lock_expired"):
|
||||
blockers.append(
|
||||
("session_locked", session_state.get("lock_pid")))
|
||||
if session_state.get("status") == "running":
|
||||
if session_state.get("lock_pid_alive"):
|
||||
# 真 running:外部进程占用
|
||||
@@ -515,7 +530,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
|
||||
# Phase 2.5: 假死修复(status=running + lock PID 死 → revive → 重检)
|
||||
# 此场景应被 Phase 0 提前修复,这里做兜底
|
||||
if session_state.get("status") == "running" and not session_state.get("lock_pid_alive"):
|
||||
if session_state.get("status") == "running" and not session_state.get(
|
||||
"lock_pid_alive"):
|
||||
logger.warning("Phase 2.5: %s status=running + lock dead (should be caught in Phase 0), reviving",
|
||||
agent_id)
|
||||
self._revive_session(agent_id)
|
||||
@@ -538,7 +554,10 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
raise
|
||||
|
||||
if self.dry_run:
|
||||
logger.info("[DRY RUN] Would spawn agent %s (session=%s)", agent_id, _sid_key)
|
||||
logger.info(
|
||||
"[DRY RUN] Would spawn agent %s (session=%s)",
|
||||
agent_id,
|
||||
_sid_key)
|
||||
self._register_session(_sid_key, agent_id, task_id, pid=None)
|
||||
return _sid_key
|
||||
|
||||
@@ -554,7 +573,8 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
if asyncio.iscoroutine(result):
|
||||
await result
|
||||
except Exception:
|
||||
logger.warning("Business on_complete failed for %s", aid, exc_info=True)
|
||||
logger.warning(
|
||||
"Business on_complete failed for %s", aid, exc_info=True)
|
||||
|
||||
cmd = [
|
||||
"openclaw", "agent",
|
||||
@@ -575,7 +595,7 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
self._register_session(session_id, agent_id, task_id, proc.pid,
|
||||
broadcast_task_ids=broadcast_task_ids)
|
||||
broadcast_task_ids=broadcast_task_ids)
|
||||
logger.info("Spawned agent %s (session=%s, pid=%d)",
|
||||
agent_id, session_id, proc.pid)
|
||||
|
||||
@@ -593,7 +613,11 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
if self.counter:
|
||||
self.counter.release(agent_id, _sid_key)
|
||||
logger.exception("Failed to spawn agent %s", agent_id)
|
||||
self._record_attempt(task_id, agent_id, "spawn_failed", error=str(e))
|
||||
self._record_attempt(
|
||||
task_id,
|
||||
agent_id,
|
||||
"spawn_failed",
|
||||
error=str(e))
|
||||
raise
|
||||
|
||||
async def spawn_subagent(
|
||||
@@ -609,7 +633,9 @@ curl -X POST http://{self.api_host}:{self.api_port}/api/projects/{project_id}/ta
|
||||
session_id = str(uuid.uuid4())
|
||||
|
||||
if self.dry_run:
|
||||
logger.info("[DRY RUN] Would spawn subagent (session=%s)", session_id)
|
||||
logger.info(
|
||||
"[DRY RUN] Would spawn subagent (session=%s)",
|
||||
session_id)
|
||||
self._register_session(session_id, "subagent", task_id, pid=None)
|
||||
return session_id
|
||||
|
||||
@@ -729,10 +755,16 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
agent_id, session_id, json_result)
|
||||
|
||||
# 查任务实际状态
|
||||
task_status = self._get_task_status(db_path, task_id) if task_id else None
|
||||
task_status = self._get_task_status(
|
||||
db_path, task_id) if task_id else None
|
||||
|
||||
# 分类
|
||||
cls = self._classify_outcome(exit_code, json_result, stderr_text, task_status, stdout_text)
|
||||
cls = self._classify_outcome(
|
||||
exit_code,
|
||||
json_result,
|
||||
stderr_text,
|
||||
task_status,
|
||||
stdout_text)
|
||||
outcome = cls["outcome"]
|
||||
|
||||
# 更新 session 状态
|
||||
@@ -761,17 +793,21 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
agent_id, session_id, outcome, exit_code, task_status)
|
||||
|
||||
# 广播反馈追踪(Phase 1 bug fix)
|
||||
if task_id == "broadcast" and hasattr(self, '_ticker') and self._ticker:
|
||||
if task_id == "broadcast" and hasattr(
|
||||
self, '_ticker') and self._ticker:
|
||||
# 广播任务:从 session 信息取真实 task_id 列表,逐一回调 tracker
|
||||
sess_info = self._sessions.get(session_id or "main", {})
|
||||
bt_ids = sess_info.get("broadcast_task_ids") or []
|
||||
# 广播场景一律标 no_reply:Agent 只 claim 一个任务,
|
||||
# 其余任务的 tracker 不能被 claimed 清除
|
||||
for real_task_id in bt_ids:
|
||||
self._ticker.record_broadcast_response(real_task_id, agent_id, "no_reply")
|
||||
self._ticker.record_broadcast_response(
|
||||
real_task_id, agent_id, "no_reply")
|
||||
elif task_id and hasattr(self, '_ticker') and self._ticker:
|
||||
outcome_str = "claimed" if cls.get("status") == "ok" else "no_reply"
|
||||
self._ticker.record_broadcast_response(task_id, agent_id, outcome_str)
|
||||
outcome_str = "claimed" if cls.get(
|
||||
"status") == "ok" else "no_reply"
|
||||
self._ticker.record_broadcast_response(
|
||||
task_id, agent_id, outcome_str)
|
||||
|
||||
if cls["should_retry"]:
|
||||
# cooldown: 新增的可恢复场景(A14/A15/A16/A8/A10)
|
||||
@@ -850,14 +886,24 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
|
||||
if outcome == "crashed" and self.counter:
|
||||
self.counter.set_cooldown(agent_id, seconds=60)
|
||||
logger.info("Crash cooldown set for %s: 60s (outcome=%s)", agent_id, outcome)
|
||||
logger.info(
|
||||
"Crash cooldown set for %s: 60s (outcome=%s)",
|
||||
agent_id,
|
||||
outcome)
|
||||
elif outcome in ("compact_failed", "process_crash", "session_stuck",
|
||||
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
|
||||
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
|
||||
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
|
||||
logger.info("Error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
|
||||
logger.info(
|
||||
"Error cooldown set for %s: 300s (outcome=%s)",
|
||||
agent_id,
|
||||
outcome)
|
||||
# F1: 不可恢复 outcome → 立刻标 failed + 写黑板
|
||||
if outcome in ("auth_failed", "agent_error") and db_path and task_id:
|
||||
logger.error("Task %s: unrecoverable outcome=%s, marking failed immediately", task_id, outcome)
|
||||
if outcome in ("auth_failed",
|
||||
"agent_error") and db_path and task_id:
|
||||
logger.error(
|
||||
"Task %s: unrecoverable outcome=%s, marking failed immediately",
|
||||
task_id,
|
||||
outcome)
|
||||
self._mark_task(db_path, task_id, "failed", {
|
||||
"reason": outcome,
|
||||
"stderr_preview": (stderr_text or "")[:500],
|
||||
@@ -881,13 +927,16 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace")
|
||||
# stderr collected but not used in this handler
|
||||
# (kept for potential future diagnostics)
|
||||
b"".join(stderr_chunks).decode("utf-8", errors="replace")
|
||||
|
||||
# 检查 session 状态
|
||||
state = self._check_session_state(agent_id)
|
||||
|
||||
# B1: 假死 - 先复活,连续假死 ≥2 次再 failed
|
||||
if state.get("status") == "running" and not state.get("lock_pid_alive", True):
|
||||
if state.get("status") == "running" and not state.get(
|
||||
"lock_pid_alive", True):
|
||||
# 假死计数
|
||||
stuck_count = self._stuck_counts.get(task_id, 0) + 1
|
||||
self._stuck_counts[task_id] = stuck_count
|
||||
@@ -913,7 +962,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
await self._do_on_complete_async(on_complete, agent_id, "session_revived")
|
||||
else:
|
||||
# 复活失败 → 标 failed
|
||||
logger.error("Agent %s revive failed, marking failed", agent_id)
|
||||
logger.error(
|
||||
"Agent %s revive failed, marking failed", agent_id)
|
||||
self._mark_task(db_path, task_id, "failed",
|
||||
{"reason": "revive_failed", "stuck_count": stuck_count,
|
||||
"diagnostics": state})
|
||||
@@ -994,7 +1044,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
"SELECT status FROM tasks WHERE id=?", (task_id,)
|
||||
).fetchone()
|
||||
# Bug-6 fix: pending 不是终态
|
||||
if row and row["status"] in ("done", "failed", "cancelled", "review"):
|
||||
if row and row["status"] in (
|
||||
"done", "failed", "cancelled", "review"):
|
||||
logger.info("Retry skip: task %s already %s (agent=%s)",
|
||||
task_id, row["status"], agent_id)
|
||||
# on_complete = wrapped_on_complete,会 release counter
|
||||
@@ -1003,7 +1054,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception:
|
||||
logger.warning("Retry status check failed for %s, proceeding", task_id)
|
||||
logger.warning(
|
||||
"Retry status check failed for %s, proceeding", task_id)
|
||||
|
||||
# 直接读写 tasks 表的 retry_count
|
||||
if retry_field == "retry_count" and db_path and task_id:
|
||||
@@ -1023,7 +1075,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception:
|
||||
logger.exception("Failed to update retry_count for task %s", task_id)
|
||||
logger.exception(
|
||||
"Failed to update retry_count for task %s", task_id)
|
||||
count = 1
|
||||
else:
|
||||
retry_counts = self._get_retry_counts(db_path, task_id)
|
||||
@@ -1107,7 +1160,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
"""
|
||||
text = stdout_text.strip()
|
||||
if not text:
|
||||
return {"status": None, "summary": None, "fallback_used": False, "fallback_reason": None, "payloads": []}
|
||||
return {"status": None, "summary": None, "fallback_used": False,
|
||||
"fallback_reason": None, "payloads": []}
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
@@ -1119,7 +1173,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
else:
|
||||
return {"status": None, "summary": None, "fallback_used": False, "fallback_reason": None, "payloads": []}
|
||||
return {"status": None, "summary": None, "fallback_used": False,
|
||||
"fallback_reason": None, "payloads": []}
|
||||
|
||||
# 从 data.result.meta.executionTrace 取 fallback 信息
|
||||
result = data.get("result", {})
|
||||
@@ -1135,7 +1190,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _get_task_status(db_path: Optional[Path], task_id: Optional[str]) -> Optional[str]:
|
||||
def _get_task_status(
|
||||
db_path: Optional[Path], task_id: Optional[str]) -> Optional[str]:
|
||||
"""查任务实际 API 状态"""
|
||||
if not db_path or not task_id:
|
||||
return None
|
||||
@@ -1152,7 +1208,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _get_task_info(db_path: Optional[Path], task_id: Optional[str]) -> Optional[dict]:
|
||||
def _get_task_info(db_path: Optional[Path],
|
||||
task_id: Optional[str]) -> Optional[dict]:
|
||||
"""查任务基本信息"""
|
||||
if not db_path or not task_id:
|
||||
return None
|
||||
@@ -1160,7 +1217,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
conn = get_connection(db_path)
|
||||
try:
|
||||
row = conn.execute(
|
||||
"SELECT id, title, status FROM tasks WHERE id=?", (task_id,)
|
||||
"SELECT id, title, status FROM tasks WHERE id=?", (
|
||||
task_id,)
|
||||
).fetchone()
|
||||
if not row:
|
||||
return None
|
||||
@@ -1192,7 +1250,9 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
sessions[main_key] = main_session
|
||||
with open(sessions_path, "w") as f:
|
||||
json.dump(sessions, f, indent=2)
|
||||
logger.info("Revived %s: sessions.json status changed running→idle", agent_id)
|
||||
logger.info(
|
||||
"Revived %s: sessions.json status changed running→idle",
|
||||
agent_id)
|
||||
# #07 O4: 同时清理残留 lock 文件
|
||||
sf = main_session.get("sessionFile", "")
|
||||
if sf:
|
||||
@@ -1200,7 +1260,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
if lock_path.exists():
|
||||
try:
|
||||
lock_path.unlink()
|
||||
logger.info("Cleaned stale lock for %s: %s", agent_id, lock_path.name)
|
||||
logger.info(
|
||||
"Cleaned stale lock for %s: %s",
|
||||
agent_id,
|
||||
lock_path.name)
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
@@ -1209,7 +1272,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _check_recent_compaction_jsonl(session_file: str, window_seconds: int = 900) -> bool:
|
||||
def _check_recent_compaction_jsonl(
|
||||
session_file: str, window_seconds: int = 900) -> bool:
|
||||
"""v2.8.2 Fix-2: 读 session jsonl 末尾,检查是否有 window_seconds 内的 compaction 记录。
|
||||
|
||||
比 compactionCheckpoints 更可靠:Gateway 每次完成 compact 必然在 jsonl 末尾追加记录,
|
||||
@@ -1219,7 +1283,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
实测 50KB 在长对话中不够(compact 记录被推出窗口导致漏检)。
|
||||
正常扫描量不变:从尾部往前扫,遇到超过 15min 的 timestamp 即 break。
|
||||
"""
|
||||
if not session_file or not pathlib.Path(session_file).exists():
|
||||
if not session_file or not Path(session_file).exists():
|
||||
return False
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
@@ -1241,7 +1305,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
ts = obj.get("timestamp", "")
|
||||
if ts:
|
||||
try:
|
||||
ct = datetime.fromisoformat(ts.replace("Z", "+00:00"))
|
||||
ct = datetime.fromisoformat(
|
||||
ts.replace("Z", "+00:00"))
|
||||
if (now - ct).total_seconds() < window_seconds:
|
||||
return True
|
||||
except (ValueError, TypeError):
|
||||
@@ -1265,7 +1330,11 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
v2.8.1: compact 检测改用 session jsonl 末尾扫描(Fix-1),
|
||||
替代失效的 compactionCheckpoints 检测。
|
||||
"""
|
||||
result = {"status": "unknown", "lock_pid": None, "lock_pid_alive": False, "recent_compact": False}
|
||||
result = {
|
||||
"status": "unknown",
|
||||
"lock_pid": None,
|
||||
"lock_pid_alive": False,
|
||||
"recent_compact": False}
|
||||
sessions_path = Path(os.environ.get(
|
||||
"OPENCLAW_HOME", str(Path.home() / ".openclaw")
|
||||
)) / "agents" / agent_id / "sessions" / "sessions.json"
|
||||
@@ -1304,8 +1373,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
created_at_str = lock_data.get("createdAt", "")
|
||||
if created_at_str:
|
||||
from datetime import datetime as _dt, timezone as _tz
|
||||
created_dt = _dt.fromisoformat(created_at_str.replace("Z", "+00:00"))
|
||||
elapsed = (_dt.now(_tz.utc) - created_dt).total_seconds()
|
||||
created_dt = _dt.fromisoformat(
|
||||
created_at_str.replace("Z", "+00:00"))
|
||||
elapsed = (_dt.now(_tz.utc) -
|
||||
created_dt).total_seconds()
|
||||
if elapsed > 1800: # 30 minutes
|
||||
result["lock_pid_alive"] = False
|
||||
result["lock_expired"] = True
|
||||
@@ -1318,8 +1389,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
|
||||
# v2.8.1 Fix-1: compact 检测改用 session jsonl 末尾扫描
|
||||
# 只在 agent 非空闲时才扫描(减少不必要 I/O)
|
||||
if result["status"] not in ("done", "idle", "unknown", None) and sf:
|
||||
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(sf)
|
||||
if result["status"] not in (
|
||||
"done", "idle", "unknown", None) and sf:
|
||||
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(
|
||||
sf)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
@@ -1364,14 +1437,17 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
# A15/A16: stderr 含 network/compact 关键字 → 可恢复
|
||||
if stderr_text:
|
||||
stderr_lower = stderr_text.lower()
|
||||
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
|
||||
if any(kw in stderr_lower for kw in [
|
||||
"econnrefused", "etimedout", "gateway closed", "econnreset"]):
|
||||
return {"outcome": "gateway_unreachable", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
|
||||
if any(kw in stderr_lower for kw in [
|
||||
"compaction-diag", "context-overflow"]):
|
||||
return {"outcome": "compact_interrupted", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
# A17: 真正的 crash → 保持 working,ticker 兜底
|
||||
return {"outcome": "crashed", "should_retry": False, "original": "process_crash"}
|
||||
return {"outcome": "crashed", "should_retry": False,
|
||||
"original": "process_crash"}
|
||||
|
||||
# A13 revised: stdout 为空但 exit=0 → 信任进程退出码,视为正常完成
|
||||
# 实测发现 openclaw session=None + exit=0 是正常场景(inform 通知等)
|
||||
@@ -1382,25 +1458,32 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
# A7-A12: status=error → 不续杯,stderr 辅助分类
|
||||
if status == "error":
|
||||
stderr_lower = stderr_text.lower()
|
||||
if any(kw in stderr_lower for kw in ["401", "403", "unauthorized", "auth"]):
|
||||
if any(kw in stderr_lower for kw in [
|
||||
"401", "403", "unauthorized", "auth"]):
|
||||
return {"outcome": "auth_failed", "should_retry": False}
|
||||
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
|
||||
if any(kw in stderr_lower for kw in [
|
||||
"econnrefused", "etimedout", "gateway closed", "econnreset"]):
|
||||
return {"outcome": "gateway_unreachable", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
if any(kw in stderr_lower for kw in ["rate_limit", "500", "503", "api error"]):
|
||||
if any(kw in stderr_lower for kw in [
|
||||
"rate_limit", "500", "503", "api error"]):
|
||||
return {"outcome": "api_error", "should_retry": False}
|
||||
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
|
||||
if any(kw in stderr_lower for kw in [
|
||||
"compaction-diag", "context-overflow"]):
|
||||
return {"outcome": "compact_failed", "should_retry": False}
|
||||
if any(kw in stderr_lower for kw in ["lock", "busy", "concurrent", "lane task error"]):
|
||||
if any(kw in stderr_lower for kw in [
|
||||
"lock", "busy", "concurrent", "lane task error"]):
|
||||
return {"outcome": "lock_conflict", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
return {"outcome": "agent_error", "should_retry": False}
|
||||
|
||||
# 兜底:status 未知值
|
||||
return {"outcome": "agent_error", "should_retry": False, "original": "unknown_status"}
|
||||
return {"outcome": "agent_error",
|
||||
"should_retry": False, "original": "unknown_status"}
|
||||
|
||||
@staticmethod
|
||||
def _get_retry_counts(db_path: Optional[Path], task_id: Optional[str]) -> dict:
|
||||
def _get_retry_counts(
|
||||
db_path: Optional[Path], task_id: Optional[str]) -> dict:
|
||||
"""从最新 task_attempt 的 metadata 读计数器"""
|
||||
defaults = {"retry_count": 0, "connect_retry_count": 0,
|
||||
"api_retry_count": 0, "lock_retry_count": 0,
|
||||
@@ -1426,7 +1509,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
return defaults
|
||||
|
||||
def _update_retry_counts(self, db_path: Optional[Path],
|
||||
task_id: Optional[str], counts: dict):
|
||||
task_id: Optional[str], counts: dict):
|
||||
"""将 retry counts 写回最新 task_attempt 的 metadata"""
|
||||
if not db_path or not task_id:
|
||||
return
|
||||
@@ -1440,7 +1523,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
(task_id,)
|
||||
).fetchone()
|
||||
if row:
|
||||
meta = json.loads(row["metadata"]) if row["metadata"] else {}
|
||||
meta = json.loads(
|
||||
row["metadata"]) if row["metadata"] else {}
|
||||
meta.update(counts)
|
||||
conn.execute(
|
||||
"UPDATE task_attempts SET metadata=? WHERE rowid=?",
|
||||
@@ -1450,7 +1534,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception:
|
||||
logger.exception("Failed to update retry counts for task %s", task_id)
|
||||
logger.exception(
|
||||
"Failed to update retry counts for task %s", task_id)
|
||||
|
||||
def _mark_task(self, db_path: Optional[Path], task_id: Optional[str],
|
||||
status: str, detail: Optional[dict] = None):
|
||||
@@ -1468,7 +1553,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
if detail:
|
||||
conn.execute(
|
||||
"INSERT INTO events (task_id, agent, event_type, detail) VALUES (?,?,?,?)",
|
||||
(task_id, "daemon", status, json.dumps(detail, ensure_ascii=False))
|
||||
(task_id, "daemon", status, json.dumps(
|
||||
detail, ensure_ascii=False))
|
||||
)
|
||||
conn.commit()
|
||||
finally:
|
||||
@@ -1486,10 +1572,13 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
from src.blackboard.operations import Blackboard
|
||||
bb = Blackboard(db_path)
|
||||
cid = bb.add_comment(task_id, "daemon",
|
||||
f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入",
|
||||
comment_type="system")
|
||||
f"@pangtong-fujunshi 任务执行失败: {reason},请评估是否需要介入",
|
||||
comment_type="system")
|
||||
bb.record_mentions(cid, task_id, ["pangtong-fujunshi"])
|
||||
logger.info("Task %s: failure notified pangtong via comment+mention (reason=%s)", task_id, reason)
|
||||
logger.info(
|
||||
"Task %s: failure notified pangtong via comment+mention (reason=%s)",
|
||||
task_id,
|
||||
reason)
|
||||
except Exception as e:
|
||||
logger.warning("Task %s: failed to notify: %s", task_id, e)
|
||||
except Exception:
|
||||
@@ -1518,7 +1607,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
if asyncio.iscoroutine(result):
|
||||
await result
|
||||
except Exception:
|
||||
logger.warning("on_complete callback failed for %s", agent_id, exc_info=True)
|
||||
logger.warning(
|
||||
"on_complete callback failed for %s",
|
||||
agent_id,
|
||||
exc_info=True)
|
||||
|
||||
def _register_session(
|
||||
self,
|
||||
@@ -1596,7 +1688,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
def get_session_by_agent(self, agent_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""v2.7.2: 根据 agent_id 获取活跃 session 信息(用于进程存活性检查)"""
|
||||
for sid, info in self._sessions.items():
|
||||
if info.get("agent_id") == agent_id and info.get("status") == "running":
|
||||
if info.get("agent_id") == agent_id and info.get(
|
||||
"status") == "running":
|
||||
return info
|
||||
return None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user