auto-sync: 2026-05-31 23:35:05
This commit is contained in:
+60
-8
@@ -757,6 +757,11 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
# 其他:A1(completed), A4(agent_failed), A7(auth_failed),
|
||||
# A8(gateway_unreachable), A11(lock_conflict),
|
||||
# A10(compact_failed), A12(agent_error)
|
||||
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
|
||||
if outcome in ("crashed", "compact_failed", "process_crash", "session_stuck",
|
||||
"compact_hanging", "agent_error") and self.counter:
|
||||
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
|
||||
logger.info("Crash/error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
|
||||
# 进程退出 → on_complete release counter
|
||||
# 任务状态由各 outcome 自行处理(或等 ticker)
|
||||
await self._do_on_complete_async(on_complete, agent_id, outcome)
|
||||
@@ -1088,9 +1093,59 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
logger.exception("Failed to revive %s", agent_id)
|
||||
return False
|
||||
|
||||
@staticmethod
def _check_recent_compaction_jsonl(
    session_file: str, window_seconds: int = 300, tail_bytes: int = 51200
) -> bool:
    """Report whether the tail of a session JSONL file has a recent compaction record.

    v2.8.1 Fix-1. The last ``tail_bytes`` bytes of ``session_file`` are read and
    scanned from the newest line backwards for a JSON object whose ``type`` is
    ``"compaction"`` and whose ISO-8601 ``timestamp`` is less than
    ``window_seconds`` old. Per the original author's note, this is preferred
    over ``compactionCheckpoints`` because the Gateway appends a record to the
    jsonl on every completed compact but does not guarantee that
    ``compactionCheckpoints`` is updated.

    Args:
        session_file: Path to the session ``.jsonl`` file. Empty or missing
            paths yield ``False``.
        window_seconds: Maximum age, in seconds, for a compaction record to
            count as recent.
        tail_bytes: How many bytes at the end of the file to inspect.

    Returns:
        ``True`` if a compaction record younger than ``window_seconds`` is
        found in the inspected tail; ``False`` otherwise, including when the
        file cannot be read (this check is best-effort and never raises for
        I/O or malformed content).
    """
    # Function-scope imports keep the helper independent of whichever names the
    # enclosing module happens to import at file level.
    import json
    from datetime import datetime, timezone
    from pathlib import Path

    if not session_file or not Path(session_file).exists():
        return False

    now = datetime.now(timezone.utc)

    def _age_seconds(record: dict):
        """Return the record's age in seconds, or None if it has no usable timestamp."""
        stamp = record.get("timestamp")
        # Only ISO-8601 strings are understood; numeric or missing timestamps
        # are treated as "unknown age" rather than aborting the scan.
        if not isinstance(stamp, str) or not stamp:
            return None
        try:
            parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
            # Subtracting a naive datetime from an aware one raises TypeError;
            # such records are skipped, as before.
            return (now - parsed).total_seconds()
        except (ValueError, TypeError):
            return None

    try:
        with open(session_file, "rb") as sf:
            sf.seek(0, 2)
            size = sf.tell()
            sf.seek(max(0, size - max(0, tail_bytes)))
            tail = sf.read().decode("utf-8", errors="replace")
    except (OSError, ValueError):
        return False

    # JSONL records are delimited by "\n" only. str.splitlines() would also
    # split on U+2028/U+2029/NEL, which may legally appear unescaped inside
    # JSON strings and would tear a record into unparsable fragments.
    for line in reversed(tail.split("\n")):
        if not line.strip():
            continue
        try:
            record = json.loads(line)
        except ValueError:
            # Includes json.JSONDecodeError; the first line of the tail is
            # usually a partial record cut by the seek.
            continue
        if not isinstance(record, dict):
            continue

        age = _age_seconds(record)
        if age is None:
            continue
        if age >= window_seconds:
            # Scanning newest-to-oldest: once a record is outside the window,
            # nothing earlier is inspected.
            break
        if record.get("type") == "compaction":
            return True
    return False
|
||||
|
||||
@staticmethod
|
||||
def _check_session_state(agent_id: str) -> dict:
|
||||
"""检查 sessions.json 和 lock 状态"""
|
||||
"""检查 sessions.json 和 lock 状态
|
||||
|
||||
v2.8.1: compact 检测改用 session jsonl 末尾扫描(Fix-1),
|
||||
替代失效的 compactionCheckpoints 检测。
|
||||
"""
|
||||
result = {"status": "unknown", "lock_pid": None, "lock_pid_alive": False, "recent_compact": False}
|
||||
sessions_path = Path.home() / ".openclaw" / "agents" / agent_id / "sessions" / "sessions.json"
|
||||
if not sessions_path.exists():
|
||||
@@ -1125,13 +1180,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 最近 5 分钟的 compact
|
||||
import time
|
||||
now_ms = time.time() * 1000
|
||||
for cp in main_session.get("compactionCheckpoints", []):
|
||||
if (now_ms - cp.get("createdAt", 0)) < 300_000:
|
||||
result["recent_compact"] = True
|
||||
break
|
||||
# v2.8.1 Fix-1: compact 检测改用 session jsonl 末尾扫描
|
||||
# 只在 agent 非空闲时才扫描(减少不必要 I/O)
|
||||
if result["status"] not in ("done", "idle", "unknown", None) and sf:
|
||||
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(sf)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user