auto-sync: 2026-05-31 23:35:05

This commit is contained in:
cfdaily
2026-05-31 23:35:05 +08:00
parent a2cf49ee99
commit 3e1d4d066b
3 changed files with 181 additions and 9 deletions
+60 -8
View File
@@ -757,6 +757,11 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# 其他:A1(completed), A4(agent_failed), A7(auth_failed),
# A8(gateway_unreachable), A11(lock_conflict),
# A10(compact_failed), A12(agent_error)
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
if outcome in ("crashed", "compact_failed", "process_crash", "session_stuck",
"compact_hanging", "agent_error") and self.counter:
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
logger.info("Crash/error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
# 进程退出 → on_complete release counter
# 任务状态由各 outcome 自行处理(或等 ticker)
await self._do_on_complete_async(on_complete, agent_id, outcome)
@@ -1088,9 +1093,59 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
logger.exception("Failed to revive %s", agent_id)
return False
@staticmethod
def _check_recent_compaction_jsonl(
    session_file: str,
    window_seconds: int = 300,
    tail_bytes: int = 51200,
) -> bool:
    """Report whether the session jsonl ends with a recent compaction record.

    v2.8.1 Fix-1: scan the tail of the session jsonl file for a record with
    ``"type": "compaction"`` whose ``timestamp`` lies within the last
    ``window_seconds`` seconds. Per the original author's note this is
    preferred over ``compactionCheckpoints`` because the Gateway appends a
    jsonl record on every completed compact but does not always update the
    checkpoints.

    Args:
        session_file: Path to the session ``.jsonl`` file. Empty or missing
            paths yield ``False``.
        window_seconds: Maximum age, in seconds, for a compaction record to
            count as recent.
        tail_bytes: Number of bytes read from the end of the file.

    Returns:
        ``True`` if a compaction record younger than ``window_seconds`` is
        found in the scanned tail, otherwise ``False``. I/O errors and
        malformed lines never raise; they are treated as "not found".
    """
    # Function-scope imports keep this helper independent of how the module
    # header imports json / datetime / pathlib.
    import json
    from datetime import datetime, timezone

    if not session_file:
        return False

    # EAFP: open directly instead of exists()+open(); a missing or unreadable
    # file is simply "no recent compaction".
    try:
        with open(session_file, "rb") as sf:
            sf.seek(0, 2)
            size = sf.tell()
            sf.seek(max(0, size - max(0, tail_bytes)))
            # The seek may land inside a multi-byte character; replace it
            # rather than fail. The resulting partial first line is rejected
            # by the JSON parser below.
            tail = sf.read().decode("utf-8", errors="replace")
    except (OSError, ValueError):
        return False

    now = datetime.now(timezone.utc)

    # Walk from newest to oldest; the file is append-only, so the first
    # record older than the window means nothing earlier can qualify.
    for raw in reversed(tail.splitlines()):
        if not raw.strip():
            continue
        try:
            record = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if not isinstance(record, dict):
            # A scalar / list line must not abort the whole scan.
            continue

        ts = record.get("timestamp")
        if not isinstance(ts, str) or not ts:
            # Missing or non-ISO (e.g. numeric) timestamps carry no usable
            # age information; skip the record and keep scanning.
            continue
        try:
            stamp = datetime.fromisoformat(ts.replace("Z", "+00:00"))
            if stamp.tzinfo is None:
                # Treat offset-less timestamps as UTC so they can be compared
                # against the timezone-aware "now".
                stamp = stamp.replace(tzinfo=timezone.utc)
            age = (now - stamp).total_seconds()
        except (ValueError, OverflowError):
            continue

        if age >= window_seconds:
            break
        if record.get("type") == "compaction":
            return True
    return False
@staticmethod
def _check_session_state(agent_id: str) -> dict:
"""检查 sessions.json 和 lock 状态"""
"""检查 sessions.json 和 lock 状态
v2.8.1: compact 检测改用 session jsonl 末尾扫描(Fix-1),
替代失效的 compactionCheckpoints 检测。
"""
result = {"status": "unknown", "lock_pid": None, "lock_pid_alive": False, "recent_compact": False}
sessions_path = Path.home() / ".openclaw" / "agents" / agent_id / "sessions" / "sessions.json"
if not sessions_path.exists():
@@ -1125,13 +1180,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
except Exception:
pass
# 最近 5 分钟的 compact
import time
now_ms = time.time() * 1000
for cp in main_session.get("compactionCheckpoints", []):
if (now_ms - cp.get("createdAt", 0)) < 300_000:
result["recent_compact"] = True
break
# v2.8.1 Fix-1: compact 检测改用 session jsonl 末尾扫描
# 只在 agent 非空闲时才扫描(减少不必要 I/O)
if result["status"] not in ("done", "idle", "unknown", None) and sf:
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(sf)
except Exception:
pass
return result