fix(spawner): §24 v4 compact检测 - trajectory prompt.submitted 替换 gateway rotation
CI / lint (pull_request) Failing after 7s
CI / test (pull_request) Has been skipped
CI / notify-on-failure (pull_request) Successful in 0s

This commit is contained in:
cfdaily
2026-06-11 23:57:09 +08:00
parent 95a8abca96
commit 3c2c0f3175
3 changed files with 426 additions and 185 deletions
+112 -6
View File
@@ -1297,6 +1297,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
logger.exception("Failed to revive %s", agent_id)
return False
@staticmethod
# deprecated: §24 v3, 保留供方案 B 备选
@staticmethod
def _get_recent_gateway_logs() -> list:
"""获取当天和昨天的 gateway 日志路径。
@@ -1316,6 +1318,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
paths.append(p)
return paths
@staticmethod
# deprecated: §24 v3, 保留供方案 B 备选
@staticmethod
def _check_compact_in_progress_gateway(
session_key: str, window_seconds: int = 120) -> bool:
@@ -1495,14 +1499,13 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
except Exception:
pass
# §24 v3: compact 检测优先用 gateway 日志 rotation 事件
# 旧方法 _check_recent_compaction_jsonl 作为 fallback
# §24 v4: compact 检测优先用 trajectory prompt.submitted
# fallback: _check_recent_compaction_jsonl (v2.8.2)
# 重要:compact 进行中时 status=done,所以不能按 status 过滤
# 只跳过 idle/unknown(完全没有活动过的 session)
if result["status"] not in ("idle", "unknown", None):
session_key = f"agent:{agent_id}:main"
result["recent_compact"] = AgentSpawner._check_compact_in_progress_gateway(
session_key)
if result["status"] not in ("idle", "unknown", None) and sf:
result["recent_compact"] = AgentSpawner._check_compact_in_progress_trajectory(
sf)
if not result["recent_compact"] and sf:
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(
sf)
@@ -1510,6 +1513,109 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
pass
return result
@staticmethod
def _check_compact_in_progress_trajectory(
session_file: str, timeout_minutes: int = 30) -> bool:
"""§24 v4: 检查 trajectory jsonl 尾部,判断 session 是否处于非正常状态。
检测逻辑:最后一个完整 turn 没有 prompt.submitted/skipped → 非正常 → skip。
覆盖:compact、timeout、hook block、session 结束等所有非正常状态。
Returns:
True = 非正常状态(skip ticker
False = 正常(不 skip)或超时兜底放行
"""
if not session_file:
return False
traj_path = f"{session_file}.trajectory.jsonl"
if not os.path.exists(traj_path):
return False
try:
from datetime import datetime as _dt, timezone as _tz, timedelta
# 读尾部 500KB
with open(traj_path, "rb") as f:
f.seek(0, 2)
size = f.tell()
f.seek(max(0, size - 500 * 1024))
tail = f.read().decode("utf-8", errors="replace")
if not tail.strip():
return False
# 解析所有有效行
events = []
for line in tail.splitlines():
line = line.strip()
if not line:
continue
try:
obj = json.loads(line)
events.append(obj)
except (json.JSONDecodeError, ValueError):
continue
if not events:
return False
# 按 session.started 分组找 turn
# 每个 turn 以 session.started 开始
turns = []
current_turn = []
for evt in events:
if evt.get("type") == "session.started":
if current_turn:
turns.append(current_turn)
current_turn = [evt]
else:
current_turn.append(evt)
if current_turn:
turns.append(current_turn)
if not turns:
return False
# 检查最后一个完整 turn(包含 session.started
last_turn = turns[-1]
turn_types = {evt.get("type") for evt in last_turn}
# 有 prompt.submitted 或 prompt.skipped → 正常 turn
if "prompt.submitted" in turn_types or "prompt.skipped" in turn_types:
return False
# 非正常状态 → 检查超时兜底
# 找最后一个有 ts 的事件
last_ts = None
for evt in reversed(events):
ts_str = evt.get("ts")
if ts_str:
try:
last_ts = _dt.fromisoformat(
ts_str.replace("Z", "+00:00"))
if last_ts.tzinfo is None:
last_ts = last_ts.replace(tzinfo=_tz.utc)
except (ValueError, TypeError):
continue
break
if last_ts is None:
# 没有 ts 信息,无法判断超时 → 非正常 → skip
return True
now = _dt.now(_tz.utc)
elapsed = (now - last_ts).total_seconds()
if elapsed > timeout_minutes * 60:
logger.debug("Trajectory last event %.0fs ago > %dm, fallback pass",
elapsed, timeout_minutes)
return False # 兜底放行
return True # 非正常状态且未超时
except Exception as e:
logger.debug("_check_compact_in_progress_trajectory error: %s", e)
return False
@staticmethod
def _classify_outcome(exit_code: int, json_result: dict, stderr_text: str,
task_status: Optional[str], stdout_text: str = "") -> dict: