fix(spawner): §24 v4 compact检测 - trajectory prompt.submitted 替换 gateway rotation
This commit is contained in:
+112
-6
@@ -1297,6 +1297,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
logger.exception("Failed to revive %s", agent_id)
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
# deprecated: §24 v3, 保留供方案 B 备选
|
||||
@staticmethod
|
||||
def _get_recent_gateway_logs() -> list:
|
||||
"""获取当天和昨天的 gateway 日志路径。
|
||||
@@ -1316,6 +1318,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
paths.append(p)
|
||||
return paths
|
||||
|
||||
@staticmethod
|
||||
# deprecated: §24 v3, 保留供方案 B 备选
|
||||
@staticmethod
|
||||
def _check_compact_in_progress_gateway(
|
||||
session_key: str, window_seconds: int = 120) -> bool:
|
||||
@@ -1495,14 +1499,13 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# §24 v3: compact 检测优先用 gateway 日志 rotation 事件
|
||||
# 旧方法 _check_recent_compaction_jsonl 作为 fallback
|
||||
# §24 v4: compact 检测优先用 trajectory prompt.submitted
|
||||
# fallback: _check_recent_compaction_jsonl (v2.8.2)
|
||||
# 重要:compact 进行中时 status=done,所以不能按 status 过滤
|
||||
# 只跳过 idle/unknown(完全没有活动过的 session)
|
||||
if result["status"] not in ("idle", "unknown", None):
|
||||
session_key = f"agent:{agent_id}:main"
|
||||
result["recent_compact"] = AgentSpawner._check_compact_in_progress_gateway(
|
||||
session_key)
|
||||
if result["status"] not in ("idle", "unknown", None) and sf:
|
||||
result["recent_compact"] = AgentSpawner._check_compact_in_progress_trajectory(
|
||||
sf)
|
||||
if not result["recent_compact"] and sf:
|
||||
result["recent_compact"] = AgentSpawner._check_recent_compaction_jsonl(
|
||||
sf)
|
||||
@@ -1510,6 +1513,109 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
pass
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _check_compact_in_progress_trajectory(
|
||||
session_file: str, timeout_minutes: int = 30) -> bool:
|
||||
"""§24 v4: 检查 trajectory jsonl 尾部,判断 session 是否处于非正常状态。
|
||||
|
||||
检测逻辑:最后一个完整 turn 没有 prompt.submitted/skipped → 非正常 → skip。
|
||||
覆盖:compact、timeout、hook block、session 结束等所有非正常状态。
|
||||
|
||||
Returns:
|
||||
True = 非正常状态(skip ticker)
|
||||
False = 正常(不 skip)或超时兜底放行
|
||||
"""
|
||||
if not session_file:
|
||||
return False
|
||||
traj_path = f"{session_file}.trajectory.jsonl"
|
||||
if not os.path.exists(traj_path):
|
||||
return False
|
||||
|
||||
try:
|
||||
from datetime import datetime as _dt, timezone as _tz, timedelta
|
||||
|
||||
# 读尾部 500KB
|
||||
with open(traj_path, "rb") as f:
|
||||
f.seek(0, 2)
|
||||
size = f.tell()
|
||||
f.seek(max(0, size - 500 * 1024))
|
||||
tail = f.read().decode("utf-8", errors="replace")
|
||||
|
||||
if not tail.strip():
|
||||
return False
|
||||
|
||||
# 解析所有有效行
|
||||
events = []
|
||||
for line in tail.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
obj = json.loads(line)
|
||||
events.append(obj)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
continue
|
||||
|
||||
if not events:
|
||||
return False
|
||||
|
||||
# 按 session.started 分组找 turn
|
||||
# 每个 turn 以 session.started 开始
|
||||
turns = []
|
||||
current_turn = []
|
||||
for evt in events:
|
||||
if evt.get("type") == "session.started":
|
||||
if current_turn:
|
||||
turns.append(current_turn)
|
||||
current_turn = [evt]
|
||||
else:
|
||||
current_turn.append(evt)
|
||||
if current_turn:
|
||||
turns.append(current_turn)
|
||||
|
||||
if not turns:
|
||||
return False
|
||||
|
||||
# 检查最后一个完整 turn(包含 session.started)
|
||||
last_turn = turns[-1]
|
||||
turn_types = {evt.get("type") for evt in last_turn}
|
||||
|
||||
# 有 prompt.submitted 或 prompt.skipped → 正常 turn
|
||||
if "prompt.submitted" in turn_types or "prompt.skipped" in turn_types:
|
||||
return False
|
||||
|
||||
# 非正常状态 → 检查超时兜底
|
||||
# 找最后一个有 ts 的事件
|
||||
last_ts = None
|
||||
for evt in reversed(events):
|
||||
ts_str = evt.get("ts")
|
||||
if ts_str:
|
||||
try:
|
||||
last_ts = _dt.fromisoformat(
|
||||
ts_str.replace("Z", "+00:00"))
|
||||
if last_ts.tzinfo is None:
|
||||
last_ts = last_ts.replace(tzinfo=_tz.utc)
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
break
|
||||
|
||||
if last_ts is None:
|
||||
# 没有 ts 信息,无法判断超时 → 非正常 → skip
|
||||
return True
|
||||
|
||||
now = _dt.now(_tz.utc)
|
||||
elapsed = (now - last_ts).total_seconds()
|
||||
if elapsed > timeout_minutes * 60:
|
||||
logger.debug("Trajectory last event %.0fs ago > %dm, fallback pass",
|
||||
elapsed, timeout_minutes)
|
||||
return False # 兜底放行
|
||||
|
||||
return True # 非正常状态且未超时
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("_check_compact_in_progress_trajectory error: %s", e)
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _classify_outcome(exit_code: int, json_result: dict, stderr_text: str,
|
||||
task_status: Optional[str], stdout_text: str = "") -> dict:
|
||||
|
||||
Reference in New Issue
Block a user