auto-sync: 2026-05-26 11:47:58

This commit is contained in:
cfdaily
2026-05-26 11:47:58 +08:00
parent f29a194c9c
commit bb9204af21
+31 -6
View File
@@ -659,13 +659,38 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# 检查 session 状态
state = self._check_session_state(agent_id)
# B1: 假死
# B1: 假死 — 先复活,连续假死 ≥2 次再 failed
if state.get("status") == "running" and not state.get("lock_pid_alive", True):
logger.error("Agent %s session stuck (session=%s, lock PID dead)",
agent_id, session_id)
self._mark_task(db_path, task_id, "failed",
{"reason": "session_stuck", "diagnostics": state})
await self._do_on_complete_async(on_complete, agent_id, "session_stuck")
# 假死计数
stuck_count = self._stuck_counts.get(task_id, 0) + 1
self._stuck_counts[task_id] = stuck_count
if stuck_count >= 2:
# 连续假死 ≥2 次,标 failed
logger.error("Agent %s session stuck %d times (session=%s, lock PID dead)",
agent_id, stuck_count, session_id)
self._mark_task(db_path, task_id, "failed",
{"reason": "session_stuck", "stuck_count": stuck_count,
"diagnostics": state})
await self._do_on_complete_async(on_complete, agent_id, "session_stuck")
return
# 第 1 次假死 → 尝试复活
logger.warning("Agent %s session stuck (attempt %d), reviving (session=%s)",
agent_id, stuck_count, session_id)
revived = self._revive_session(agent_id)
if revived:
logger.info("Agent %s session revived, releasing counter for ticker re-dispatch",
agent_id)
# release counter → 任务保持 working → ticker 下次 re-dispatch
await self._do_on_complete_async(on_complete, agent_id, "session_revived")
else:
# 复活失败 → 标 failed
logger.error("Agent %s revive failed, marking failed", agent_id)
self._mark_task(db_path, task_id, "failed",
{"reason": "revive_failed", "stuck_count": stuck_count,
"diagnostics": state})
await self._do_on_complete_async(on_complete, agent_id, "revive_failed")
return
# B2/B3/B4: 进程还活着