auto-sync: 2026-06-02 23:41:57
This commit is contained in:
+27
-9
@@ -768,6 +768,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
agent_id, session_id, outcome, exit_code, task_status)
|
||||
|
||||
if cls["should_retry"]:
|
||||
# cooldown: 新增的可恢复场景(A14/A15/A16/A8/A10)
|
||||
cooldown_seconds = cls.get("cooldown_seconds", 0)
|
||||
if cooldown_seconds and self.counter:
|
||||
self.counter.set_cooldown(agent_id, seconds=cooldown_seconds)
|
||||
# A2/A3: gateway_timeout → 续杯(on_complete 会 release counter)
|
||||
await self._do_retry(
|
||||
session_id, agent_id, task_id, on_complete, db_path,
|
||||
@@ -828,7 +832,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
agent_id, session_id, task_id, fallback_count,
|
||||
self.max_retries, json_result.get("fallback_reason"))
|
||||
if self.counter:
|
||||
self.counter.set_cooldown(agent_id, seconds=30)
|
||||
self.counter.set_cooldown(agent_id, seconds=60)
|
||||
await self._do_retry(
|
||||
session_id, agent_id, task_id, on_complete, db_path,
|
||||
"fallback_retry_count" # 独立计数,不与 gateway_timeout 的 retry_count 共用
|
||||
@@ -839,7 +843,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
# A10(compact_failed), A12(agent_error)
|
||||
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
|
||||
if outcome in ("crashed", "compact_failed", "process_crash", "session_stuck",
|
||||
"compact_hanging", "agent_error") and self.counter:
|
||||
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
|
||||
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
|
||||
logger.info("Crash/error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
|
||||
# 注意: cooldown 期间任务状态仍为 working,但 counter 已释放。
|
||||
@@ -1286,8 +1290,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
task_status: Optional[str], stdout_text: str = "") -> dict:
|
||||
"""分类退出原因,返回处理策略
|
||||
|
||||
v3.0: 基于 JSON status/summary/executionTrace 判定,不再依赖 transport 字段。
|
||||
只有 status="timeout" 触发 retry,其他都不 retry。
|
||||
v3.1: A0 拆分为 A14-A17(信号中断/stderr 智能分类)。
|
||||
A8/A10 改为可恢复 retry。cooldown 统一 60s。
|
||||
"""
|
||||
status = json_result.get("status")
|
||||
summary = json_result.get("summary", "")
|
||||
@@ -1312,10 +1316,22 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
return {"outcome": "gateway_timeout", "should_retry": True,
|
||||
"retry_field": "retry_count"}
|
||||
|
||||
# A0: stdout 为空且 exit≠0 = 进程异常终止
|
||||
# 注意:exit=0 + stdout 为空可能是正常完成(--json 没输出),
|
||||
# 此时 task_status 如果是 done/review 会被上面的 A4 兜住
|
||||
# A0 拆分: 无 JSON 输出 + exit≠0
|
||||
if status is None and not stdout_text.strip() and exit_code != 0:
|
||||
# A14: SIGINT(130) / SIGTERM(143) → 外部中断,可恢复
|
||||
if exit_code in (130, 143):
|
||||
return {"outcome": "interrupted", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
# A15/A16: stderr 含 network/compact 关键字 → 可恢复
|
||||
if stderr_text:
|
||||
stderr_lower = stderr_text.lower()
|
||||
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
|
||||
return {"outcome": "gateway_unreachable", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
|
||||
return {"outcome": "compact_interrupted", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
# A17: 真正的 crash → 保持 working,ticker 兜底
|
||||
return {"outcome": "crashed", "should_retry": False, "original": "process_crash"}
|
||||
|
||||
# stdout 为空但 exit=0:可能是正常完成但 --json 没输出
|
||||
@@ -1332,13 +1348,15 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
|
||||
if any(kw in stderr_lower for kw in ["401", "403", "unauthorized", "auth"]):
|
||||
return {"outcome": "auth_failed", "should_retry": False}
|
||||
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
|
||||
return {"outcome": "gateway_unreachable", "should_retry": False}
|
||||
return {"outcome": "gateway_unreachable", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
if any(kw in stderr_lower for kw in ["rate_limit", "500", "503", "api error"]):
|
||||
return {"outcome": "api_error", "should_retry": False}
|
||||
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
|
||||
return {"outcome": "compact_failed", "should_retry": False}
|
||||
if any(kw in stderr_lower for kw in ["lock", "busy", "concurrent", "lane task error"]):
|
||||
return {"outcome": "lock_conflict", "should_retry": False}
|
||||
return {"outcome": "lock_conflict", "should_retry": True,
|
||||
"retry_field": "retry_count", "cooldown_seconds": 60}
|
||||
return {"outcome": "agent_error", "should_retry": False}
|
||||
|
||||
# 兜底:status 未知值
|
||||
|
||||
Reference in New Issue
Block a user