auto-sync: 2026-06-02 23:41:57

This commit is contained in:
cfdaily
2026-06-02 23:41:57 +08:00
parent 7026e5d6a6
commit 23c653b05f
+27 -9
View File
@@ -768,6 +768,10 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
agent_id, session_id, outcome, exit_code, task_status)
if cls["should_retry"]:
# cooldown: 新增的可恢复场景(A14/A15/A16/A8/A10)
cooldown_seconds = cls.get("cooldown_seconds", 0)
if cooldown_seconds and self.counter:
self.counter.set_cooldown(agent_id, seconds=cooldown_seconds)
# A2/A3: gateway_timeout → 续杯(on_complete 会 release counter)
await self._do_retry(
session_id, agent_id, task_id, on_complete, db_path,
@@ -828,7 +832,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
agent_id, session_id, task_id, fallback_count,
self.max_retries, json_result.get("fallback_reason"))
if self.counter:
self.counter.set_cooldown(agent_id, seconds=30)
self.counter.set_cooldown(agent_id, seconds=60)
await self._do_retry(
session_id, agent_id, task_id, on_complete, db_path,
"fallback_retry_count" # 独立计数,不与 gateway_timeout 的 retry_count 共用
@@ -839,7 +843,7 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
# A10(compact_failed), A12(agent_error)
# v2.8.1 Fix-3a: crash 类 outcome 设 cooldown,给 agent session 恢复时间
if outcome in ("crashed", "compact_failed", "process_crash", "session_stuck",
"compact_hanging", "agent_error") and self.counter:
"compact_hanging", "agent_error", "compact_interrupted") and self.counter:
self.counter.set_cooldown(agent_id, seconds=300) # 5 分钟
logger.info("Crash/error cooldown set for %s: 300s (outcome=%s)", agent_id, outcome)
# 注意: cooldown 期间任务状态仍为 working,但 counter 已释放。
@@ -1286,8 +1290,8 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
task_status: Optional[str], stdout_text: str = "") -> dict:
"""分类退出原因,返回处理策略
v3.0: 基于 JSON status/summary/executionTrace 判定,不再依赖 transport 字段
只有 status="timeout" 触发 retry,其他都不 retry
v3.1: A0 拆分为 A14-A17(信号中断/stderr 智能分类)
A8/A10 改为可恢复 retry。cooldown 统一 60s
"""
status = json_result.get("status")
summary = json_result.get("summary", "")
@@ -1312,10 +1316,22 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
return {"outcome": "gateway_timeout", "should_retry": True,
"retry_field": "retry_count"}
# A0: stdout 为空且 exit≠0 = 进程异常终止
# 注意:exit=0 + stdout 为空可能是正常完成(--json 没输出),
# 此时 task_status 如果是 done/review 会被上面的 A4 兜住
# A0 拆分: 无 JSON 输出 + exit≠0
if status is None and not stdout_text.strip() and exit_code != 0:
# A14: SIGINT(130) / SIGTERM(143) → 外部中断,可恢复
if exit_code in (130, 143):
return {"outcome": "interrupted", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
# A15/A16: stderr 含 network/compact 关键字 → 可恢复
if stderr_text:
stderr_lower = stderr_text.lower()
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
return {"outcome": "gateway_unreachable", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
return {"outcome": "compact_interrupted", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
# A17: 真正的 crash → 保持 working,由 ticker 兜底
return {"outcome": "crashed", "should_retry": False, "original": "process_crash"}
# stdout 为空但 exit=0:可能是正常完成但 --json 没输出
@@ -1332,13 +1348,15 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
if any(kw in stderr_lower for kw in ["401", "403", "unauthorized", "auth"]):
return {"outcome": "auth_failed", "should_retry": False}
if any(kw in stderr_lower for kw in ["econnrefused", "etimedout", "gateway closed", "econnreset"]):
return {"outcome": "gateway_unreachable", "should_retry": False}
return {"outcome": "gateway_unreachable", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
if any(kw in stderr_lower for kw in ["rate_limit", "500", "503", "api error"]):
return {"outcome": "api_error", "should_retry": False}
if any(kw in stderr_lower for kw in ["compaction-diag", "context-overflow"]):
return {"outcome": "compact_failed", "should_retry": False}
if any(kw in stderr_lower for kw in ["lock", "busy", "concurrent", "lane task error"]):
return {"outcome": "lock_conflict", "should_retry": False}
return {"outcome": "lock_conflict", "should_retry": True,
"retry_field": "retry_count", "cooldown_seconds": 60}
return {"outcome": "agent_error", "should_retry": False}
# 兜底:status 未知值