@staticmethod
def _classify_outcome(exit_code: int, json_result: dict, stderr_text: str,
                      task_status: Optional[str], stdout_text: str = "") -> dict:
    """Classify why the agent process exited and return the handling strategy.

    v3.0: the decision is driven by the ``status``, ``summary`` and
    ``fallback_used`` fields of the agent's JSON result; the transport field
    is no longer consulted. Only ``status == "timeout"`` requests a retry.

    Args:
        exit_code: Process exit code. Not used by the v3.0 rules; kept so the
            signature stays compatible with existing callers.
        json_result: Parsed JSON result of the agent. ``None`` is treated as
            an empty dict.
        stderr_text: Captured stderr, used only to sub-classify
            ``status == "error"``. ``None`` is treated as empty.
        task_status: Task status value supplied by the caller; ``"failed"``
            short-circuits to ``agent_failed``.
        stdout_text: Captured stdout. Empty output combined with a missing
            ``status`` is reported as a crashed process.

    Returns:
        A dict with ``outcome`` (str) and ``should_retry`` (bool). The
        ``gateway_timeout`` outcome additionally carries
        ``retry_field="retry_count"``.
    """
    # Defensive normalisation: callers may hand over None when the JSON could
    # not be parsed or no output was captured.
    json_result = json_result or {}
    stderr_lower = (stderr_text or "").lower()
    stdout_text = stdout_text or ""

    status = json_result.get("status")
    summary = json_result.get("summary", "")
    fallback_used = json_result.get("fallback_used", False)

    def _no_retry(outcome: str) -> dict:
        # Every outcome except gateway_timeout shares this shape.
        return {"outcome": outcome, "should_retry": False}

    # No status and no stdout at all: the process terminated abnormally.
    if status is None and not stdout_text.strip():
        return _no_retry("process_crash")

    # A4: the task status is already "failed".
    if task_status == "failed":
        return _no_retry("agent_failed")

    if status == "ok":
        # A5/A6: the run went through the fallback path.
        if fallback_used:
            return _no_retry("fallback_timeout")
        # A1: finished normally without fallback.
        if summary == "completed":
            return _no_retry("completed")

    # A2/A3: gateway timeout is the only outcome that asks for a retry.
    if status == "timeout":
        return {"outcome": "gateway_timeout", "should_retry": True,
                "retry_field": "retry_count"}

    # A7-A12: errors never retry; stderr keywords pick the sub-category.
    # The first matching rule wins, so the order below is significant.
    if status == "error":
        error_rules = (
            ("auth_failed", ("401", "403", "unauthorized", "auth")),
            ("gateway_unreachable",
             ("econnrefused", "etimedout", "gateway closed", "econnreset")),
            ("api_error", ("rate_limit", "500", "503", "api error")),
            ("compact_failed", ("compaction-diag", "context-overflow")),
            ("lock_conflict", ("lock", "busy", "concurrent", "lane task error")),
        )
        for outcome, keywords in error_rules:
            if any(kw in stderr_lower for kw in keywords):
                return _no_retry(outcome)
        return _no_retry("agent_error")

    # Fallthrough: unrecognised status value (or "ok" without "completed").
    return _no_retry("unknown_status")