diff --git a/src/daemon/spawner.py b/src/daemon/spawner.py index 29306e0..cc7b894 100644 --- a/src/daemon/spawner.py +++ b/src/daemon/spawner.py @@ -926,7 +926,11 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ @staticmethod def _classify_outcome(exit_code: int, meta: dict, stderr_text: str, task_status: Optional[str]) -> dict: - """分类退出原因,返回处理策略""" + """分类退出原因,返回处理策略 + + v2.7.2: 去掉 release_counter 字段。进程退出 = release counter(由 wrapped_on_complete 保证)。 + 只有 A2/A3(gateway_timeout)触发 retry,其他都不 retry。 + """ transport = meta.get("transport", "") fallback_reason = meta.get("fallbackReason") @@ -936,59 +940,49 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ # A4: 任务自己 failed if task_status == "failed": - return {"outcome": "agent_failed", "release_counter": True, - "should_retry": False} + return {"outcome": "agent_failed", "should_retry": False} # A1: 正常完成 if exit_code == 0 and transport != "embedded" and is_terminal: - return {"outcome": "completed", "release_counter": True, - "should_retry": False} + return {"outcome": "completed", "should_retry": False} - # A5/A6: fallback + # A5/A6: fallback(不应出现 — 出现说明 counter 检查失效) if exit_code == 0 and transport == "embedded": if is_terminal: - return {"outcome": "fallback_timeout", "release_counter": True, - "should_retry": False} - # fallback 完成但任务没 done → 续杯 - return {"outcome": "fallback_timeout", "release_counter": False, - "should_retry": True, "retry_field": "retry_count"} + return {"outcome": "fallback_timeout", "should_retry": False} + # fallback 完成但任务没 done → 不 retry,等 _handle_exit 特殊处理 + return {"outcome": "fallback_timeout", "should_retry": False} - # A2/A3: Gateway timeout(任务没完成) + # A2/A3: Gateway timeout(任务没完成)— 唯一续杯场景 if exit_code == 0 and not is_terminal: - return {"outcome": "gateway_timeout", "release_counter": False, - "should_retry": True, "retry_field": "retry_count"} + return {"outcome": "gateway_timeout", "should_retry": True, + "retry_field": "retry_count"} # A7: 认证失败 if exit_code != 0 and any(kw in stderr_text for kw in ["401", "403", "unauthorized", "auth"]): - return {"outcome": "auth_failed", "release_counter": True, - "should_retry": False} + return {"outcome": "auth_failed", "should_retry": False} # A8: Gateway 不可达 if exit_code != 0 and any(kw in stderr_text for kw in ["ECONNREFUSED", "ETIMEDOUT", "gateway closed", "ECONNRESET"]): - return {"outcome": "gateway_unreachable", "release_counter": True, - "should_retry": False, # 让 ticker 自然重试 + return {"outcome": "gateway_unreachable", "should_retry": False, "count_field": "connect_retry_count"} - # A9: API 错误 + # A9: API 错误(429 等)— 不 retry,_handle_exit 推回 pending + 冷却 if exit_code != 0 and any(kw in stderr_text for kw in ["rate_limit", "500", "503", "API error"]): - return {"outcome": "api_error", "release_counter": True, - "should_retry": False, + return {"outcome": "api_error", "should_retry": False, "count_field": "api_retry_count"} - # A10: compact 失败 + # A10: compact 失败 — 不 retry,等 ticker 重新调度 if exit_code != 0 and any(kw in stderr_text for kw in ["compaction-diag", "context-overflow", "timeout-compaction"]): - return {"outcome": "compact_failed", "release_counter": False, - "should_retry": True, "retry_field": "retry_count"} + return {"outcome": "compact_failed", "should_retry": False} # A11: Lock 冲突 if exit_code != 0 and any(kw in stderr_text for kw in ["lock", "busy", "concurrent", "lane task error"]): - return {"outcome": "lock_conflict", "release_counter": True, - "should_retry": False, + return {"outcome": "lock_conflict", "should_retry": False, "count_field": "lock_retry_count"} - # A12: 其他 - return {"outcome": "agent_error", "release_counter": False, - "should_retry": True, "retry_field": "retry_count"} + # A12: 其他 — 不 retry + return {"outcome": "agent_error", "should_retry": False} @staticmethod def _get_retry_counts(db_path: Optional[Path], task_id: Optional[str]) -> dict: