auto-sync: 2026-06-02 19:10:31

This commit is contained in:
cfdaily
2026-06-02 19:10:31 +08:00
parent cbdb8650a5
commit f83de496e3
+52 -17
View File
@@ -775,29 +775,64 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_
)
elif outcome == "api_error":
# A9: 429/API 错误 → release counter(on_complete)+ 推回 pending + 冷却
# 有上限:api_retry_count 累计达 max_retries 则标 failed
await self._do_on_complete_async(on_complete, agent_id, outcome)
if self.counter:
self.counter.set_cooldown(agent_id)
if db_path and task_id:
self._mark_task(db_path, task_id, "pending", {
"reason": "api_error_retry",
})
logger.info("Task %s pushed back to pending (api_error)", task_id)
retry_counts = self._get_retry_counts(db_path, task_id)
api_count = retry_counts.get("api_retry_count", 0) + 1
retry_counts["api_retry_count"] = api_count
self._update_retry_counts(db_path, task_id, retry_counts)
if api_count >= self.max_retries:
logger.error("Task %s api_retry_count=%d >= max_retries, marking failed",
task_id, api_count)
self._mark_task(db_path, task_id, "failed", {
"reason": "max_api_retry_count", "count": api_count,
})
else:
self._mark_task(db_path, task_id, "pending", {
"reason": "api_error_retry",
"api_retry_count": api_count,
})
logger.info("Task %s pushed back to pending (api_error, api_retry=%d/%d)",
task_id, api_count, self.max_retries)
elif outcome == "fallback_timeout" and not cls["should_retry"]:
# A5/A6: fallback 不应出现,标 failed + escalate + context 日志
logger.error("UNEXPECTED FALLBACK: agent=%s session=%s task=%s "
"fallback_used=%s fallback_reason=%s counter_active=%s "
"This indicates counter check failed to prevent concurrent spawn.",
agent_id, session_id, task_id,
json_result.get("fallback_used"), json_result.get("fallback_reason"),
self.counter.active_agents if self.counter else "N/A")
await self._do_on_complete_async(on_complete, agent_id, outcome)
# A3/A3b: fallback 分级处理
# fallback_count 从 task_attempts.metadata 读取,
# 达 max_retries 标 failed(A3),否则 retry + cooldown(A3b)
fallback_count = 0
if db_path and task_id:
self._mark_task(db_path, task_id, "failed", {
"reason": "unexpected_fallback",
"status": json_result.get("status"),
"fallback_reason": json_result.get("fallback_reason"),
})
retry_counts = self._get_retry_counts(db_path, task_id)
fallback_count = retry_counts.get("fallback_count", 0) + 1
retry_counts["fallback_count"] = fallback_count
self._update_retry_counts(db_path, task_id, retry_counts)
if fallback_count >= self.max_retries:
# A3: 连续 fallback 达上限,标 failed
logger.error("A3 fallback exhausted: agent=%s session=%s task=%s "
"fallback_count=%d reason=%s",
agent_id, session_id, task_id, fallback_count,
json_result.get("fallback_reason"))
await self._do_on_complete_async(on_complete, agent_id, outcome)
if db_path and task_id:
self._mark_task(db_path, task_id, "failed", {
"reason": "fallback_exhausted",
"fallback_count": fallback_count,
"fallback_reason": json_result.get("fallback_reason"),
})
else:
# A3b: fallback 未达上限,retry + cooldown
logger.warning("A3b fallback retry: agent=%s session=%s task=%s "
"fallback_count=%d/%d reason=%s",
agent_id, session_id, task_id, fallback_count,
self.max_retries, json_result.get("fallback_reason"))
if self.counter:
self.counter.set_cooldown(agent_id, seconds=30)
await self._do_retry(
session_id, agent_id, task_id, on_complete, db_path,
"retry_count"
)
else:
# 其他:A1(completed), A4(agent_failed), A7(auth_failed),
# A8(gateway_unreachable), A11(lock_conflict),