diff --git a/src/daemon/spawner.py b/src/daemon/spawner.py index 9b29373..b867c05 100644 --- a/src/daemon/spawner.py +++ b/src/daemon/spawner.py @@ -873,13 +873,11 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ compact_wait_count = self._compact_waits.get(task_id, 0) + 1 self._compact_waits[task_id] = compact_wait_count if compact_wait_count >= self.max_monitor_timeouts: - logger.error("Agent %s max compact waits reached (session=%s, count=%d)", - agent_id, session_id, compact_wait_count) - self._mark_task(db_path, task_id, "failed", { - "reason": "compact_hanging", - "compact_wait_count": compact_wait_count, - "diagnostics": state, - }) + # #07.3 ACT-2: compact_hanging 不标 failed,只 release counter + # 进程还活着但不 monitor,等 ticker _check_timeouts 超时回收 → 重新 dispatch + logger.warning("Agent %s compact hanging after %d waits, releasing counter for ticker re-dispatch", + agent_id, compact_wait_count) + self._compact_waits.pop(task_id, None) await self._do_on_complete_async(on_complete, agent_id, "compact_hanging") return # 继续等 @@ -1030,10 +1028,12 @@ curl -X POST http://{api_host}:{api_port}/api/projects/{project_id}/tasks/{task_ task_db_path=db_path, skip_counter=True, # Bug-4 fix: counter 已在原始 spawn 中持有 ) - except AgentBusyError: - # agent 被其他任务占用(不应发生,但防御) - logger.warning("Retry spawn skipped: %s busy (unexpected)", agent_id) - await self._do_on_complete_async(on_complete, agent_id, "retry_agent_busy") + except AgentBusyError as e: + # #07.3 ACT-3: session busy(compact/lock/running)= 暂时性阻塞 + # release counter → 任务保持 working → ticker 重新 dispatch + logger.warning("Retry spawn deferred: %s session busy (%s), releasing counter for ticker re-dispatch", + agent_id, e.reason) + await self._do_on_complete_async(on_complete, agent_id, "retry_session_busy") except Exception: logger.exception("Retry spawn failed for %s", agent_id) await self._do_on_complete_async(on_complete, agent_id, "retry_spawn_failed") diff --git a/src/daemon/ticker.py b/src/daemon/ticker.py index ebc21e4..582e2f0 100644 --- a/src/daemon/ticker.py +++ b/src/daemon/ticker.py @@ -1281,7 +1281,8 @@ Parent Task ID: {parent_task.id} logger.error("Task %s: executor crash limit (3/30m), marking failed", task.id) continue - start_time_str = task.started_at or task.claimed_at + # #07.3 ACT-1: updated_at fallback 覆盖 mail auto-working(无 started_at/claimed_at) + start_time_str = task.started_at or task.claimed_at or task.updated_at if not start_time_str: continue try: