feat: §17 ToolchainHandler 强约束实现(Step 1-4)
CI / lint (pull_request) Failing after 7s
CI / test (pull_request) Has been skipped
CI / notify-on-failure (pull_request) Successful in 1s

Step 1: 基础设施
- prompt_composer.py: PromptContext 新增 action_type + action_steps 字段
- spawner.py: handler 路径提取 action_type/action_steps 传入 PromptContext
- db.py: comments CHECK 约束加入 action_report

Step 2: ToolchainHandler 强化
- ToolchainContextSection: 加 steps 渲染 + action_hint(按 action_type)
- ToolchainApiSection: 改为 action_report 提交指引 + Gitea 协作指引
- ToolchainConstraintsSection: 5 条强约束 + Red Flags 防self-rationalization
- verify_completion: action_report → output → comment 三层 fallback
  - review_merged 始终通过(纯通知)
  - infrastructure_failure 始终通过(防递归)
  - 修复 LENGTH(content) → LENGTH(body) bug
- on_failure 三分路: 业务→Gitea PR comment / 系统→Gitea Issue / 基础设施→toolchain task

Step 3: toolchain_routes 改造
- 新增 _toolchain_db_path() + _send_toolchain_task()
- 所有 8 个 handler 改为 _send_toolchain_task
- _send_mail 保留但不再被 toolchain handler 调用
- _send_deploy_failure_mail → _send_deploy_failure_task

Step 4: 测试
- 29 个单元测试全部通过
- 全量 456 passed, 3 skipped, 0 failures
This commit is contained in:
cfdaily
2026-06-13 23:36:44 +08:00
parent 90f4e3284c
commit c89863a288
6 changed files with 1140 additions and 125 deletions
+513
View File
@@ -0,0 +1,513 @@
"""Unit tests for §17 ToolchainHandler 强约束实现."""
import json
import os
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
# Add project root to path
PROJECT_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from src.daemon.prompt_composer import PromptContext, PromptComposer
from src.daemon.toolchain_handler import (
ToolchainHandler,
ToolchainContextSection,
ToolchainApiSection,
ToolchainConstraintsSection,
_ACTION_HINTS,
)
from src.daemon.base_task_handler import VerifyResult
from src.blackboard.db import init_db, get_connection
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def tmp_db():
"""Create a temporary _toolchain DB for testing."""
with tempfile.TemporaryDirectory() as d:
db_path = Path(d) / "blackboard.db"
init_db(db_path)
yield db_path
@pytest.fixture
def handler():
return ToolchainHandler()
def _insert_task(db_path, task_id, must_haves_json, status="working"):
"""Insert a task into DB for testing."""
conn = get_connection(db_path)
conn.execute(
"INSERT INTO tasks (id, title, description, assignee, assigned_by, "
"must_haves, task_type, status) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
(task_id, "test", "test desc", "zhangfei-dev", "system",
must_haves_json, "toolchain", status)
)
conn.commit()
conn.close()
def _insert_comment(db_path, task_id, author, body, comment_type="general"):
"""Insert a comment into DB."""
conn = get_connection(db_path)
conn.execute(
"INSERT INTO comments (task_id, author, comment_type, body) VALUES (?, ?, ?, ?)",
(task_id, author, comment_type, body)
)
conn.commit()
conn.close()
def _insert_output(db_path, task_id, content="test output"):
"""Insert an output into DB."""
conn = get_connection(db_path)
conn.execute(
"INSERT INTO outputs (task_id, agent, output_type, title, summary) "
"VALUES (?, ?, ?, ?, ?)",
(task_id, "zhangfei-dev", "document", "test", content)
)
conn.commit()
conn.close()
# ---------------------------------------------------------------------------
# Step 1a: PromptContext new fields
# ---------------------------------------------------------------------------
class TestPromptContextFields:
def test_action_type_default(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
)
assert ctx.action_type == ""
def test_action_steps_default(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
)
assert ctx.action_steps == []
def test_action_type_set(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
action_type="review_result",
)
assert ctx.action_type == "review_result"
def test_action_steps_set(self):
steps = ["step 1", "step 2"]
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
action_steps=steps,
)
assert ctx.action_steps == steps
# ---------------------------------------------------------------------------
# Step 2a: ToolchainContextSection steps rendering + action_hint
# ---------------------------------------------------------------------------
class TestToolchainContextSection:
def test_renders_steps(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
event_type="review_result",
event_data={"pr_number": "42", "repo": "sanguo/test"},
action_type="review_result",
action_steps=["合并 PR", "提交 action report"],
)
section = ToolchainContextSection()
result = section.render(ctx)
assert "必须执行的步骤" in result
assert "1. 合并 PR" in result
assert "2. 提交 action report" in result
def test_renders_action_hint(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
event_type="ci_failure",
action_type="ci_failure",
action_steps=[],
)
section = ToolchainContextSection()
result = section.render(ctx)
assert "CI 失败" in result
assert "需要你修复" in result
def test_renders_default_hint_for_unknown_action_type(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
event_type="unknown",
action_type="unknown_type",
action_steps=[],
)
section = ToolchainContextSection()
result = section.render(ctx)
assert "需要你执行动作的事件" in result
def test_no_steps_no_steps_section(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
event_type="review_merged",
action_type="review_merged",
action_steps=[],
)
section = ToolchainContextSection()
result = section.render(ctx)
assert "必须执行的步骤" not in result
# ---------------------------------------------------------------------------
# Step 2b: ToolchainApiSection action_report guidance
# ---------------------------------------------------------------------------
class TestToolchainApiSection:
def test_has_action_report_instruction(self):
ctx = PromptContext(
task_id="tc-123", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
)
section = ToolchainApiSection()
result = section.render(ctx)
assert "action_report" in result
assert "comment_type" in result
assert "tc-123" in result
def test_no_manual_done_instruction(self):
ctx = PromptContext(
task_id="tc-123", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
)
section = ToolchainApiSection()
result = section.render(ctx)
# Should NOT contain the old "标记为 done" instruction
assert "标记为 **done**" not in result
assert '"status": "done"' not in result
def test_has_outputs_instruction(self):
ctx = PromptContext(
task_id="tc-123", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
)
section = ToolchainApiSection()
result = section.render(ctx)
assert "outputs" in result
def test_has_gitea_collaboration_instruction(self):
ctx = PromptContext(
task_id="tc-123", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
)
section = ToolchainApiSection()
result = section.render(ctx)
assert "Gitea" in result
assert "Mail API" in result
# ---------------------------------------------------------------------------
# Step 2c: ToolchainConstraintsSection Red Flags
# ---------------------------------------------------------------------------
class TestToolchainConstraintsSection:
def test_has_red_flags_table(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
)
section = ToolchainConstraintsSection()
result = section.render(ctx)
assert "Red Flags" in result
assert "" in result
def test_has_all_5_constraints(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
)
section = ToolchainConstraintsSection()
result = section.render(ctx)
assert "必须按步骤执行" in result
assert "必须提交 action report" in result
assert "不要执行任何状态转换命令" in result
assert "不需要回复" in result
assert "所有协作通过 Gitea 完成" in result
def test_has_strong_language(self):
ctx = PromptContext(
task_id="t1", title="test", description="d",
must_haves="", project_id="_toolchain", agent_id="a1",
)
section = ToolchainConstraintsSection()
result = section.render(ctx)
assert "强制要求" in result
assert "不是建议" in result
# ---------------------------------------------------------------------------
# Step 2d: verify_completion tests
# ---------------------------------------------------------------------------
class TestVerifyCompletion:
def test_action_report_passes(self, handler, tmp_db):
"""action_report comment → pass"""
must_haves = json.dumps({"action_type": "review_result"})
_insert_task(tmp_db, "t1", must_haves)
_insert_comment(tmp_db, "t1", "zhangfei-dev",
"已修复 CI", comment_type="action_report")
result = handler.verify_completion("t1", tmp_db)
assert result.passed is True
assert result.reason == "has_action_report"
def test_no_action_report_fallback_output(self, handler, tmp_db):
"""No action_report but has output → pass (fallback)"""
must_haves = json.dumps({"action_type": "review_result"})
_insert_task(tmp_db, "t2", must_haves)
_insert_output(tmp_db, "t2", "review result content")
result = handler.verify_completion("t2", tmp_db)
assert result.passed is True
assert result.reason == "has_output"
def test_no_action_report_fallback_comment(self, handler, tmp_db):
"""No action_report but has substantial comment → pass (fallback)"""
must_haves = json.dumps({"action_type": "review_result"})
_insert_task(tmp_db, "t3", must_haves)
_insert_comment(tmp_db, "t3", "zhangfei-dev",
"This is a sufficiently long comment about the task.")
result = handler.verify_completion("t3", tmp_db)
assert result.passed is True
assert result.reason == "has_comment"
def test_nothing_passes(self, handler, tmp_db):
"""No action_report, no output, no comment → fail"""
must_haves = json.dumps({"action_type": "review_result"})
_insert_task(tmp_db, "t4", must_haves)
result = handler.verify_completion("t4", tmp_db)
assert result.passed is False
assert result.reason == "no_action"
def test_short_comment_fails(self, handler, tmp_db):
"""Comment < 20 chars → fail"""
must_haves = json.dumps({"action_type": "review_result"})
_insert_task(tmp_db, "t5", must_haves)
_insert_comment(tmp_db, "t5", "zhangfei-dev", "ok")
result = handler.verify_completion("t5", tmp_db)
assert result.passed is False
def test_review_merged_auto_passes(self, handler, tmp_db):
"""review_merged → always pass"""
must_haves = json.dumps({"action_type": "review_merged"})
_insert_task(tmp_db, "t6", must_haves)
result = handler.verify_completion("t6", tmp_db)
assert result.passed is True
assert result.reason == "merged_passthrough"
def test_infrastructure_failure_auto_passes(self, handler, tmp_db):
"""infrastructure_failure → always pass (anti-recursion)"""
must_haves = json.dumps({"action_type": "infrastructure_failure"})
_insert_task(tmp_db, "t7", must_haves)
result = handler.verify_completion("t7", tmp_db)
assert result.passed is True
assert result.reason == "infrastructure_passthrough"
# ---------------------------------------------------------------------------
# Step 3a: _send_toolchain_task tests
# ---------------------------------------------------------------------------
class TestSendToolchainTask:
def test_creates_task_in_toolchain_db(self):
"""_send_toolchain_task creates a task in _toolchain DB."""
from src.api.toolchain_routes import _send_toolchain_task, _toolchain_db_path
with patch("src.api.toolchain_routes.get_data_root") as mock_root:
with tempfile.TemporaryDirectory() as d:
mock_root.return_value = Path(d)
task_id = _send_toolchain_task(
to_agent="zhangfei-dev",
title="Test Task",
description="Test description",
event_type="ci_failure",
action_type="ci_failure",
steps=["Fix test", "Submit report"],
context_data={"pr_number": 42},
)
assert task_id.startswith("tc-")
# Verify task was written to _toolchain DB
db_path = _toolchain_db_path()
conn = get_connection(db_path)
row = conn.execute(
"SELECT * FROM tasks WHERE id=?", (task_id,)
).fetchone()
assert row is not None
assert row["task_type"] == "toolchain"
assert row["assignee"] == "zhangfei-dev"
# Verify must_haves JSON
meta = json.loads(row["must_haves"])
assert meta["event_type"] == "ci_failure"
assert meta["action_type"] == "ci_failure"
assert meta["steps"] == ["Fix test", "Submit report"]
assert meta["context"]["pr_number"] == 42
conn.close()
def test_unknown_agent_returns_empty(self):
"""_send_toolchain_task with unknown agent returns empty string."""
from src.api.toolchain_routes import _send_toolchain_task
task_id = _send_toolchain_task(
to_agent="unknown-agent",
title="Test",
description="desc",
event_type="test",
action_type="test",
steps=[],
)
assert task_id == ""
# ---------------------------------------------------------------------------
# Step 2e: on_failure three-way routing tests
# ---------------------------------------------------------------------------
class TestOnFailureRouting:
def test_business_failure_creates_gitea_comment(self, handler, tmp_db):
"""Business failure → Gitea PR comment"""
must_haves = json.dumps({
"action_type": "review_result",
"context": {"repo": "sanguo/test", "pr_number": 42},
"assignee": "zhangfei-dev",
})
_insert_task(tmp_db, "t-fail", must_haves)
with patch.object(handler, "_create_gitea_comment") as mock_comment:
mock_comment.return_value = True
verify = VerifyResult(False, "no_action", "no action_report")
handler.on_failure("t-fail", "zhangfei-dev", tmp_db, verify)
mock_comment.assert_called_once()
call_args = mock_comment.call_args
assert call_args[0][0] == "sanguo/test"
assert call_args[0][1] == 42
def test_infrastructure_failure_creates_task(self, handler, tmp_db):
"""Infrastructure failure → _send_toolchain_task for jiangwei-infra"""
must_haves = json.dumps({
"action_type": "review_result",
"context": {"repo": "sanguo/test", "pr_number": 42},
})
_insert_task(tmp_db, "t-infra", must_haves)
with patch.object(handler, "_create_gitea_comment") as mock_comment:
mock_comment.return_value = False # Gitea API down
with patch.object(handler, "_create_gitea_issue") as mock_issue:
mock_issue.return_value = False # Gitea API still down
with patch("src.api.toolchain_routes._send_toolchain_task") as mock_send:
mock_send.return_value = "tc-infra"
verify = VerifyResult(False, "no_action", "no action_report")
handler.on_failure("t-infra", "zhangfei-dev", tmp_db, verify)
# Should eventually try to create infrastructure_failure task
mock_send.assert_called()
call_kwargs = mock_send.call_args
assert call_kwargs[1]["action_type"] == "infrastructure_failure"
assert call_kwargs[1]["to_agent"] == "jiangwei-infra"
# ---------------------------------------------------------------------------
# Regression: _mail path unaffected
# ---------------------------------------------------------------------------
class TestMailRegression:
def test_send_mail_still_exists(self):
"""_send_mail function is preserved."""
from src.api.toolchain_routes import _send_mail
assert callable(_send_mail)
def test_send_mail_not_called_by_handlers(self):
"""No toolchain handler calls _send_mail."""
import inspect
from src.api import toolchain_routes
# Get source of handler functions
source = inspect.getsource(toolchain_routes)
# _send_mail should appear only in its own definition, not in handler bodies
lines = source.split("\n")
in_handler = False
handler_send_mail_calls = []
for i, line in enumerate(lines):
if line.strip().startswith("async def _handle_") or line.strip().startswith("async def _send_mention_mails"):
in_handler = True
elif line.strip().startswith("async def ") or line.strip().startswith("def _"):
if not line.strip().startswith("async def _handle_") and not line.strip().startswith("async def _send_mention_mails"):
in_handler = False
if in_handler and "_send_mail(" in line and not line.strip().startswith("#"):
handler_send_mail_calls.append((i, line.strip()))
assert len(handler_send_mail_calls) == 0, \
f"_send_mail still called in handlers: {handler_send_mail_calls}"
# ---------------------------------------------------------------------------
# Integration: full prompt build
# ---------------------------------------------------------------------------
class TestFullPromptBuild:
def test_prompt_contains_all_sections(self, handler):
"""Full prompt has context, API, and constraints sections."""
ctx = PromptContext(
task_id="tc-test",
title="CI 失败修复",
description="Fix CI failure",
must_haves=json.dumps({
"event_type": "ci_failure",
"action_type": "ci_failure",
"steps": ["Fix test", "Push", "Submit report"],
"context": {"pr_number": 42},
}),
project_id="_toolchain",
agent_id="zhangfei-dev",
event_type="ci_failure",
event_data={"pr_number": "42", "repo": "sanguo/test"},
action_type="ci_failure",
action_steps=["Fix test", "Push", "Submit report"],
)
prompt = handler.build_prompt(ctx)
# Must have action hint
assert "CI 失败" in prompt
assert "需要你修复" in prompt
# Must have steps
assert "必须执行的步骤" in prompt
assert "1. Fix test" in prompt
# Must have API section with action_report
assert "action_report" in prompt
assert "tc-test" in prompt
# Must have constraints with Red Flags
assert "Red Flags" in prompt
assert "强制要求" in prompt