feat: §17 ToolchainHandler 强约束实现（Step 1-4）

Step 1: 基础设施 - prompt_composer.py: PromptContext 新增 action_type + action_steps 字段 - spawner.py: handler 路径提取 action_type/action_steps 传入 PromptContext - db.py: comments CHECK 约束加入 action_report Step 2: ToolchainHandler 强化 - ToolchainContextSection: 加 steps 渲染 + action_hint（按 action_type） - ToolchainApiSection: 改为 action_report 提交指引 + Gitea 协作指引 - ToolchainConstraintsSection: 5 条强约束 + Red Flags 防self-rationalization - verify_completion: action_report → output → comment 三层 fallback - review_merged 始终通过（纯通知） - infrastructure_failure 始终通过（防递归） - 修复 LENGTH(content) → LENGTH(body) bug - on_failure 三分路: 业务→Gitea PR comment / 系统→Gitea Issue / 基础设施→toolchain task Step 3: toolchain_routes 改造 - 新增 _toolchain_db_path() + _send_toolchain_task() - 所有 8 个 handler 改为 _send_toolchain_task - _send_mail 保留但不再被 toolchain handler 调用 - _send_deploy_failure_mail → _send_deploy_failure_task Step 4: 测试 - 29 个单元测试全部通过 - 全量 456 passed, 3 skipped, 0 failures
2026-06-13 23:36:44 +08:00
parent 90f4e3284c
commit c89863a288
6 changed files with 1140 additions and 125 deletions
@@ -0,0 +1,513 @@
+"""Unit tests for §17 ToolchainHandler 强约束实现."""
+import json
+import os
+import sys
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Add project root to path
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+from src.daemon.prompt_composer import PromptContext, PromptComposer
+from src.daemon.toolchain_handler import (
+    ToolchainHandler,
+    ToolchainContextSection,
+    ToolchainApiSection,
+    ToolchainConstraintsSection,
+    _ACTION_HINTS,
+)
+from src.daemon.base_task_handler import VerifyResult
+from src.blackboard.db import init_db, get_connection
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def tmp_db():
+    """Create a temporary _toolchain DB for testing."""
+    with tempfile.TemporaryDirectory() as d:
+        db_path = Path(d) / "blackboard.db"
+        init_db(db_path)
+        yield db_path
+
+
+@pytest.fixture
+def handler():
+    return ToolchainHandler()
+
+
+def _insert_task(db_path, task_id, must_haves_json, status="working"):
+    """Insert a task into DB for testing."""
+    conn = get_connection(db_path)
+    conn.execute(
+        "INSERT INTO tasks (id, title, description, assignee, assigned_by, "
+        "must_haves, task_type, status) "
+        "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+        (task_id, "test", "test desc", "zhangfei-dev", "system",
+         must_haves_json, "toolchain", status)
+    )
+    conn.commit()
+    conn.close()
+
+
+def _insert_comment(db_path, task_id, author, body, comment_type="general"):
+    """Insert a comment into DB."""
+    conn = get_connection(db_path)
+    conn.execute(
+        "INSERT INTO comments (task_id, author, comment_type, body) VALUES (?, ?, ?, ?)",
+        (task_id, author, comment_type, body)
+    )
+    conn.commit()
+    conn.close()
+
+
+def _insert_output(db_path, task_id, content="test output"):
+    """Insert an output into DB."""
+    conn = get_connection(db_path)
+    conn.execute(
+        "INSERT INTO outputs (task_id, agent, output_type, title, summary) "
+        "VALUES (?, ?, ?, ?, ?)",
+        (task_id, "zhangfei-dev", "document", "test", content)
+    )
+    conn.commit()
+    conn.close()
+
+
+# ---------------------------------------------------------------------------
+# Step 1a: PromptContext new fields
+# ---------------------------------------------------------------------------
+
+class TestPromptContextFields:
+    def test_action_type_default(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+        )
+        assert ctx.action_type == ""
+
+    def test_action_steps_default(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+        )
+        assert ctx.action_steps == []
+
+    def test_action_type_set(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+            action_type="review_result",
+        )
+        assert ctx.action_type == "review_result"
+
+    def test_action_steps_set(self):
+        steps = ["step 1", "step 2"]
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+            action_steps=steps,
+        )
+        assert ctx.action_steps == steps
+
+
+# ---------------------------------------------------------------------------
+# Step 2a: ToolchainContextSection steps rendering + action_hint
+# ---------------------------------------------------------------------------
+
+class TestToolchainContextSection:
+    def test_renders_steps(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+            event_type="review_result",
+            event_data={"pr_number": "42", "repo": "sanguo/test"},
+            action_type="review_result",
+            action_steps=["合并 PR", "提交 action report"],
+        )
+        section = ToolchainContextSection()
+        result = section.render(ctx)
+        assert "必须执行的步骤" in result
+        assert "1. 合并 PR" in result
+        assert "2. 提交 action report" in result
+
+    def test_renders_action_hint(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+            event_type="ci_failure",
+            action_type="ci_failure",
+            action_steps=[],
+        )
+        section = ToolchainContextSection()
+        result = section.render(ctx)
+        assert "CI 失败" in result
+        assert "需要你修复" in result
+
+    def test_renders_default_hint_for_unknown_action_type(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+            event_type="unknown",
+            action_type="unknown_type",
+            action_steps=[],
+        )
+        section = ToolchainContextSection()
+        result = section.render(ctx)
+        assert "需要你执行动作的事件" in result
+
+    def test_no_steps_no_steps_section(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+            event_type="review_merged",
+            action_type="review_merged",
+            action_steps=[],
+        )
+        section = ToolchainContextSection()
+        result = section.render(ctx)
+        assert "必须执行的步骤" not in result
+
+
+# ---------------------------------------------------------------------------
+# Step 2b: ToolchainApiSection action_report guidance
+# ---------------------------------------------------------------------------
+
+class TestToolchainApiSection:
+    def test_has_action_report_instruction(self):
+        ctx = PromptContext(
+            task_id="tc-123", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
+        )
+        section = ToolchainApiSection()
+        result = section.render(ctx)
+        assert "action_report" in result
+        assert "comment_type" in result
+        assert "tc-123" in result
+
+    def test_no_manual_done_instruction(self):
+        ctx = PromptContext(
+            task_id="tc-123", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
+        )
+        section = ToolchainApiSection()
+        result = section.render(ctx)
+        # Should NOT contain the old "标记为 done" instruction
+        assert "标记为 **done**" not in result
+        assert '"status": "done"' not in result
+
+    def test_has_outputs_instruction(self):
+        ctx = PromptContext(
+            task_id="tc-123", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
+        )
+        section = ToolchainApiSection()
+        result = section.render(ctx)
+        assert "outputs" in result
+
+    def test_has_gitea_collaboration_instruction(self):
+        ctx = PromptContext(
+            task_id="tc-123", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="zhangfei-dev",
+        )
+        section = ToolchainApiSection()
+        result = section.render(ctx)
+        assert "Gitea" in result
+        assert "Mail API" in result
+
+
+# ---------------------------------------------------------------------------
+# Step 2c: ToolchainConstraintsSection Red Flags
+# ---------------------------------------------------------------------------
+
+class TestToolchainConstraintsSection:
+    def test_has_red_flags_table(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+        )
+        section = ToolchainConstraintsSection()
+        result = section.render(ctx)
+        assert "Red Flags" in result
+        assert "❌" in result
+
+    def test_has_all_5_constraints(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+        )
+        section = ToolchainConstraintsSection()
+        result = section.render(ctx)
+        assert "必须按步骤执行" in result
+        assert "必须提交 action report" in result
+        assert "不要执行任何状态转换命令" in result
+        assert "不需要回复" in result
+        assert "所有协作通过 Gitea 完成" in result
+
+    def test_has_strong_language(self):
+        ctx = PromptContext(
+            task_id="t1", title="test", description="d",
+            must_haves="", project_id="_toolchain", agent_id="a1",
+        )
+        section = ToolchainConstraintsSection()
+        result = section.render(ctx)
+        assert "强制要求" in result
+        assert "不是建议" in result
+
+
+# ---------------------------------------------------------------------------
+# Step 2d: verify_completion tests
+# ---------------------------------------------------------------------------
+
+class TestVerifyCompletion:
+    def test_action_report_passes(self, handler, tmp_db):
+        """action_report comment → pass"""
+        must_haves = json.dumps({"action_type": "review_result"})
+        _insert_task(tmp_db, "t1", must_haves)
+        _insert_comment(tmp_db, "t1", "zhangfei-dev",
+                        "已修复 CI", comment_type="action_report")
+
+        result = handler.verify_completion("t1", tmp_db)
+        assert result.passed is True
+        assert result.reason == "has_action_report"
+
+    def test_no_action_report_fallback_output(self, handler, tmp_db):
+        """No action_report but has output → pass (fallback)"""
+        must_haves = json.dumps({"action_type": "review_result"})
+        _insert_task(tmp_db, "t2", must_haves)
+        _insert_output(tmp_db, "t2", "review result content")
+
+        result = handler.verify_completion("t2", tmp_db)
+        assert result.passed is True
+        assert result.reason == "has_output"
+
+    def test_no_action_report_fallback_comment(self, handler, tmp_db):
+        """No action_report but has substantial comment → pass (fallback)"""
+        must_haves = json.dumps({"action_type": "review_result"})
+        _insert_task(tmp_db, "t3", must_haves)
+        _insert_comment(tmp_db, "t3", "zhangfei-dev",
+                        "This is a sufficiently long comment about the task.")
+
+        result = handler.verify_completion("t3", tmp_db)
+        assert result.passed is True
+        assert result.reason == "has_comment"
+
+    def test_nothing_passes(self, handler, tmp_db):
+        """No action_report, no output, no comment → fail"""
+        must_haves = json.dumps({"action_type": "review_result"})
+        _insert_task(tmp_db, "t4", must_haves)
+
+        result = handler.verify_completion("t4", tmp_db)
+        assert result.passed is False
+        assert result.reason == "no_action"
+
+    def test_short_comment_fails(self, handler, tmp_db):
+        """Comment < 20 chars → fail"""
+        must_haves = json.dumps({"action_type": "review_result"})
+        _insert_task(tmp_db, "t5", must_haves)
+        _insert_comment(tmp_db, "t5", "zhangfei-dev", "ok")
+
+        result = handler.verify_completion("t5", tmp_db)
+        assert result.passed is False
+
+    def test_review_merged_auto_passes(self, handler, tmp_db):
+        """review_merged → always pass"""
+        must_haves = json.dumps({"action_type": "review_merged"})
+        _insert_task(tmp_db, "t6", must_haves)
+
+        result = handler.verify_completion("t6", tmp_db)
+        assert result.passed is True
+        assert result.reason == "merged_passthrough"
+
+    def test_infrastructure_failure_auto_passes(self, handler, tmp_db):
+        """infrastructure_failure → always pass (anti-recursion)"""
+        must_haves = json.dumps({"action_type": "infrastructure_failure"})
+        _insert_task(tmp_db, "t7", must_haves)
+
+        result = handler.verify_completion("t7", tmp_db)
+        assert result.passed is True
+        assert result.reason == "infrastructure_passthrough"
+
+
+# ---------------------------------------------------------------------------
+# Step 3a: _send_toolchain_task tests
+# ---------------------------------------------------------------------------
+
+class TestSendToolchainTask:
+    def test_creates_task_in_toolchain_db(self):
+        """_send_toolchain_task creates a task in _toolchain DB."""
+        from src.api.toolchain_routes import _send_toolchain_task, _toolchain_db_path
+
+        with patch("src.api.toolchain_routes.get_data_root") as mock_root:
+            with tempfile.TemporaryDirectory() as d:
+                mock_root.return_value = Path(d)
+
+                task_id = _send_toolchain_task(
+                    to_agent="zhangfei-dev",
+                    title="Test Task",
+                    description="Test description",
+                    event_type="ci_failure",
+                    action_type="ci_failure",
+                    steps=["Fix test", "Submit report"],
+                    context_data={"pr_number": 42},
+                )
+
+                assert task_id.startswith("tc-")
+
+                # Verify task was written to _toolchain DB
+                db_path = _toolchain_db_path()
+                conn = get_connection(db_path)
+                row = conn.execute(
+                    "SELECT * FROM tasks WHERE id=?", (task_id,)
+                ).fetchone()
+                assert row is not None
+                assert row["task_type"] == "toolchain"
+                assert row["assignee"] == "zhangfei-dev"
+
+                # Verify must_haves JSON
+                meta = json.loads(row["must_haves"])
+                assert meta["event_type"] == "ci_failure"
+                assert meta["action_type"] == "ci_failure"
+                assert meta["steps"] == ["Fix test", "Submit report"]
+                assert meta["context"]["pr_number"] == 42
+                conn.close()
+
+    def test_unknown_agent_returns_empty(self):
+        """_send_toolchain_task with unknown agent returns empty string."""
+        from src.api.toolchain_routes import _send_toolchain_task
+
+        task_id = _send_toolchain_task(
+            to_agent="unknown-agent",
+            title="Test",
+            description="desc",
+            event_type="test",
+            action_type="test",
+            steps=[],
+        )
+        assert task_id == ""
+
+
+# ---------------------------------------------------------------------------
+# Step 2e: on_failure three-way routing tests
+# ---------------------------------------------------------------------------
+
+class TestOnFailureRouting:
+    def test_business_failure_creates_gitea_comment(self, handler, tmp_db):
+        """Business failure → Gitea PR comment"""
+        must_haves = json.dumps({
+            "action_type": "review_result",
+            "context": {"repo": "sanguo/test", "pr_number": 42},
+            "assignee": "zhangfei-dev",
+        })
+        _insert_task(tmp_db, "t-fail", must_haves)
+
+        with patch.object(handler, "_create_gitea_comment") as mock_comment:
+            mock_comment.return_value = True
+            verify = VerifyResult(False, "no_action", "no action_report")
+            handler.on_failure("t-fail", "zhangfei-dev", tmp_db, verify)
+            mock_comment.assert_called_once()
+            call_args = mock_comment.call_args
+            assert call_args[0][0] == "sanguo/test"
+            assert call_args[0][1] == 42
+
+    def test_infrastructure_failure_creates_task(self, handler, tmp_db):
+        """Infrastructure failure → _send_toolchain_task for jiangwei-infra"""
+        must_haves = json.dumps({
+            "action_type": "review_result",
+            "context": {"repo": "sanguo/test", "pr_number": 42},
+        })
+        _insert_task(tmp_db, "t-infra", must_haves)
+
+        with patch.object(handler, "_create_gitea_comment") as mock_comment:
+            mock_comment.return_value = False  # Gitea API down
+            with patch.object(handler, "_create_gitea_issue") as mock_issue:
+                mock_issue.return_value = False  # Gitea API still down
+                with patch("src.api.toolchain_routes._send_toolchain_task") as mock_send:
+                    mock_send.return_value = "tc-infra"
+                    verify = VerifyResult(False, "no_action", "no action_report")
+                    handler.on_failure("t-infra", "zhangfei-dev", tmp_db, verify)
+                    # Should eventually try to create infrastructure_failure task
+                    mock_send.assert_called()
+                    call_kwargs = mock_send.call_args
+                    assert call_kwargs[1]["action_type"] == "infrastructure_failure"
+                    assert call_kwargs[1]["to_agent"] == "jiangwei-infra"
+
+
+# ---------------------------------------------------------------------------
+# Regression: _mail path unaffected
+# ---------------------------------------------------------------------------
+
+class TestMailRegression:
+    def test_send_mail_still_exists(self):
+        """_send_mail function is preserved."""
+        from src.api.toolchain_routes import _send_mail
+        assert callable(_send_mail)
+
+    def test_send_mail_not_called_by_handlers(self):
+        """No toolchain handler calls _send_mail."""
+        import inspect
+        from src.api import toolchain_routes
+
+        # Get source of handler functions
+        source = inspect.getsource(toolchain_routes)
+        # _send_mail should appear only in its own definition, not in handler bodies
+        lines = source.split("\n")
+        in_handler = False
+        handler_send_mail_calls = []
+        for i, line in enumerate(lines):
+            if line.strip().startswith("async def _handle_") or line.strip().startswith("async def _send_mention_mails"):
+                in_handler = True
+            elif line.strip().startswith("async def ") or line.strip().startswith("def _"):
+                if not line.strip().startswith("async def _handle_") and not line.strip().startswith("async def _send_mention_mails"):
+                    in_handler = False
+            if in_handler and "_send_mail(" in line and not line.strip().startswith("#"):
+                handler_send_mail_calls.append((i, line.strip()))
+
+        assert len(handler_send_mail_calls) == 0, \
+            f"_send_mail still called in handlers: {handler_send_mail_calls}"
+
+
+# ---------------------------------------------------------------------------
+# Integration: full prompt build
+# ---------------------------------------------------------------------------
+
+class TestFullPromptBuild:
+    def test_prompt_contains_all_sections(self, handler):
+        """Full prompt has context, API, and constraints sections."""
+        ctx = PromptContext(
+            task_id="tc-test",
+            title="CI 失败修复",
+            description="Fix CI failure",
+            must_haves=json.dumps({
+                "event_type": "ci_failure",
+                "action_type": "ci_failure",
+                "steps": ["Fix test", "Push", "Submit report"],
+                "context": {"pr_number": 42},
+            }),
+            project_id="_toolchain",
+            agent_id="zhangfei-dev",
+            event_type="ci_failure",
+            event_data={"pr_number": "42", "repo": "sanguo/test"},
+            action_type="ci_failure",
+            action_steps=["Fix test", "Push", "Submit report"],
+        )
+
+        prompt = handler.build_prompt(ctx)
+
+        # Must have action hint
+        assert "CI 失败" in prompt
+        assert "需要你修复" in prompt
+        # Must have steps
+        assert "必须执行的步骤" in prompt
+        assert "1. Fix test" in prompt
+        # Must have API section with action_report
+        assert "action_report" in prompt
+        assert "tc-test" in prompt
+        # Must have constraints with Red Flags
+        assert "Red Flags" in prompt
+        assert "强制要求" in prompt