auto-sync: 2026-05-17 06:07:01

2026-05-17 06:07:01 +08:00
parent 485d7a7d36
commit 36d80a6f35
1 changed files with 342 additions and 0 deletions
@@ -0,0 +1,342 @@
+"""Review Pipeline — 产出验证流水线
+
+验证链：产出物存在性 → 格式合规 → 内容质量 → 评分
+Guardrail 门控：低风险免审、高风险强制审、极高风险双审
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+from src.blackboard.models import ObservationType, Task
+from src.blackboard.operations import Blackboard
+from src.blackboard.queries import Queries
+
+logger = logging.getLogger("moziplus-v2.review")
+
+
+class ReviewVerdict(str, Enum):
+    PASS = "pass"
+    FAIL = "fail"
+    NEEDS_REVIEW = "needs_review"
+
+
+class ReviewResult:
+    """单步验证结果"""
+
+    def __init__(
+        self,
+        step: str,
+        verdict: ReviewVerdict,
+        score: float = 0.0,
+        details: str = "",
+        suggestions: List[str] = None,
+    ):
+        self.step = step
+        self.verdict = verdict
+        self.score = score
+        self.details = details
+        self.suggestions = suggestions or []
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "step": self.step,
+            "verdict": self.verdict.value,
+            "score": self.score,
+            "details": self.details,
+            "suggestions": self.suggestions,
+        }
+
+
+class ReviewPipeline:
+    """产出验证流水线"""
+
+    def __init__(
+        self,
+        bb: Optional[Blackboard] = None,
+        custom_checks: Optional[Dict[str, Callable]] = None,
+        output_dir: Optional[Path] = None,
+    ):
+        self.bb = bb
+        self.custom_checks = custom_checks or {}
+        self.output_dir = output_dir
+
+    def run_review(
+        self,
+        task: Task,
+        outputs: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
+        """运行完整验证流水线
+
+        Returns:
+            {"verdict": str, "score": float, "results": [...],
+             "gate": str, "needs_human": bool}
+        """
+        results: List[ReviewResult] = []
+
+        # Step 1: 产出物存在性
+        r1 = self._check_existence(task, outputs)
+        results.append(r1)
+
+        # Step 2: 格式合规
+        r2 = self._check_format(task, outputs)
+        results.append(r2)
+
+        # Step 3: 内容质量（自定义检查）
+        r3 = self._check_quality(task, outputs)
+        results.append(r3)
+
+        # Step 4: Guardrail 门控
+        gate = self._determine_gate(task, results)
+        needs_human = gate in ("mandatory", "dual")
+
+        # 综合评分
+        scores = [r.score for r in results if r.score > 0]
+        avg_score = sum(scores) / len(scores) if scores else 0.0
+
+        # 综合判定
+        any_fail = any(r.verdict == ReviewVerdict.FAIL for r in results)
+        final_verdict = ReviewVerdict.FAIL if any_fail else (
+            ReviewVerdict.NEEDS_REVIEW if needs_human else ReviewVerdict.PASS
+        )
+
+        # 记录 observation
+        if self.bb:
+            self._record_observation(task, final_verdict, avg_score, results)
+
+        return {
+            "verdict": final_verdict.value,
+            "score": round(avg_score, 2),
+            "results": [r.to_dict() for r in results],
+            "gate": gate,
+            "needs_human": needs_human,
+        }
+
+    def _check_existence(
+        self, task: Task, outputs: Optional[List[Dict]]
+    ) -> ReviewResult:
+        """Step 1: 产出物存在性"""
+        if not outputs:
+            return ReviewResult(
+                "existence", ReviewVerdict.FAIL, 0.0,
+                "No outputs provided",
+            )
+
+        missing = []
+        for out in outputs:
+            if out.get("path"):
+                p = Path(out["path"])
+                if not p.exists():
+                    missing.append(str(p))
+
+        if missing:
+            return ReviewResult(
+                "existence", ReviewVerdict.FAIL, 0.0,
+                f"Missing files: {', '.join(missing)}",
+            )
+
+        return ReviewResult(
+            "existence", ReviewVerdict.PASS, 1.0,
+            f"All {len(outputs)} output(s) verified",
+        )
+
+    def _check_format(
+        self, task: Task, outputs: Optional[List[Dict]]
+    ) -> ReviewResult:
+        """Step 2: 格式合规"""
+        if not outputs:
+            return ReviewResult("format", ReviewVerdict.FAIL, 0.0, "No outputs")
+
+        issues = []
+        for out in outputs:
+            # output.md 必须存在且非空
+            if out.get("type") == "markdown" or out.get("path", "").endswith(".md"):
+                content = out.get("content", "")
+                if not content and out.get("path"):
+                    try:
+                        content = Path(out["path"]).read_text()
+                    except Exception:
+                        pass
+                if not content or len(content.strip()) < 10:
+                    issues.append(f"Output too short: {out.get('path', '?')}")
+
+            # 结论 JSON 必须有效
+            if out.get("type") == "json" or out.get("path", "").endswith(".json"):
+                content = out.get("content", "")
+                if not content and out.get("path"):
+                    try:
+                        content = Path(out["path"]).read_text()
+                    except Exception:
+                        pass
+                try:
+                    data = json.loads(content)
+                    if not isinstance(data, dict):
+                        issues.append(f"JSON not a dict: {out.get('path', '?')}")
+                except (json.JSONDecodeError, TypeError):
+                    issues.append(f"Invalid JSON: {out.get('path', '?')}")
+
+        if issues:
+            return ReviewResult(
+                "format", ReviewVerdict.FAIL, 0.5,
+                "; ".join(issues),
+            )
+
+        return ReviewResult("format", ReviewVerdict.PASS, 1.0, "Format OK")
+
+    def _check_quality(
+        self, task: Task, outputs: Optional[List[Dict]]
+    ) -> ReviewResult:
+        """Step 3: 内容质量（自定义检查）"""
+        if not outputs:
+            return ReviewResult("quality", ReviewVerdict.FAIL, 0.0, "No outputs")
+
+        suggestions = []
+        total_score = 0.0
+
+        for check_name, check_fn in self.custom_checks.items():
+            try:
+                result = check_fn(task, outputs)
+                if isinstance(result, dict):
+                    total_score += result.get("score", 0.0)
+                    if result.get("suggestions"):
+                        suggestions.extend(result["suggestions"])
+            except Exception as e:
+                suggestions.append(f"Check '{check_name}' error: {e}")
+
+        if self.custom_checks:
+            avg = total_score / len(self.custom_checks)
+        else:
+            avg = 1.0  # 无自定义检查默认通过
+
+        verdict = ReviewVerdict.PASS if avg >= 0.6 else ReviewVerdict.FAIL
+        return ReviewResult("quality", verdict, round(avg, 2), suggestions=suggestions)
+
+    def _determine_gate(
+        self, task: Task, results: List[ReviewResult]
+    ) -> str:
+        """Step 4: Guardrail 门控
+
+        Returns:
+            "auto" / "optional" / "mandatory" / "dual"
+        """
+        risk = (task.risk_level or "").lower()
+
+        # 任何步骤 FAIL → mandatory
+        any_fail = any(r.verdict == ReviewVerdict.FAIL for r in results)
+        if any_fail:
+            return "mandatory"
+
+        # 极高风险 → dual（双审）
+        if risk == "critical":
+            return "dual"
+
+        # 高风险 → mandatory
+        if risk == "high":
+            return "mandatory"
+
+        # 中风险 → optional
+        if risk == "medium":
+            return "optional"
+
+        # 低风险 → auto（免审）
+        return "auto"
+
+    def _record_observation(
+        self,
+        task: Task,
+        verdict: ReviewVerdict,
+        score: float,
+        results: List[ReviewResult],
+    ) -> None:
+        """记录审查 observation"""
+        try:
+            detail = json.dumps({
+                "verdict": verdict.value,
+                "score": score,
+                "steps": [r.to_dict() for r in results],
+            })
+            self.bb.add_observation(
+                task_id=task.id,
+                agent="review-pipeline",
+                obs_type=ObservationType.VERIFICATION.value,
+                content=f"Review: {verdict.value} (score={score})",
+                detail=detail,
+            )
+        except Exception:
+            logger.exception("Failed to record review observation")
+
+
+class RebuttalManager:
+    """反驳权管理
+
+    Agent 对审查结论不服时，可发起反驳：
+    1. 提交反驳理由
+    2. 升级给更高权限审查者
+    3. 最多 2 轮
+    """
+
+    MAX_ROUNDS = 2
+
+    def __init__(self, bb: Optional[Blackboard] = None):
+        self.bb = bb
+
+    def submit_rebuttal(
+        self,
+        task_id: str,
+        agent_id: str,
+        reason: str,
+        evidence: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """提交反驳"""
+        # 检查轮次
+        existing = self._get_rebuttal_count(task_id)
+        if existing >= self.MAX_ROUNDS:
+            return {
+                "status": "rejected",
+                "reason": f"Max rebuttal rounds ({self.MAX_ROUNDS}) reached",
+            }
+
+        round_num = existing + 1
+
+        # 记录 observation
+        if self.bb:
+            try:
+                self.bb.add_observation(
+                    task_id=task_id,
+                    agent=agent_id,
+                    obs_type="rebuttal",
+                    content=f"Rebuttal round {round_num}: {reason[:200]}",
+                    detail=json.dumps({
+                        "round": round_num,
+                        "reason": reason,
+                        "evidence": evidence,
+                    }),
+                )
+            except Exception:
+                logger.exception("Failed to record rebuttal")
+
+        # 决定升级目标
+        escalation_target = "simayi-challenger" if round_num == 1 else "pangtong-fujunshi"
+
+        return {
+            "status": "accepted",
+            "round": round_num,
+            "escalation_target": escalation_target,
+            "max_rounds": self.MAX_ROUNDS,
+        }
+
+    def _get_rebuttal_count(self, task_id: str) -> int:
+        """获取已有反驳轮次"""
+        if not self.bb:
+            return 0
+        try:
+            observations = self.bb.list_observations(task_id=task_id)
+            return sum(1 for o in observations if o.get("obs_type") == "rebuttal")
+        except Exception:
+            return 0