From 36d80a6f35ec7eb24e9bc3024a858418cf3a756d Mon Sep 17 00:00:00 2001 From: cfdaily Date: Sun, 17 May 2026 06:07:01 +0800 Subject: [PATCH] auto-sync: 2026-05-17 06:07:01 --- src/daemon/review.py | 342 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 src/daemon/review.py diff --git a/src/daemon/review.py b/src/daemon/review.py new file mode 100644 index 0000000..83d4bda --- /dev/null +++ b/src/daemon/review.py @@ -0,0 +1,342 @@ +"""Review Pipeline — 产出验证流水线 + +验证链:产出物存在性 → 格式合规 → 内容质量 → 评分 +Guardrail 门控:低风险免审、高风险强制审、极高风险双审 +""" + +from __future__ import annotations + +import json +import logging +import re +from datetime import datetime +from enum import Enum +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple + +from src.blackboard.models import ObservationType, Task +from src.blackboard.operations import Blackboard +from src.blackboard.queries import Queries + +logger = logging.getLogger("moziplus-v2.review") + + +class ReviewVerdict(str, Enum): + PASS = "pass" + FAIL = "fail" + NEEDS_REVIEW = "needs_review" + + +class ReviewResult: + """单步验证结果""" + + def __init__( + self, + step: str, + verdict: ReviewVerdict, + score: float = 0.0, + details: str = "", + suggestions: List[str] = None, + ): + self.step = step + self.verdict = verdict + self.score = score + self.details = details + self.suggestions = suggestions or [] + + def to_dict(self) -> Dict[str, Any]: + return { + "step": self.step, + "verdict": self.verdict.value, + "score": self.score, + "details": self.details, + "suggestions": self.suggestions, + } + + +class ReviewPipeline: + """产出验证流水线""" + + def __init__( + self, + bb: Optional[Blackboard] = None, + custom_checks: Optional[Dict[str, Callable]] = None, + output_dir: Optional[Path] = None, + ): + self.bb = bb + self.custom_checks = custom_checks or {} + self.output_dir = output_dir + + def run_review( + self, + task: Task, + outputs: Optional[List[Dict[str, Any]]] = None, + ) -> Dict[str, Any]: + """运行完整验证流水线 + + Returns: + {"verdict": str, "score": float, "results": [...], + "gate": str, "needs_human": bool} + """ + results: List[ReviewResult] = [] + + # Step 1: 产出物存在性 + r1 = self._check_existence(task, outputs) + results.append(r1) + + # Step 2: 格式合规 + r2 = self._check_format(task, outputs) + results.append(r2) + + # Step 3: 内容质量(自定义检查) + r3 = self._check_quality(task, outputs) + results.append(r3) + + # Step 4: Guardrail 门控 + gate = self._determine_gate(task, results) + needs_human = gate in ("mandatory", "dual") + + # 综合评分 + scores = [r.score for r in results if r.score > 0] + avg_score = sum(scores) / len(scores) if scores else 0.0 + + # 综合判定 + any_fail = any(r.verdict == ReviewVerdict.FAIL for r in results) + final_verdict = ReviewVerdict.FAIL if any_fail else ( + ReviewVerdict.NEEDS_REVIEW if needs_human else ReviewVerdict.PASS + ) + + # 记录 observation + if self.bb: + self._record_observation(task, final_verdict, avg_score, results) + + return { + "verdict": final_verdict.value, + "score": round(avg_score, 2), + "results": [r.to_dict() for r in results], + "gate": gate, + "needs_human": needs_human, + } + + def _check_existence( + self, task: Task, outputs: Optional[List[Dict]] + ) -> ReviewResult: + """Step 1: 产出物存在性""" + if not outputs: + return ReviewResult( + "existence", ReviewVerdict.FAIL, 0.0, + "No outputs provided", + ) + + missing = [] + for out in outputs: + if out.get("path"): + p = Path(out["path"]) + if not p.exists(): + missing.append(str(p)) + + if missing: + return ReviewResult( + "existence", ReviewVerdict.FAIL, 0.0, + f"Missing files: {', '.join(missing)}", + ) + + return ReviewResult( + "existence", ReviewVerdict.PASS, 1.0, + f"All {len(outputs)} output(s) verified", + ) + + def _check_format( + self, task: Task, outputs: Optional[List[Dict]] + ) -> ReviewResult: + """Step 2: 格式合规""" + if not outputs: + return ReviewResult("format", ReviewVerdict.FAIL, 0.0, "No outputs") + + issues = [] + for out in outputs: + # output.md 必须存在且非空 + if out.get("type") == "markdown" or out.get("path", "").endswith(".md"): + content = out.get("content", "") + if not content and out.get("path"): + try: + content = Path(out["path"]).read_text() + except Exception: + pass + if not content or len(content.strip()) < 10: + issues.append(f"Output too short: {out.get('path', '?')}") + + # 结论 JSON 必须有效 + if out.get("type") == "json" or out.get("path", "").endswith(".json"): + content = out.get("content", "") + if not content and out.get("path"): + try: + content = Path(out["path"]).read_text() + except Exception: + pass + try: + data = json.loads(content) + if not isinstance(data, dict): + issues.append(f"JSON not a dict: {out.get('path', '?')}") + except (json.JSONDecodeError, TypeError): + issues.append(f"Invalid JSON: {out.get('path', '?')}") + + if issues: + return ReviewResult( + "format", ReviewVerdict.FAIL, 0.5, + "; ".join(issues), + ) + + return ReviewResult("format", ReviewVerdict.PASS, 1.0, "Format OK") + + def _check_quality( + self, task: Task, outputs: Optional[List[Dict]] + ) -> ReviewResult: + """Step 3: 内容质量(自定义检查)""" + if not outputs: + return ReviewResult("quality", ReviewVerdict.FAIL, 0.0, "No outputs") + + suggestions = [] + total_score = 0.0 + + for check_name, check_fn in self.custom_checks.items(): + try: + result = check_fn(task, outputs) + if isinstance(result, dict): + total_score += result.get("score", 0.0) + if result.get("suggestions"): + suggestions.extend(result["suggestions"]) + except Exception as e: + suggestions.append(f"Check '{check_name}' error: {e}") + + if self.custom_checks: + avg = total_score / len(self.custom_checks) + else: + avg = 1.0 # 无自定义检查默认通过 + + verdict = ReviewVerdict.PASS if avg >= 0.6 else ReviewVerdict.FAIL + return ReviewResult("quality", verdict, round(avg, 2), suggestions=suggestions) + + def _determine_gate( + self, task: Task, results: List[ReviewResult] + ) -> str: + """Step 4: Guardrail 门控 + + Returns: + "auto" / "optional" / "mandatory" / "dual" + """ + risk = (task.risk_level or "").lower() + + # 任何步骤 FAIL → mandatory + any_fail = any(r.verdict == ReviewVerdict.FAIL for r in results) + if any_fail: + return "mandatory" + + # 极高风险 → dual(双审) + if risk == "critical": + return "dual" + + # 高风险 → mandatory + if risk == "high": + return "mandatory" + + # 中风险 → optional + if risk == "medium": + return "optional" + + # 低风险 → auto(免审) + return "auto" + + def _record_observation( + self, + task: Task, + verdict: ReviewVerdict, + score: float, + results: List[ReviewResult], + ) -> None: + """记录审查 observation""" + try: + detail = json.dumps({ + "verdict": verdict.value, + "score": score, + "steps": [r.to_dict() for r in results], + }) + self.bb.add_observation( + task_id=task.id, + agent="review-pipeline", + obs_type=ObservationType.VERIFICATION.value, + content=f"Review: {verdict.value} (score={score})", + detail=detail, + ) + except Exception: + logger.exception("Failed to record review observation") + + +class RebuttalManager: + """反驳权管理 + + Agent 对审查结论不服时,可发起反驳: + 1. 提交反驳理由 + 2. 升级给更高权限审查者 + 3. 最多 2 轮 + """ + + MAX_ROUNDS = 2 + + def __init__(self, bb: Optional[Blackboard] = None): + self.bb = bb + + def submit_rebuttal( + self, + task_id: str, + agent_id: str, + reason: str, + evidence: Optional[str] = None, + ) -> Dict[str, Any]: + """提交反驳""" + # 检查轮次 + existing = self._get_rebuttal_count(task_id) + if existing >= self.MAX_ROUNDS: + return { + "status": "rejected", + "reason": f"Max rebuttal rounds ({self.MAX_ROUNDS}) reached", + } + + round_num = existing + 1 + + # 记录 observation + if self.bb: + try: + self.bb.add_observation( + task_id=task_id, + agent=agent_id, + obs_type="rebuttal", + content=f"Rebuttal round {round_num}: {reason[:200]}", + detail=json.dumps({ + "round": round_num, + "reason": reason, + "evidence": evidence, + }), + ) + except Exception: + logger.exception("Failed to record rebuttal") + + # 决定升级目标 + escalation_target = "simayi-challenger" if round_num == 1 else "pangtong-fujunshi" + + return { + "status": "accepted", + "round": round_num, + "escalation_target": escalation_target, + "max_rounds": self.MAX_ROUNDS, + } + + def _get_rebuttal_count(self, task_id: str) -> int: + """获取已有反驳轮次""" + if not self.bb: + return 0 + try: + observations = self.bb.list_observations(task_id=task_id) + return sum(1 for o in observations if o.get("obs_type") == "rebuttal") + except Exception: + return 0