Files
sanguo_moziplus_v2/tests/test_experience.py
T
2026-05-17 06:10:15 +08:00

255 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""F15 Experience Distillation 单元测试
按 test-plan-v2.6.md §F15
- T1: 经验提取(P0)
- T2: 持久化(P0)
- T3: 相似推荐(P0)
- T4: 模式分类(P1)
"""
import json
import pytest
from pathlib import Path
from src.daemon.experience import (
Experience,
ExperienceCategory,
ExperienceDistiller,
ExperienceStore,
)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def store(tmp_path):
    """Provide an ExperienceStore whose store_path is a JSONL file under tmp_path."""
    jsonl_file = tmp_path / "experiences.jsonl"
    return ExperienceStore(store_path=jsonl_file)
@pytest.fixture
def distiller(store):
    """Provide an ExperienceDistiller constructed with the ``store`` fixture."""
    bound_distiller = ExperienceDistiller(store=store)
    return bound_distiller
@pytest.fixture
def memory_store():
    """Provide a purely in-memory ExperienceStore (constructed without a store_path)."""
    in_memory = ExperienceStore()
    return in_memory
# ---------------------------------------------------------------------------
# T1: 经验提取
# ---------------------------------------------------------------------------
class TestDistillation:
    """T1: distilling experiences from review results and task outputs."""

    def test_distill_from_review_failure(self, distiller):
        """A failed review step is expected to yield a "pitfall" experience."""
        failed_step = {
            "step": "existence",
            "verdict": "fail",
            "details": "Missing output.md",
            "suggestions": [],
        }
        review = {"verdict": "fail", "results": [failed_step]}

        distilled = distiller.distill_from_task(
            task_id="t1",
            task_title="Build Feature",
            task_type="coding",
            review_result=review,
        )

        assert len(distilled) >= 1
        assert any(item.category == "pitfall" for item in distilled)

    def test_distill_from_suggestions(self, distiller):
        """A reviewer suggestion is expected to show up in some experience summary."""
        passing_step = {
            "step": "quality",
            "verdict": "pass",
            "score": 0.9,
            "suggestions": ["Always test edge cases"],
        }
        review = {"verdict": "pass", "results": [passing_step]}

        distilled = distiller.distill_from_task(
            task_id="t2",
            task_title="Write Tests",
            task_type="testing",
            review_result=review,
        )

        assert len(distilled) >= 1
        texts = [item.summary for item in distilled]
        assert any("edge cases" in text for text in texts)

    def test_distill_from_text_output(self, distiller):
        """This best-practice output text should yield a "best_practice" experience."""
        task_outputs = [
            {"content": "## Best Practice\n\nAlways use health checks when deploying services."},
        ]

        distilled = distiller.distill_from_task(
            task_id="t3",
            task_title="Deploy Service",
            outputs=task_outputs,
        )

        assert len(distilled) >= 1
        assert any(item.category == "best_practice" for item in distilled)

    def test_distill_pitfall_from_text(self, distiller):
        """This bug-report output text should yield a "pitfall" experience."""
        task_outputs = [
            {"content": "## Bug Report\n\nForgot to close the database connection."},
        ]

        distilled = distiller.distill_from_task(
            task_id="t4",
            task_title="Debug Issue",
            outputs=task_outputs,
        )

        assert any(item.category == "pitfall" for item in distilled)

    def test_distill_environment_from_text(self, distiller):
        """This setup-instruction output text should yield an "environment" experience."""
        task_outputs = [
            {"content": "Need to install Python 3.9+ and configure the PATH."},
        ]

        distilled = distiller.distill_from_task(
            task_id="t5",
            task_title="Setup",
            outputs=task_outputs,
        )

        assert any(item.category == "environment" for item in distilled)

    def test_empty_outputs_no_crash(self, distiller):
        """With neither review_result nor outputs, distillation returns an empty list."""
        distilled = distiller.distill_from_task(
            task_id="t6",
            task_title="Empty Task",
        )

        assert distilled == []
# ---------------------------------------------------------------------------
# T2: 持久化
# ---------------------------------------------------------------------------
class TestPersistence:
    """T2: experiences written through one store are visible after a reload."""

    def test_save_and_reload(self, tmp_path):
        """An added experience can be fetched by id from a new store on the same path."""
        jsonl_file = tmp_path / "experiences.jsonl"
        writer = ExperienceStore(store_path=jsonl_file)
        saved = Experience(category="pitfall", summary="Test experience", tags=["test"])
        writer.add(saved)

        # Reload: a second store opened on the same path must see the entry.
        reader = ExperienceStore(store_path=jsonl_file)
        assert reader.count() == 1
        restored = reader.get(saved.id)
        assert restored is not None
        assert restored.summary == "Test experience"

    def test_delete_persists(self, tmp_path):
        """A deleted experience stays gone after reloading from the same path."""
        jsonl_file = tmp_path / "experiences.jsonl"
        first = ExperienceStore(store_path=jsonl_file)
        doomed = Experience(category="pitfall", summary="To delete")
        first.add(doomed)
        first.delete(doomed.id)
        assert first.count() == 0

        # Reload: the deletion must also be reflected in a fresh store.
        second = ExperienceStore(store_path=jsonl_file)
        assert second.count() == 0

    def test_multiple_experiences(self, tmp_path):
        """Five added experiences are all counted, before and after a reload."""
        jsonl_file = tmp_path / "experiences.jsonl"
        first = ExperienceStore(store_path=jsonl_file)
        for index in range(5):
            entry = Experience(category="pattern", summary=f"Exp {index}")
            first.add(entry)
        assert first.count() == 5

        second = ExperienceStore(store_path=jsonl_file)
        assert second.count() == 5

    def test_memory_store_no_file(self):
        """A store built without a store_path still accepts and counts entries."""
        in_memory = ExperienceStore()
        entry = Experience(category="test", summary="Memory only")
        in_memory.add(entry)
        assert in_memory.count() == 1
# ---------------------------------------------------------------------------
# T3: 相似推荐
# ---------------------------------------------------------------------------
class TestRecommendation:
    """T3: recommending stored experiences by tags, query text, or task type."""

    def test_recommend_by_tags(self, distiller, store):
        """Recommending by tag returns the experience carrying that tag."""
        store.add(Experience(category="pitfall", summary="Coding pitfall",
                             tags=["coding"]))
        store.add(Experience(category="best_practice", summary="Testing BP",
                             tags=["testing"]))

        results = distiller.recommend(tags=["coding"])

        assert len(results) >= 1
        assert any("Coding pitfall" in e.summary for e in results)

    def test_recommend_by_query(self, distiller, store):
        """A lower-case query ("db") is expected to match the "DB" summary."""
        store.add(Experience(category="pitfall", summary="Always close DB connections"))
        store.add(Experience(category="best_practice", summary="Use type hints"))

        results = distiller.recommend(query="db")

        assert len(results) >= 1
        assert any("DB" in e.summary for e in results)

    def test_recommend_by_task_type(self, distiller, store):
        """Recommending by task_type returns the experience tagged with that type."""
        store.add(Experience(category="pitfall", summary="P1", tags=["coding"]))
        store.add(Experience(category="pitfall", summary="P2", tags=["testing"]))

        results = distiller.recommend(task_type="coding")

        assert any("P1" in e.summary for e in results)

    def test_recommend_empty(self, distiller):
        """With no criteria and an empty store, recommend returns an empty list."""
        results = distiller.recommend()
        assert results == []

    def test_recommend_limit(self, distiller, store):
        """With ten matching experiences, ``limit=3`` caps the result size.

        The lower bound keeps this test from passing vacuously: an
        implementation that returned nothing at all would otherwise satisfy
        the ``<= 3`` check even though matching entries exist in the store.
        """
        for i in range(10):
            store.add(Experience(category="pitfall", summary=f"Exp {i}",
                                 tags=["coding"]))

        results = distiller.recommend(tags=["coding"], limit=3)

        assert 1 <= len(results) <= 3
# ---------------------------------------------------------------------------
# T4: 模式分类
# ---------------------------------------------------------------------------
class TestPatternClassification:
    """T4: category labels returned by ``_classify_text`` for sample sentences."""

    def test_classify_pitfall(self, distiller):
        """This bug-related sentence is labelled "pitfall"."""
        label = distiller._classify_text("This is a common bug")
        assert label == "pitfall"

    def test_classify_best_practice(self, distiller):
        """This advice sentence is labelled "best_practice"."""
        label = distiller._classify_text("Always use version control")
        assert label == "best_practice"

    def test_classify_environment(self, distiller):
        """This installation sentence is labelled "environment"."""
        label = distiller._classify_text("Install the required packages")
        assert label == "environment"

    def test_classify_no_match(self, distiller):
        """An unrelated sentence gets no label (None)."""
        label = distiller._classify_text("The weather is nice today")
        assert label is None

    def test_classify_chinese(self, distiller):
        """Chinese sentences are labelled as well."""
        expectations = (
            ("这是一个常见的陷阱", "pitfall"),
            ("建议使用类型注解", "best_practice"),
        )
        for sentence, expected_label in expectations:
            assert distiller._classify_text(sentence) == expected_label
# ---------------------------------------------------------------------------
# Experience model
# ---------------------------------------------------------------------------
class TestExperienceModel:
    """Checks on the Experience model and on category search in the store."""

    def test_to_dict_roundtrip(self):
        """summary, category and tags survive ``to_dict`` followed by ``from_dict``."""
        original = Experience(
            category="pitfall",
            summary="Test",
            source_task_id="t1",
            tags=["coding"],
        )

        restored = Experience.from_dict(original.to_dict())

        for field_name in ("summary", "category", "tags"):
            assert getattr(restored, field_name) == getattr(original, field_name)

    def test_search_by_category(self, store):
        """Searching by category returns only the experience in that category."""
        seeds = (("pitfall", "P1"), ("best_practice", "B1"))
        for category, summary in seeds:
            store.add(Experience(category=category, summary=summary))

        matches = store.search(category="pitfall")

        assert len(matches) == 1
        assert matches[0].summary == "P1"