All checks were successful
Lint / lint (pull_request) Successful in 28s
Closes #1012
- add structured session fact extraction with provenance, temporal metadata, canonical normalization, and contradiction grouping
- persist structured metadata into holographic memory auto-extraction with canonical-key dedupe across repeated ingests
- add fixture-backed transcript tests plus extraction quality evaluation
165 lines
6.0 KiB
Python
165 lines
6.0 KiB
Python
"""Tests for session compaction with fact extraction."""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Put the directory two levels above this file at the front of sys.path.
# NOTE(review): presumably that is the repo root holding the `agent` package
# imported below — confirm against the project layout.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
from agent.session_compactor import (
|
|
ExtractedFact,
|
|
evaluate_extraction_quality,
|
|
extract_and_save_facts,
|
|
extract_facts_from_messages,
|
|
format_facts_summary,
|
|
save_facts_to_store,
|
|
)
|
|
|
|
# JSON fixture file in the "fixtures" directory that sits next to this test module.
_FIXTURE_PATH = Path(__file__).resolve().parent / "fixtures" / "memory_extraction_fragments.json"
|
|
|
|
|
|
def _load_fixture(name: str):
    """Return the entry stored under *name* in the JSON fixture file.

    The whole document at ``_FIXTURE_PATH`` is parsed on every call and the
    value found under the top-level key ``name`` is returned.

    Raises:
        KeyError: if ``name`` is missing (assuming the fixture's top level is
            a JSON object).
    """
    # JSON is UTF-8 by specification; pin the encoding so the result does not
    # depend on the platform's locale default.
    fixture_text = _FIXTURE_PATH.read_text(encoding="utf-8")
    return json.loads(fixture_text)[name]
|
|
|
|
|
|
class TestFactExtraction:
    """Checks for ``extract_facts_from_messages`` and ``evaluate_extraction_quality``."""

    def test_extract_preference(self):
        """A stated preference yields at least one fact mentioning Python."""
        extracted = extract_facts_from_messages(
            [{"role": "user", "content": "I prefer Python over JavaScript for backend work."}]
        )
        assert len(extracted) >= 1
        assert any("Python" in fact.content for fact in extracted)

    def test_extract_correction(self):
        """A correction yields at least one fact carrying the corrected port."""
        extracted = extract_facts_from_messages(
            [{"role": "user", "content": "Actually the port is 8081 not 8080."}]
        )
        assert len(extracted) >= 1
        assert any("8081" in fact.content for fact in extracted)

    def test_extract_project_fact(self):
        """A statement about the project yields at least one fact."""
        extracted = extract_facts_from_messages(
            [{"role": "user", "content": "The project uses Gitea for source control."}]
        )
        assert len(extracted) >= 1

    def test_skip_tool_results(self):
        """An assistant tool call followed by tool output produces no facts."""
        assistant_turn = {
            "role": "assistant",
            "content": "Running command...",
            "tool_calls": [{"id": "1"}],
        }
        tool_turn = {"role": "tool", "content": "output here"}
        extracted = extract_facts_from_messages([assistant_turn, tool_turn])
        assert len(extracted) == 0

    def test_skip_short_messages(self):
        """A two-character user message produces no facts."""
        extracted = extract_facts_from_messages([{"role": "user", "content": "ok"}])
        assert len(extracted) == 0

    def test_deduplication(self):
        """Two identical user messages collapse into a single Python fact."""
        # Build two distinct dict objects, as separate messages would be.
        transcript = [{"role": "user", "content": "I prefer Python."} for _ in range(2)]
        extracted = extract_facts_from_messages(transcript)
        mentions_python = [fact for fact in extracted if "Python" in fact.content]
        assert len(mentions_python) == 1

    def test_structured_fact_preserves_provenance_and_temporal_metadata(self):
        """The deploy-method fact keeps its role, turn, timestamp, provenance and evidence."""
        extracted = extract_facts_from_messages(_load_fixture("preferences_and_duplicates"))
        candidates = (fact for fact in extracted if fact.relation == "workflow.deploy_method")
        deploy = next(candidates)
        assert deploy.source_role == "user"
        assert deploy.source_turn == 0
        assert deploy.observed_at == "2026-04-22T10:00:00Z"
        assert deploy.provenance == "conversation:user:0"
        assert deploy.canonical_key
        assert deploy.evidence
        first_evidence = deploy.evidence[0]
        assert first_evidence["source_text"].startswith("Deploy via Ansible")

    def test_near_duplicate_facts_are_normalized_into_one_canonical_fact(self):
        """Near-duplicate deploy statements merge into one fact with two evidence items."""
        extracted = extract_facts_from_messages(_load_fixture("preferences_and_duplicates"))
        deploy_facts = [fact for fact in extracted if fact.relation == "workflow.deploy_method"]
        assert len(deploy_facts) == 1
        canonical = deploy_facts[0]
        assert len(canonical.evidence) == 2
        assert canonical.metadata["duplicate_count"] == 1

    def test_contradictory_facts_are_preserved_for_unique_slots(self):
        """Both conflicting provider facts survive and are flagged as contradictions."""
        extracted = extract_facts_from_messages(_load_fixture("operational_and_contradictions"))
        provider_facts = [
            fact for fact in extracted if fact.contradiction_group == "config.provider"
        ]
        assert len(provider_facts) == 2
        statuses = {fact.status for fact in provider_facts}
        assert statuses == {"contradiction"}
        normalized = {fact.normalized_content for fact in provider_facts}
        assert normalized == {
            "openai codex gpt 5 4",
            "mimo v2 pro",
        }

    def test_quality_evaluation_reports_noise_reduction(self):
        """Quality metrics show fewer normalized facts than raw candidates and one contradiction group."""
        report = evaluate_extraction_quality(_load_fixture("mixed_transcript"))
        assert report["raw_candidates"] > report["normalized_facts"]
        assert report["noise_reduction"] > 0
        assert report["contradiction_groups"] == 1
|
|
|
|
|
|
class TestSaveFacts:
    """Checks that ``save_facts_to_store`` forwards facts to the supplied callback."""

    def test_save_with_callback(self):
        """A four-argument callback receives the single fact and the count is 1."""
        recorded = []

        def mock_save(category, entity, content, trust):
            recorded.append({"category": category, "content": content})

        dark_mode = ExtractedFact("user_pref", "user", "likes dark mode", 0.8, 0)
        stored_count = save_facts_to_store([dark_mode], fact_store_fn=mock_save)
        assert stored_count == 1
        assert len(recorded) == 1

    def test_save_with_extended_callback_metadata(self):
        """A callback accepting ``**kwargs`` also receives the structured metadata."""
        recorded = []

        def mock_save(category, entity, content, trust, **kwargs):
            entry = {
                "category": category,
                "entity": entity,
                "content": content,
                "trust": trust,
            }
            # Extra keyword fields are layered on top, overriding on key clashes.
            entry.update(kwargs)
            recorded.append(entry)

        structured_fields = {
            "source_role": "user",
            "observed_at": "2026-04-22T11:00:00Z",
            "provenance": "conversation:user:2",
            "canonical_key": "project.operational|watchdog|dispatch_cap|6",
            "relation": "fleet.dispatch_cap",
            "contradiction_group": "fleet.dispatch_cap",
            "metadata": {"duplicate_count": 0},
        }
        fact = ExtractedFact(
            "project.operational",
            "watchdog",
            "BURN watchdog caps dispatches per cycle to 6",
            0.9,
            2,
            **structured_fields,
        )
        stored_count = save_facts_to_store([fact], fact_store_fn=mock_save)
        assert stored_count == 1
        first_entry = recorded[0]
        assert first_entry["canonical_key"] == fact.canonical_key
        assert first_entry["observed_at"] == "2026-04-22T11:00:00Z"
        assert first_entry["metadata"]["duplicate_count"] == 0
|
|
|
|
|
|
class TestFormatSummary:
    """Checks for the text produced by ``format_facts_summary``."""

    def test_empty(self):
        """An empty fact list is summarized with a "No facts" message."""
        summary = format_facts_summary([])
        assert "No facts" in summary

    def test_with_facts(self):
        """The summary of two facts mentions the count and a category name."""
        dark_mode = ExtractedFact("user_pref", "user", "likes dark mode", 0.8, 0)
        port_fix = ExtractedFact("correction", "user", "port is 8081", 0.9, 1)
        summary = format_facts_summary([dark_mode, port_fix])
        for fragment in ("2 facts", "user_pref"):
            assert fragment in summary
|