Assembles relevant knowledge from the store into a compact 2k-token context block for session injection.

Features:
- Filter by repo, agent type, and global scope
- Sort by confidence (pitfalls first, then patterns, then facts)
- Per-repo and per-agent markdown knowledge files
- Graceful empty-store handling
- JSON output mode for programmatic use
- Token-count-aware truncation at line boundaries

Closes #11
240 lines
7.7 KiB
Python
240 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tests for bootstrapper.py — context assembly from knowledge store.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
# Add scripts dir to path for import
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
|
|
from bootstrapper import (
|
|
build_bootstrap_context,
|
|
estimate_tokens,
|
|
filter_facts,
|
|
load_index,
|
|
sort_facts,
|
|
truncate_to_tokens,
|
|
)
|
|
|
|
|
|
def make_index(facts: list[dict], tmp_dir: Path) -> Path:
    """Write a temporary ``index.json`` holding *facts* and return its path.

    Args:
        facts: Fact dictionaries stored verbatim under the ``"facts"`` key.
        tmp_dir: Existing directory in which ``index.json`` is created.

    Returns:
        Path of the written ``index.json`` file inside *tmp_dir*.
    """
    index = {
        "version": 1,
        "last_updated": "2026-04-13T20:00:00Z",
        "total_facts": len(facts),
        "facts": facts,
    }
    path = tmp_dir / "index.json"
    # Explicit encoding keeps the fixture independent of the platform locale.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(index, f)
    return path
|
|
|
|
|
|
def test_empty_index():
    """An index with zero facts renders the "nothing known yet" message."""
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        empty_index = make_index([], root)

        # The knowledge sub-directories exist but hold no files.
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(repo="the-nexus", index_path=empty_index)

        for expected in ("No relevant knowledge found", "Starting fresh"):
            assert expected in rendered
    print("PASS: empty_index")
|
|
|
|
|
|
def test_filter_by_repo():
    """Repo filtering keeps the requested repo and global facts, drops others."""
    corpus = [
        {"fact": "A", "category": "fact", "repo": "the-nexus", "confidence": 0.9},
        {"fact": "B", "category": "fact", "repo": "fleet-ops", "confidence": 0.8},
        {"fact": "C", "category": "fact", "repo": "global", "confidence": 0.7},
    ]
    kept = {
        entry["fact"]
        for entry in filter_facts(corpus, repo="the-nexus", include_global=True)
    }
    assert "A" in kept  # matches the requested repo
    assert "B" not in kept  # belongs to a different repo
    assert "C" in kept  # global scope is included
    print("PASS: filter_by_repo")
|
|
|
|
|
|
def test_filter_by_agent():
    """Agent filtering keeps matching-agent and agent-less global facts."""
    corpus = [
        {"fact": "A", "category": "pattern", "repo": "global", "agent": "mimo-sprint", "confidence": 0.8},
        {"fact": "B", "category": "pattern", "repo": "global", "agent": "groq-fast", "confidence": 0.7},
        {"fact": "C", "category": "fact", "repo": "global", "confidence": 0.9},
    ]
    kept = {
        entry["fact"]
        for entry in filter_facts(corpus, agent="mimo-sprint", include_global=True)
    }
    assert "A" in kept
    assert "B" not in kept
    assert "C" in kept  # global, no agent restriction
    print("PASS: filter_by_agent")
|
|
|
|
|
|
def test_no_global_flag():
    """With ``include_global=False`` the global-scope fact is filtered out."""
    corpus = [
        {"fact": "A", "category": "fact", "repo": "the-nexus", "confidence": 0.9},
        {"fact": "B", "category": "fact", "repo": "global", "confidence": 0.8},
    ]
    kept = {
        entry["fact"]
        for entry in filter_facts(corpus, repo="the-nexus", include_global=False)
    }
    assert "A" in kept
    assert "B" not in kept
    print("PASS: no_global_flag")
|
|
|
|
|
|
def test_sort_by_confidence():
    """Same-category facts come back ordered from highest to lowest confidence."""
    unordered = [
        {"fact": "low", "category": "fact", "repo": "global", "confidence": 0.3},
        {"fact": "high", "category": "fact", "repo": "global", "confidence": 0.95},
        {"fact": "mid", "category": "fact", "repo": "global", "confidence": 0.7},
    ]
    ranked = sort_facts(unordered)
    # Check each position in turn against the expected descending order.
    for position, expected in enumerate(("high", "mid", "low")):
        assert ranked[position]["fact"] == expected
    print("PASS: sort_by_confidence")
|
|
|
|
|
|
def test_sort_pitfalls_first():
    """At equal confidence a pitfall is ordered ahead of a plain fact."""
    tied_pair = [
        {"fact": "regular fact", "category": "fact", "repo": "global", "confidence": 0.8},
        {"fact": "danger pitfall", "category": "pitfall", "repo": "global", "confidence": 0.8},
    ]
    leader = sort_facts(tied_pair)[0]
    assert leader["category"] == "pitfall"
    print("PASS: sort_pitfalls_first")
|
|
|
|
|
|
def test_truncate_to_tokens():
    """A tiny token budget keeps the first line and adds a truncation marker."""
    five_lines = "line1\nline2\nline3\nline4\nline5\n"
    clipped = truncate_to_tokens(five_lines, max_tokens=2)  # ~8 chars
    assert "line1" in clipped
    # The marker's exact wording/casing is not pinned, only its presence.
    assert "truncated" in clipped.lower()
    print("PASS: truncate_to_tokens")
|
|
|
|
|
|
def test_estimate_tokens():
    """A 400-character string is estimated at roughly 100 tokens."""
    lower, upper = 90, 110  # tolerance band around ~100 tokens
    estimate = estimate_tokens("a" * 400)
    assert lower <= estimate <= upper
    print("PASS: estimate_tokens")
|
|
|
|
|
|
def test_build_full_context():
    """A populated index renders the expected headings and fact texts."""
    corpus = [
        {"fact": "API merges fail with 405", "category": "pitfall", "repo": "the-nexus", "confidence": 0.95},
        {"fact": "Has 50+ open PRs", "category": "fact", "repo": "the-nexus", "confidence": 0.9},
        {"fact": "Token at ~/.config/gitea/token", "category": "tool-quirk", "repo": "global", "confidence": 0.9},
        {"fact": "Check git remote -v first", "category": "pattern", "repo": "global", "confidence": 0.8},
    ]

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        populated_index = make_index(corpus, root)

        # Knowledge sub-directories are created empty alongside the index.
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(
            repo="the-nexus",
            agent="mimo-sprint",
            include_global=True,
            index_path=populated_index,
        )

        expected_fragments = (
            "What You Know",
            "PITFALLS",
            "API merges fail with 405",
            "the-nexus",
            "Token at",  # global fact included
        )
        for fragment in expected_fragments:
            assert fragment in rendered
    print("PASS: build_full_context")
|
|
|
|
|
|
def test_max_tokens_respected():
    """With 100 facts and ``max_tokens=500`` the output stays near the budget."""
    # Build far more facts than a 500-token budget can hold.
    facts = []
    for i in range(100):
        facts.append(
            {
                "fact": f"Fact number {i} with some detail about things",
                "category": "fact",
                "repo": "global",
                "confidence": 0.8,
            }
        )

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        big_index = make_index(facts, root)
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(
            repo=None,
            max_tokens=500,
            index_path=big_index,
        )

        actual_tokens = estimate_tokens(rendered)
        # Truncation happens at line boundaries, so tolerate a 10% overshoot.
        assert actual_tokens <= 550, f"Expected ~500 tokens, got {actual_tokens}"
    print(f"PASS: max_tokens_respected (got {actual_tokens} tokens)")
|
|
|
|
|
|
def test_missing_index_graceful():
    """Pointing at a nonexistent index file still yields the empty-store message."""
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        # Only the sub-directories are created; no index.json is written.
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        absent_index = root / "nonexistent.json"
        rendered = build_bootstrap_context(repo="anything", index_path=absent_index)
        assert "No relevant knowledge found" in rendered
    print("PASS: missing_index_graceful")
|
|
|
|
|
|
if __name__ == "__main__":
    # Minimal self-contained runner: execute every test, report a summary,
    # and exit non-zero if any test failed.
    import traceback

    tests = [
        test_empty_index,
        test_filter_by_repo,
        test_filter_by_agent,
        test_no_global_flag,
        test_sort_by_confidence,
        test_sort_pitfalls_first,
        test_truncate_to_tokens,
        test_estimate_tokens,
        test_build_full_context,
        test_max_tokens_respected,
        test_missing_index_graceful,
    ]

    passed = 0
    failed = 0
    for test in tests:
        try:
            test()
        except Exception as e:
            # A bare ``assert`` failure has an empty message, so report the
            # exception type and the traceback to show which check failed.
            print(f"FAIL: {test.__name__} — {type(e).__name__}: {e}")
            traceback.print_exc()
            failed += 1
        else:
            passed += 1

    print(f"\n{passed} passed, {failed} failed")
    sys.exit(0 if failed == 0 else 1)
|