137-7-5-release-note-analyzer/scripts/test_bootstrapper.py

#!/usr/bin/env python3
"""
Tests for bootstrapper.py — context assembly from knowledge store.
"""

import json
import sys
import tempfile
from pathlib import Path

# Add scripts dir to path for import
sys.path.insert(0, str(Path(__file__).resolve().parent))

from bootstrapper import (
    build_bootstrap_context,
    estimate_tokens,
    filter_facts,
    load_index,
    sort_facts,
    truncate_to_tokens,
)


def make_index(facts: list[dict], tmp_dir: Path) -> Path:
    """Write a throwaway ``index.json`` holding *facts* into *tmp_dir*.

    The file carries a fixed ``version`` and ``last_updated`` stamp, a
    ``total_facts`` count derived from ``len(facts)``, and the facts
    themselves. Returns the path of the file that was written.
    """
    target = tmp_dir / "index.json"
    payload = {
        "version": 1,
        "last_updated": "2026-04-13T20:00:00Z",
        "total_facts": len(facts),
        "facts": facts,
    }
    # Serialise first, then write in one call; the bytes on disk are the
    # same as streaming with json.dump into a text-mode handle.
    target.write_text(json.dumps(payload))
    return target


def test_empty_index():
    """An index with zero facts should still render the fallback text.

    Builds a temporary store whose ``index.json`` lists no facts, then
    checks that the rendered context contains both fallback phrases.
    """
    with tempfile.TemporaryDirectory() as scratch:
        store = Path(scratch)
        empty_index = make_index([], store)

        # The knowledge sub-directories exist but contain no files.
        for name in ("repos", "agents", "global"):
            (store / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(repo="the-nexus", index_path=empty_index)
        assert "No relevant knowledge found" in rendered
        assert "Starting fresh" in rendered
        print("PASS: empty_index")


def test_filter_by_repo():
    """Check repo filtering: matching-repo and global facts stay, others go."""
    candidates = [
        {"fact": label, "category": "fact", "repo": repo_name, "confidence": score}
        for label, repo_name, score in (
            ("A", "the-nexus", 0.9),
            ("B", "fleet-ops", 0.8),
            ("C", "global", 0.7),
        )
    ]
    kept = filter_facts(candidates, repo="the-nexus", include_global=True)
    kept_labels = tuple(item["fact"] for item in kept)
    assert "A" in kept_labels
    assert "B" not in kept_labels
    assert "C" in kept_labels
    print("PASS: filter_by_repo")


def test_filter_by_agent():
    """Check agent filtering against three global facts.

    One fact is tagged for the requested agent, one for a different agent,
    and one carries no ``agent`` key at all.
    """
    for_requested_agent = {
        "fact": "A", "category": "pattern", "repo": "global",
        "agent": "mimo-sprint", "confidence": 0.8,
    }
    for_other_agent = {
        "fact": "B", "category": "pattern", "repo": "global",
        "agent": "groq-fast", "confidence": 0.7,
    }
    untagged = {"fact": "C", "category": "fact", "repo": "global", "confidence": 0.9}

    kept = filter_facts(
        [for_requested_agent, for_other_agent, untagged],
        agent="mimo-sprint",
        include_global=True,
    )
    kept_labels = tuple(item["fact"] for item in kept)
    assert "A" in kept_labels
    assert "B" not in kept_labels
    # A global fact without an agent tag is expected to pass through.
    assert "C" in kept_labels
    print("PASS: filter_by_agent")


def test_no_global_flag():
    """With ``include_global=False`` the global fact must be dropped."""
    repo_fact = {"fact": "A", "category": "fact", "repo": "the-nexus", "confidence": 0.9}
    global_fact = {"fact": "B", "category": "fact", "repo": "global", "confidence": 0.8}

    kept = filter_facts(
        [repo_fact, global_fact], repo="the-nexus", include_global=False
    )
    kept_labels = tuple(item["fact"] for item in kept)
    assert "A" in kept_labels
    assert "B" not in kept_labels
    print("PASS: no_global_flag")


def test_sort_by_confidence():
    """Same-category facts should come back highest confidence first."""
    shuffled = [
        {"fact": label, "category": "fact", "repo": "global", "confidence": score}
        for label, score in (("low", 0.3), ("high", 0.95), ("mid", 0.7))
    ]
    ranked = sort_facts(shuffled)
    # Walk the first three positions in order, one assertion per slot.
    for position, expected in enumerate(("high", "mid", "low")):
        assert ranked[position]["fact"] == expected
    print("PASS: sort_by_confidence")


def test_sort_pitfalls_first():
    """At equal confidence a pitfall should be ordered ahead of a plain fact."""
    plain = {"fact": "regular fact", "category": "fact", "repo": "global", "confidence": 0.8}
    pitfall = {"fact": "danger pitfall", "category": "pitfall", "repo": "global", "confidence": 0.8}

    # The pitfall goes in second, so the sort has to move it to the front.
    ordered = sort_facts([plain, pitfall])
    leader = ordered[0]
    assert leader["category"] == "pitfall"
    print("PASS: sort_pitfalls_first")


def test_truncate_to_tokens():
    """A tiny token budget should keep the first line and flag the cut."""
    # Five newline-terminated lines: "line1\n" through "line5\n".
    source_text = "".join(f"line{number}\n" for number in range(1, 6))
    shortened = truncate_to_tokens(source_text, max_tokens=2)  # ~8 chars
    assert "line1" in shortened
    assert "truncated" in shortened.lower()
    print("PASS: truncate_to_tokens")


def test_estimate_tokens():
    """A 400-character string should estimate to roughly 100 tokens."""
    lower_bound, upper_bound = 90, 110  # tolerance band around ~100 tokens
    sample = "a" * 400
    estimate = estimate_tokens(sample)
    assert lower_bound <= estimate <= upper_bound
    print("PASS: estimate_tokens")


def test_build_full_context():
    """A populated index should render headings and the expected fact text.

    Two facts belong to ``the-nexus`` and two are global; the context is
    built for that repo with global facts included.
    """
    nexus_facts = [
        {"fact": "API merges fail with 405", "category": "pitfall", "repo": "the-nexus", "confidence": 0.95},
        {"fact": "Has 50+ open PRs", "category": "fact", "repo": "the-nexus", "confidence": 0.9},
    ]
    global_facts = [
        {"fact": "Token at ~/.config/gitea/token", "category": "tool-quirk", "repo": "global", "confidence": 0.9},
        {"fact": "Check git remote -v first", "category": "pattern", "repo": "global", "confidence": 0.8},
    ]

    with tempfile.TemporaryDirectory() as scratch:
        store = Path(scratch)
        populated_index = make_index(nexus_facts + global_facts, store)

        # Knowledge sub-directories are present but left empty.
        for name in ("repos", "agents", "global"):
            (store / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(
            repo="the-nexus",
            agent="mimo-sprint",
            include_global=True,
            index_path=populated_index,
        )

        expected_fragments = (
            "What You Know",
            "PITFALLS",
            "API merges fail with 405",
            "the-nexus",
            "Token at",  # global fact included
        )
        for fragment in expected_fragments:
            assert fragment in rendered
        print("PASS: build_full_context")


def test_max_tokens_respected():
    """A 500-token budget should cap the rendered context near that size."""
    # One hundred near-identical global facts, far more than the budget allows.
    bulk_facts = []
    for i in range(100):
        bulk_facts.append(
            {
                "fact": f"Fact number {i} with some detail about things",
                "category": "fact",
                "repo": "global",
                "confidence": 0.8,
            }
        )

    with tempfile.TemporaryDirectory() as scratch:
        store = Path(scratch)
        big_index = make_index(bulk_facts, store)
        for name in ("repos", "agents", "global"):
            (store / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(
            repo=None,
            max_tokens=500,
            index_path=big_index,
        )

        actual_tokens = estimate_tokens(rendered)
        # Truncation happens at line boundaries, so tolerate a 10% overshoot.
        assert actual_tokens <= 550, f"Expected ~500 tokens, got {actual_tokens}"
        print(f"PASS: max_tokens_respected (got {actual_tokens} tokens)")


def test_missing_index_graceful():
    """Pointing at an index file that does not exist should not raise."""
    with tempfile.TemporaryDirectory() as scratch:
        store = Path(scratch)
        # Only the sub-directories are created; no index file is written.
        for name in ("repos", "agents", "global"):
            (store / name).mkdir(exist_ok=True)

        absent_index = store / "nonexistent.json"
        rendered = build_bootstrap_context(repo="anything", index_path=absent_index)
        assert "No relevant knowledge found" in rendered
        print("PASS: missing_index_graceful")


if __name__ == "__main__":
    # Minimal runner for executing this file directly as a script.
    # Runs every test in order, tallies the results, and exits with status 0
    # only when nothing failed.
    import traceback

    tests = [
        test_empty_index,
        test_filter_by_repo,
        test_filter_by_agent,
        test_no_global_flag,
        test_sort_by_confidence,
        test_sort_pitfalls_first,
        test_truncate_to_tokens,
        test_estimate_tokens,
        test_build_full_context,
        test_max_tokens_respected,
        test_missing_index_graceful,
    ]

    passed = 0
    failed = 0
    for test in tests:
        try:
            test()
        except Exception as e:
            print(f"FAIL: {test.__name__} — {e}")
            # Most tests use bare `assert`, so str(e) is empty; the traceback
            # is the only way to see which assertion actually failed.
            traceback.print_exc()
            failed += 1
        else:
            passed += 1

    print(f"\n{passed} passed, {failed} failed")
    sys.exit(0 if failed == 0 else 1)
feat: build bootstrapper.py - pre-session context assembler

Assembles relevant knowledge from the store into a compact 2k-token context block for session injection.

Features:
- Filter by repo, agent type, and global scope
- Sort by confidence (pitfalls first, patterns, facts)
- Per-repo and per-agent markdown knowledge files
- Graceful empty-store handling
- JSON output mode for programmatic use
- Token-count-aware truncation at line boundaries

Closes #11

2026-04-14 14:05:30 -04:00
			`#!/usr/bin/env python3`
			`"""`
			`Tests for bootstrapper.py — context assembly from knowledge store.`
			`"""`

			`import json`
			`import sys`
			`import tempfile`
			`from pathlib import Path`

			`# Add scripts dir to path for import`
			`sys.path.insert(0, str(Path(__file__).resolve().parent))`

			`from bootstrapper import (`
			`build_bootstrap_context,`
			`estimate_tokens,`
			`filter_facts,`
			`load_index,`
			`sort_facts,`
			`truncate_to_tokens,`
			`)`


			`def make_index(facts: list[dict], tmp_dir: Path) -> Path:`
			`"""Create a temporary index.json with given facts."""`
			`index = {`
			`"version": 1,`
			`"last_updated": "2026-04-13T20:00:00Z",`
			`"total_facts": len(facts),`
			`"facts": facts,`
			`}`
			`path = tmp_dir / "index.json"`
			`with open(path, "w") as f:`
			`json.dump(index, f)`
			`return path`


			`def test_empty_index():`
			`"""Empty knowledge store produces graceful output."""`
			`with tempfile.TemporaryDirectory() as tmp:`
			`tmp_dir = Path(tmp)`
			`index_path = make_index([], tmp_dir)`

			`# Create empty knowledge dirs`
			`for sub in ["repos", "agents", "global"]:`
			`(tmp_dir / sub).mkdir(exist_ok=True)`

			`context = build_bootstrap_context(`
			`repo="the-nexus", index_path=index_path`
			`)`
			`assert "No relevant knowledge found" in context`
			`assert "Starting fresh" in context`
			`print("PASS: empty_index")`


			`def test_filter_by_repo():`
			`"""Filter facts by repository."""`
			`facts = [`
			`{"fact": "A", "category": "fact", "repo": "the-nexus", "confidence": 0.9},`
			`{"fact": "B", "category": "fact", "repo": "fleet-ops", "confidence": 0.8},`
			`{"fact": "C", "category": "fact", "repo": "global", "confidence": 0.7},`
			`]`
			`filtered = filter_facts(facts, repo="the-nexus", include_global=True)`
			`texts = [f["fact"] for f in filtered]`
			`assert "A" in texts`
			`assert "B" not in texts`
			`assert "C" in texts`
			`print("PASS: filter_by_repo")`


			`def test_filter_by_agent():`
			`"""Filter facts by agent type."""`
			`facts = [`
			`{"fact": "A", "category": "pattern", "repo": "global", "agent": "mimo-sprint", "confidence": 0.8},`
			`{"fact": "B", "category": "pattern", "repo": "global", "agent": "groq-fast", "confidence": 0.7},`
			`{"fact": "C", "category": "fact", "repo": "global", "confidence": 0.9},`
			`]`
			`filtered = filter_facts(facts, agent="mimo-sprint", include_global=True)`
			`texts = [f["fact"] for f in filtered]`
			`assert "A" in texts`
			`assert "B" not in texts`
			`assert "C" in texts # global, no agent restriction`
			`print("PASS: filter_by_agent")`


			`def test_no_global_flag():`
			`"""Excluding global facts works."""`
			`facts = [`
			`{"fact": "A", "category": "fact", "repo": "the-nexus", "confidence": 0.9},`
			`{"fact": "B", "category": "fact", "repo": "global", "confidence": 0.8},`
			`]`
			`filtered = filter_facts(facts, repo="the-nexus", include_global=False)`
			`texts = [f["fact"] for f in filtered]`
			`assert "A" in texts`
			`assert "B" not in texts`
			`print("PASS: no_global_flag")`


			`def test_sort_by_confidence():`
			`"""Facts sort by confidence descending."""`
			`facts = [`
			`{"fact": "low", "category": "fact", "repo": "global", "confidence": 0.3},`
			`{"fact": "high", "category": "fact", "repo": "global", "confidence": 0.95},`
			`{"fact": "mid", "category": "fact", "repo": "global", "confidence": 0.7},`
			`]`
			`sorted_f = sort_facts(facts)`
			`assert sorted_f[0]["fact"] == "high"`
			`assert sorted_f[1]["fact"] == "mid"`
			`assert sorted_f[2]["fact"] == "low"`
			`print("PASS: sort_by_confidence")`


			`def test_sort_pitfalls_first():`
			`"""Pitfalls sort before facts at same confidence."""`
			`facts = [`
			`{"fact": "regular fact", "category": "fact", "repo": "global", "confidence": 0.8},`
			`{"fact": "danger pitfall", "category": "pitfall", "repo": "global", "confidence": 0.8},`
			`]`
			`sorted_f = sort_facts(facts)`
			`assert sorted_f[0]["category"] == "pitfall"`
			`print("PASS: sort_pitfalls_first")`


			`def test_truncate_to_tokens():`
			`"""Truncation cuts at line boundary."""`
			`text = "line1\nline2\nline3\nline4\nline5\n"`
			`truncated = truncate_to_tokens(text, max_tokens=2) # ~8 chars`
			`assert "line1" in truncated`
			`assert "truncated" in truncated.lower()`
			`print("PASS: truncate_to_tokens")`


			`def test_estimate_tokens():`
			`"""Token estimation is reasonable."""`
			`text = "a" * 400`
			`tokens = estimate_tokens(text)`
			`assert 90 <= tokens <= 110 # ~100 tokens`
			`print("PASS: estimate_tokens")`


			`def test_build_full_context():`
			`"""Full context with facts renders correctly."""`
			`facts = [`
			`{"fact": "API merges fail with 405", "category": "pitfall", "repo": "the-nexus", "confidence": 0.95},`
			`{"fact": "Has 50+ open PRs", "category": "fact", "repo": "the-nexus", "confidence": 0.9},`
			`{"fact": "Token at ~/.config/gitea/token", "category": "tool-quirk", "repo": "global", "confidence": 0.9},`
			`{"fact": "Check git remote -v first", "category": "pattern", "repo": "global", "confidence": 0.8},`
			`]`

			`with tempfile.TemporaryDirectory() as tmp:`
			`tmp_dir = Path(tmp)`
			`index_path = make_index(facts, tmp_dir)`

			`# Create knowledge dirs`
			`for sub in ["repos", "agents", "global"]:`
			`(tmp_dir / sub).mkdir(exist_ok=True)`

			`context = build_bootstrap_context(`
			`repo="the-nexus",`
			`agent="mimo-sprint",`
			`include_global=True,`
			`index_path=index_path,`
			`)`

			`assert "What You Know" in context`
			`assert "PITFALLS" in context`
			`assert "API merges fail with 405" in context`
			`assert "the-nexus" in context`
			`assert "Token at" in context # global fact included`
			`print("PASS: build_full_context")`


			`def test_max_tokens_respected():`
			`"""Output respects max_tokens limit."""`
			`# Generate lots of facts`
			`facts = [`
			`{"fact": f"Fact number {i} with some detail about things", "category": "fact", "repo": "global", "confidence": 0.8}`
			`for i in range(100)`
			`]`

			`with tempfile.TemporaryDirectory() as tmp:`
			`tmp_dir = Path(tmp)`
			`index_path = make_index(facts, tmp_dir)`
			`for sub in ["repos", "agents", "global"]:`
			`(tmp_dir / sub).mkdir(exist_ok=True)`

			`context = build_bootstrap_context(`
			`repo=None,`
			`max_tokens=500,`
			`index_path=index_path,`
			`)`

			`actual_tokens = estimate_tokens(context)`
			`# Allow 10% overshoot since we cut at line boundaries`
			`assert actual_tokens <= 550, f"Expected ~500 tokens, got {actual_tokens}"`
			`print(f"PASS: max_tokens_respected (got {actual_tokens} tokens)")`


			`def test_missing_index_graceful():`
			`"""Missing index.json doesn't crash."""`
			`with tempfile.TemporaryDirectory() as tmp:`
			`tmp_dir = Path(tmp)`
			`# Don't create index.json`
			`for sub in ["repos", "agents", "global"]:`
			`(tmp_dir / sub).mkdir(exist_ok=True)`

			`fake_index = tmp_dir / "nonexistent.json"`
			`context = build_bootstrap_context(repo="anything", index_path=fake_index)`
			`assert "No relevant knowledge found" in context`
			`print("PASS: missing_index_graceful")`


			`if __name__ == "__main__":`
			`tests = [`
			`test_empty_index,`
			`test_filter_by_repo,`
			`test_filter_by_agent,`
			`test_no_global_flag,`
			`test_sort_by_confidence,`
			`test_sort_pitfalls_first,`
			`test_truncate_to_tokens,`
			`test_estimate_tokens,`
			`test_build_full_context,`
			`test_max_tokens_respected,`
			`test_missing_index_graceful,`
			`]`

			`passed = 0`
			`failed = 0`
			`for test in tests:`
			`try:`
			`test()`
			`passed += 1`
			`except Exception as e:`
			`print(f"FAIL: {test.__name__} — {e}")`
			`failed += 1`

			`print(f"\n{passed} passed, {failed} failed")`
			`sys.exit(0 if failed == 0 else 1)`