# compounding-intelligence/scripts/test_bootstrapper.py

#!/usr/bin/env python3
"""
Tests for bootstrapper.py — context assembly from knowledge store.
"""

import json
import sys
import tempfile
from pathlib import Path

# Add scripts dir to path for import
sys.path.insert(0, str(Path(__file__).resolve().parent))

from bootstrapper import (
    build_bootstrap_context,
    estimate_tokens,
    filter_facts,
    load_index,
    sort_facts,
    truncate_to_tokens,
)


def make_index(facts: list[dict], tmp_dir: Path) -> Path:
    """Write ``index.json`` into *tmp_dir* and return the file's path.

    The document wraps *facts* with fixed metadata: a version number, a
    hard-coded timestamp, and a ``total_facts`` count derived from the list.
    """
    target = tmp_dir / "index.json"
    document = dict(
        version=1,
        last_updated="2026-04-13T20:00:00Z",
        total_facts=len(facts),
        facts=facts,
    )
    with target.open("w") as handle:
        json.dump(document, handle)
    return target


def test_empty_index():
    """An index holding zero facts still renders a "nothing known" context."""
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        empty_index = make_index([], root)

        # Lay out the (empty) knowledge sub-directories beside the index file.
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(
            repo="the-nexus",
            index_path=empty_index,
        )
        for expected in ("No relevant knowledge found", "Starting fresh"):
            assert expected in rendered
        print("PASS: empty_index")


def test_filter_by_repo():
    """Repo filtering keeps the requested repo and global facts only."""
    records = [
        {"fact": label, "category": "fact", "repo": owner, "confidence": score}
        for label, owner, score in (
            ("A", "the-nexus", 0.9),
            ("B", "fleet-ops", 0.8),
            ("C", "global", 0.7),
        )
    ]
    kept = [
        entry["fact"]
        for entry in filter_facts(records, repo="the-nexus", include_global=True)
    ]
    assert "A" in kept  # matches the requested repo
    assert "B" not in kept  # belongs to a different repo
    assert "C" in kept  # global fact, include_global=True
    print("PASS: filter_by_repo")


def test_filter_by_agent():
    """Agent filtering drops facts tagged for a different agent."""
    records = [
        {
            "fact": "A",
            "category": "pattern",
            "repo": "global",
            "agent": "mimo-sprint",
            "confidence": 0.8,
        },
        {
            "fact": "B",
            "category": "pattern",
            "repo": "global",
            "agent": "groq-fast",
            "confidence": 0.7,
        },
        # No "agent" key at all: expected to pass through as a global fact.
        {
            "fact": "C",
            "category": "fact",
            "repo": "global",
            "confidence": 0.9,
        },
    ]
    survivors = filter_facts(records, agent="mimo-sprint", include_global=True)
    kept = [entry["fact"] for entry in survivors]
    assert "A" in kept
    assert "B" not in kept
    assert "C" in kept  # global, no agent restriction
    print("PASS: filter_by_agent")


def test_no_global_flag():
    """With include_global=False, facts from the "global" repo are left out."""
    records = [
        {"fact": label, "category": "fact", "repo": owner, "confidence": score}
        for label, owner, score in (
            ("A", "the-nexus", 0.9),
            ("B", "global", 0.8),
        )
    ]
    survivors = filter_facts(records, repo="the-nexus", include_global=False)
    kept = [entry["fact"] for entry in survivors]
    assert "A" in kept
    assert "B" not in kept
    print("PASS: no_global_flag")


def test_sort_by_confidence():
    """sort_facts orders same-category facts from highest to lowest confidence."""
    records = [
        {"fact": label, "category": "fact", "repo": "global", "confidence": score}
        for label, score in (("low", 0.3), ("high", 0.95), ("mid", 0.7))
    ]
    ordered = sort_facts(records)
    # Check each position in turn against the expected descending order.
    for position, expected in enumerate(("high", "mid", "low")):
        assert ordered[position]["fact"] == expected
    print("PASS: sort_by_confidence")


def test_sort_pitfalls_first():
    """At equal confidence, a pitfall is ordered ahead of a plain fact."""
    # The plain fact is listed first on purpose, so the pitfall only ends up
    # in front if sort_facts actually moves it there.
    plain = {"fact": "regular fact", "category": "fact", "repo": "global", "confidence": 0.8}
    hazard = {"fact": "danger pitfall", "category": "pitfall", "repo": "global", "confidence": 0.8}
    leader = sort_facts([plain, hazard])[0]
    assert leader["category"] == "pitfall"
    print("PASS: sort_pitfalls_first")


def test_truncate_to_tokens():
    """A tiny token budget keeps the first line and adds a truncation notice."""
    source = "line1\nline2\nline3\nline4\nline5\n"
    # max_tokens=2 is meant to be roughly 8 characters of budget.
    shortened = truncate_to_tokens(source, max_tokens=2)
    assert "line1" in shortened
    # The notice is matched case-insensitively.
    assert "truncated" in shortened.lower()
    print("PASS: truncate_to_tokens")


def test_estimate_tokens():
    """A 400-character string is estimated at roughly 100 tokens."""
    sample = "a" * 400
    estimate = estimate_tokens(sample)
    # Accept anything within 10 of the ~100 token target.
    assert 90 <= estimate <= 110
    print("PASS: estimate_tokens")


def test_build_full_context():
    """A populated index renders headings, repo facts and global facts."""
    records = [
        {
            "fact": "API merges fail with 405",
            "category": "pitfall",
            "repo": "the-nexus",
            "confidence": 0.95,
        },
        {
            "fact": "Has 50+ open PRs",
            "category": "fact",
            "repo": "the-nexus",
            "confidence": 0.9,
        },
        {
            "fact": "Token at ~/.config/gitea/token",
            "category": "tool-quirk",
            "repo": "global",
            "confidence": 0.9,
        },
        {
            "fact": "Check git remote -v first",
            "category": "pattern",
            "repo": "global",
            "confidence": 0.8,
        },
    ]

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        populated_index = make_index(records, root)

        # Knowledge sub-directories exist but stay empty for this test.
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(
            repo="the-nexus",
            agent="mimo-sprint",
            include_global=True,
            index_path=populated_index,
        )

        expected_snippets = (
            "What You Know",
            "PITFALLS",
            "API merges fail with 405",
            "the-nexus",
            "Token at",  # global fact included
        )
        for snippet in expected_snippets:
            assert snippet in rendered
        print("PASS: build_full_context")


def test_max_tokens_respected():
    """Rendering 100 facts with max_tokens=500 stays within a 550-token cap."""
    # Build far more facts than a 500-token budget is expected to hold.
    records = []
    for i in range(100):
        records.append(
            {
                "fact": f"Fact number {i} with some detail about things",
                "category": "fact",
                "repo": "global",
                "confidence": 0.8,
            }
        )

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        big_index = make_index(records, root)
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        rendered = build_bootstrap_context(
            repo=None,
            max_tokens=500,
            index_path=big_index,
        )

        actual_tokens = estimate_tokens(rendered)
        # 550 = 500 plus 10% slack, since truncation happens on line boundaries.
        assert actual_tokens <= 550, f"Expected ~500 tokens, got {actual_tokens}"
        print(f"PASS: max_tokens_respected (got {actual_tokens} tokens)")


def test_missing_index_graceful():
    """Pointing at a non-existent index file yields the "nothing known" text."""
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        # Only the knowledge directories are created; no index file is written.
        for name in ("repos", "agents", "global"):
            (root / name).mkdir(exist_ok=True)

        absent_index = root / "nonexistent.json"
        rendered = build_bootstrap_context(
            repo="anything",
            index_path=absent_index,
        )
        assert "No relevant knowledge found" in rendered
        print("PASS: missing_index_graceful")


if __name__ == "__main__":
    # Standalone runner: execute every test in file order, print a summary,
    # and exit non-zero if any test failed.
    import traceback

    tests = [
        test_empty_index,
        test_filter_by_repo,
        test_filter_by_agent,
        test_no_global_flag,
        test_sort_by_confidence,
        test_sort_pitfalls_first,
        test_truncate_to_tokens,
        test_estimate_tokens,
        test_build_full_context,
        test_max_tokens_respected,
        test_missing_index_graceful,
    ]

    passed = 0
    failed = 0
    for test in tests:
        try:
            test()
        except Exception as e:
            # Most assertions in this file carry no message, so str(e) is
            # often empty. Report the exception type and the traceback so the
            # failing line can be located.
            print(f"FAIL: {test.__name__} — {type(e).__name__}: {e}")
            traceback.print_exc()
            failed += 1
        else:
            passed += 1

    print(f"\n{passed} passed, {failed} failed")
    sys.exit(0 if failed == 0 else 1)