compounding-intelligence/scripts/test_improvement_proposals.py

#!/usr/bin/env python3
"""Tests for scripts/improvement_proposals.py — 15 tests."""

import json
import os
import sys
import tempfile

sys.path.insert(0, os.path.dirname(__file__) or ".")
import importlib.util

# Load the module under test directly from its file path (the directory of
# this test file, or "." when that is empty) under the module name "ip",
# instead of going through a regular ``import`` statement.
spec = importlib.util.spec_from_file_location(
    "ip", os.path.join(os.path.dirname(__file__) or ".", "improvement_proposals.py")
)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)

# Module-level shortcuts for the functions exercised by the tests below.
analyze_sessions = mod.analyze_sessions
generate_proposals = mod.generate_proposals
format_proposals_markdown = mod.format_proposals_markdown
format_proposals_json = mod.format_proposals_json
_normalize_error = mod._normalize_error


# ── Helper to build test sessions ─────────────────────────────

def _make_session(session_id, repo="test-repo", errors=None, tool_calls=None, duration=30):
    return {
        "session_id": session_id,
        "repo": repo,
        "errors": [{"message": e} for e in (errors or [])],
        "tool_calls": tool_calls or [],
        "duration_minutes": duration,
        "messages": [],
    }


def _make_tool_calls(repeats):
    """Create tool call list with repeated tools."""
    calls = []
    for tool, count in repeats:
        for _ in range(count):
            calls.append({"tool": tool, "latency_ms": 100})
    return calls


# ── Tests ─────────────────────────────────────────────────────

def test_empty_sessions():
    """An empty session list should yield an empty list of patterns."""
    assert analyze_sessions([]) == []
    print("PASS: test_empty_sessions")


def test_no_patterns_on_clean_sessions():
    """Two sessions with a single quick tool call each should yield no patterns."""
    fixtures = (("s1", "read_file", 50), ("s2", "write_file", 80))
    clean_sessions = [
        _make_session(sid, tool_calls=[{"tool": tool, "latency_ms": latency}])
        for sid, tool, latency in fixtures
    ]
    found = analyze_sessions(clean_sessions)
    # Nothing here repeats an error, runs slowly, or retries a tool.
    assert len(found) == 0
    print("PASS: test_no_patterns_on_clean_sessions")


def test_repeated_error_detection():
    """The same error in four sessions is reported as one pattern with 4 occurrences."""
    message = "ModuleNotFoundError: No module named bannerlord_trace"
    sessions = []
    for i in range(4):
        sessions.append(_make_session(f"s{i}", errors=[message]))

    found = analyze_sessions(sessions)
    hits = [pat for pat in found if pat.pattern_type == "repeated_error"]
    assert len(hits) == 1
    assert hits[0].occurrences == 4
    print("PASS: test_repeated_error_detection")


def test_repeated_error_threshold():
    """An error seen in only two sessions must not be reported as repeated."""
    message = "TimeoutError: connection timed out"
    sessions = [_make_session(sid, errors=[message]) for sid in ("s1", "s2")]

    found = analyze_sessions(sessions)
    hits = [pat for pat in found if pat.pattern_type == "repeated_error"]
    assert len(hits) == 0
    print("PASS: test_repeated_error_threshold")


def test_slow_tool_detection():
    """Ten 8000 ms calls to one tool should yield one slow_tool pattern naming it."""
    slow_calls = []
    for _ in range(10):
        slow_calls.append({"tool": "git_push", "latency_ms": 8000})

    found = analyze_sessions([_make_session("s1", tool_calls=slow_calls)])
    flagged = [pat for pat in found if pat.pattern_type == "slow_tool"]
    assert len(flagged) == 1
    assert "git_push" in flagged[0].description
    print("PASS: test_slow_tool_detection")


def test_fast_tool_not_flagged():
    """Ten 50 ms calls to one tool must not produce a slow_tool pattern."""
    quick_calls = []
    for _ in range(10):
        quick_calls.append({"tool": "read_file", "latency_ms": 50})

    found = analyze_sessions([_make_session("s1", tool_calls=quick_calls)])
    flagged = [pat for pat in found if pat.pattern_type == "slow_tool"]
    assert len(flagged) == 0
    print("PASS: test_fast_tool_not_flagged")


def test_failed_retry_detection():
    """Runs of consecutive calls to one tool should surface a failed_retry pattern."""
    # Session id -> number of back-to-back "execute_code" calls in that session.
    run_lengths = {"s1": 5, "s2": 4, "s3": 4, "s4": 4}
    sessions = [
        _make_session(sid, tool_calls=_make_tool_calls([("execute_code", length)]))
        for sid, length in run_lengths.items()
    ]

    found = analyze_sessions(sessions)
    retry_hits = [pat for pat in found if pat.pattern_type == "failed_retry"]
    assert len(retry_hits) >= 1
    print("PASS: test_failed_retry_detection")


def test_manual_process_detection():
    """Thirteen calls spread over two tools should yield one manual_process pattern."""
    repetitive_calls = _make_tool_calls([("terminal", 8), ("read_file", 5)])
    session = _make_session("s1", tool_calls=repetitive_calls, duration=25)

    found = analyze_sessions([session])
    manual_hits = [pat for pat in found if pat.pattern_type == "manual_process"]
    assert len(manual_hits) == 1
    print("PASS: test_manual_process_detection")


def test_generate_proposals_from_patterns():
    """Patterns found in five failing sessions should produce at least one proposal.

    The first proposal must report positive monthly savings and carry one of
    the four known priority labels.
    """
    sessions = []
    for i in range(5):
        sessions.append(_make_session(f"s{i}", errors=["Error: push timeout"]))

    proposals = generate_proposals(analyze_sessions(sessions))
    assert len(proposals) >= 1
    top = proposals[0]
    assert top.estimated_monthly_hours_saved > 0
    assert top.priority in ("critical", "high", "medium", "low")
    print("PASS: test_generate_proposals_from_patterns")


def test_proposal_roi_positive():
    """A single hand-built pattern yields one proposal with 0 < roi_weeks < 100."""
    pattern = mod.WastePattern(
        pattern_type="repeated_error",
        description="Test error",
        occurrences=10,
        total_time_hours=5.0,
        affected_repos=["test"],
    )
    proposals = generate_proposals([pattern])
    assert len(proposals) == 1
    roi = proposals[0].roi_weeks
    assert roi > 0
    assert roi < 100
    print("PASS: test_proposal_roi_positive")


def test_proposals_sorted_by_impact():
    """Proposals should be ordered by estimated monthly hours saved, largest first.

    The ordering is checked pairwise, so when fewer than two proposals are
    produced the test passes without comparing anything.
    """
    crash = "Critical failure: deploy crashed"
    sessions = [
        _make_session("s1", errors=["Minor warning"] * 3, duration=5),
        _make_session("s2", errors=[crash] * 5, duration=60),
    ]
    # Sessions s3..s6 each repeat the crash once so it recurs across sessions.
    sessions += [_make_session(f"s{i}", errors=[crash]) for i in range(3, 7)]

    proposals = generate_proposals(analyze_sessions(sessions))
    for earlier, later in zip(proposals, proposals[1:]):
        assert earlier.estimated_monthly_hours_saved >= later.estimated_monthly_hours_saved
    print("PASS: test_proposals_sorted_by_impact")


def test_format_markdown():
    """The Markdown report should contain each of the expected section headings."""
    pattern = mod.WastePattern(
        pattern_type="repeated_error",
        description="Test",
        occurrences=5,
        total_time_hours=2.5,
        affected_repos=["repo"],
    )
    patterns = [pattern]
    proposals = generate_proposals(patterns)
    report = format_proposals_markdown(proposals, patterns, "2026-04-15T00:00:00Z")

    expected_headings = (
        "# Improvement Proposals",
        "## Summary",
        "### Problem",
        "### ROI Estimate",
        "## Appendix",
    )
    for heading in expected_headings:
        assert heading in report
    print("PASS: test_format_markdown")


def test_format_json():
    """The JSON report should parse to a list with one entry per proposal."""
    pattern = mod.WastePattern(
        pattern_type="slow_tool",
        description="Slow",
        occurrences=10,
        total_time_hours=3.0,
        affected_repos=["global"],
    )
    proposals = generate_proposals([pattern])
    decoded = json.loads(format_proposals_json(proposals))

    assert isinstance(decoded, list)
    assert len(decoded) == len(proposals)
    first_entry = decoded[0]
    assert "title" in first_entry
    assert "roi_weeks" in first_entry
    print("PASS: test_format_json")


def test_normalize_error():
    """Normalization should mask paths and hash-like tokens and lowercase plain text."""
    masked = _normalize_error("Failed to clone /Users/apayne/repo with token abc123def456")
    for placeholder in ("/PATH", "HASH"):
        assert placeholder in masked
    assert "/Users/apayne" not in masked

    # The empty string passes through unchanged.
    assert _normalize_error("") == ""

    plain = _normalize_error("Simple error message")
    assert "simple error" in plain
    print("PASS: test_normalize_error")


def test_cli_integration():
    """Run the script as a subprocess on a temp input file and check the report.

    Writes five failing sessions to ``analytics.json``, invokes the script
    with ``--input``/``--output``, and expects exit code 0 plus an output
    file containing the report heading.
    """
    import subprocess

    script = os.path.join(os.path.dirname(__file__) or ".", "improvement_proposals.py")
    payload = {
        "sessions": [
            _make_session(f"s{i}", errors=["Connection refused: port 8080"])
            for i in range(5)
        ]
    }

    with tempfile.TemporaryDirectory() as workdir:
        input_path = os.path.join(workdir, "analytics.json")
        output_path = os.path.join(workdir, "proposals.md")

        with open(input_path, "w") as handle:
            json.dump(payload, handle)

        command = [sys.executable, script, "--input", input_path, "--output", output_path]
        result = subprocess.run(command, capture_output=True, text=True, timeout=10)

        assert result.returncode == 0, f"CLI failed: {result.stderr}"
        assert os.path.exists(output_path)

        with open(output_path) as handle:
            report = handle.read()
        assert "# Improvement Proposals" in report
        print("PASS: test_cli_integration")


def run_all():
    """Run every test in declaration order, stopping at the first failure."""
    ordered_tests = (
        test_empty_sessions,
        test_no_patterns_on_clean_sessions,
        test_repeated_error_detection,
        test_repeated_error_threshold,
        test_slow_tool_detection,
        test_fast_tool_not_flagged,
        test_failed_retry_detection,
        test_manual_process_detection,
        test_generate_proposals_from_patterns,
        test_proposal_roi_positive,
        test_proposals_sorted_by_impact,
        test_format_markdown,
        test_format_json,
        test_normalize_error,
        test_cli_integration,
    )
    for run_test in ordered_tests:
        run_test()
    print("\nAll 15 tests passed!")


# When executed directly as a script, run the whole suite via run_all().
if __name__ == "__main__":
    run_all()