#!/usr/bin/env python3
|
|
"""Tests for scripts/improvement_proposals.py — 15 tests."""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
|
|
# Make sibling modules importable regardless of the caller's working directory.
sys.path.insert(0, os.path.dirname(__file__) or ".")

import importlib.util

# Load the module under test directly from its file path ("improvement_proposals.py"
# next to this test file) so the tests work even when the scripts directory is
# not an installed package.
spec = importlib.util.spec_from_file_location(
    "ip", os.path.join(os.path.dirname(__file__) or ".", "improvement_proposals.py")
)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)

# Re-export the functions under test as module-level names for concise use below.
analyze_sessions = mod.analyze_sessions
generate_proposals = mod.generate_proposals
format_proposals_markdown = mod.format_proposals_markdown
format_proposals_json = mod.format_proposals_json
_normalize_error = mod._normalize_error
|
|
|
|
|
|
# ── Helper to build test sessions ─────────────────────────────
|
|
|
|
def _make_session(session_id, repo="test-repo", errors=None, tool_calls=None, duration=30):
|
|
return {
|
|
"session_id": session_id,
|
|
"repo": repo,
|
|
"errors": [{"message": e} for e in (errors or [])],
|
|
"tool_calls": tool_calls or [],
|
|
"duration_minutes": duration,
|
|
"messages": [],
|
|
}
|
|
|
|
|
|
def _make_tool_calls(repeats):
|
|
"""Create tool call list with repeated tools."""
|
|
calls = []
|
|
for tool, count in repeats:
|
|
for _ in range(count):
|
|
calls.append({"tool": tool, "latency_ms": 100})
|
|
return calls
|
|
|
|
|
|
# ── Tests ─────────────────────────────────────────────────────
|
|
|
|
def test_empty_sessions():
    """An empty session list must yield no waste patterns."""
    assert analyze_sessions([]) == []
    print("PASS: test_empty_sessions")
|
|
|
|
|
|
def test_no_patterns_on_clean_sessions():
    """Sessions with fast tools and no errors produce zero patterns."""
    clean = [
        _make_session("s1", tool_calls=[{"tool": "read_file", "latency_ms": 50}]),
        _make_session("s2", tool_calls=[{"tool": "write_file", "latency_ms": 80}]),
    ]
    # No repeated errors, no slow tools, no retries
    assert len(analyze_sessions(clean)) == 0
    print("PASS: test_no_patterns_on_clean_sessions")
|
|
|
|
|
|
def test_repeated_error_detection():
    """Same error across 3+ sessions triggers pattern."""
    err = "ModuleNotFoundError: No module named bannerlord_trace"
    sessions = [_make_session(f"s{i}", errors=[err]) for i in range(4)]
    found = [
        p for p in analyze_sessions(sessions)
        if p.pattern_type == "repeated_error"
    ]
    assert len(found) == 1
    assert found[0].occurrences == 4
    print("PASS: test_repeated_error_detection")
|
|
|
|
|
|
def test_repeated_error_threshold():
    """2 occurrences should NOT trigger (threshold is 3)."""
    msg = "TimeoutError: connection timed out"
    sessions = [_make_session(sid, errors=[msg]) for sid in ("s1", "s2")]
    repeated = [
        p for p in analyze_sessions(sessions)
        if p.pattern_type == "repeated_error"
    ]
    assert len(repeated) == 0
    print("PASS: test_repeated_error_threshold")
|
|
|
|
|
|
def test_slow_tool_detection():
    """Tool with avg latency > 5000ms across 5+ calls."""
    slow_calls = [{"tool": "git_push", "latency_ms": 8000} for _ in range(10)]
    patterns = analyze_sessions([_make_session("s1", tool_calls=slow_calls)])
    hits = [p for p in patterns if p.pattern_type == "slow_tool"]
    assert len(hits) == 1
    assert "git_push" in hits[0].description
    print("PASS: test_slow_tool_detection")
|
|
|
|
|
|
def test_fast_tool_not_flagged():
    """Tool under 5000ms avg should not trigger."""
    fast_calls = [{"tool": "read_file", "latency_ms": 50} for _ in range(10)]
    patterns = analyze_sessions([_make_session("s1", tool_calls=fast_calls)])
    assert len([p for p in patterns if p.pattern_type == "slow_tool"]) == 0
    print("PASS: test_fast_tool_not_flagged")
|
|
|
|
|
|
def test_failed_retry_detection():
    """3+ consecutive calls to same tool triggers retry pattern."""
    sessions = [_make_session("s1", tool_calls=_make_tool_calls([("execute_code", 5)]))]
    for i in range(2, 5):
        sessions.append(
            _make_session(f"s{i}", tool_calls=_make_tool_calls([("execute_code", 4)]))
        )
    retries = [
        p for p in analyze_sessions(sessions)
        if p.pattern_type == "failed_retry"
    ]
    assert len(retries) >= 1
    print("PASS: test_failed_retry_detection")
|
|
|
|
|
|
def test_manual_process_detection():
    """10+ tool calls with <= 3 unique tools."""
    # 13 calls spanning only 2 distinct tools — well past both thresholds.
    repetitive = _make_tool_calls([("terminal", 8), ("read_file", 5)])
    patterns = analyze_sessions([_make_session("s1", tool_calls=repetitive, duration=25)])
    assert len([p for p in patterns if p.pattern_type == "manual_process"]) == 1
    print("PASS: test_manual_process_detection")
|
|
|
|
|
|
def test_generate_proposals_from_patterns():
    """Proposals generated from waste patterns."""
    sessions = [
        _make_session(f"s{i}", errors=["Error: push timeout"]) for i in range(5)
    ]
    proposals = generate_proposals(analyze_sessions(sessions))
    assert len(proposals) >= 1
    top = proposals[0]
    assert top.estimated_monthly_hours_saved > 0
    assert top.priority in ("critical", "high", "medium", "low")
    print("PASS: test_generate_proposals_from_patterns")
|
|
|
|
|
|
def test_proposal_roi_positive():
    """ROI weeks should be a positive number for recoverable time."""
    pattern = mod.WastePattern(
        pattern_type="repeated_error",
        description="Test error",
        occurrences=10,
        total_time_hours=5.0,
        affected_repos=["test"],
    )
    proposals = generate_proposals([pattern])
    assert len(proposals) == 1
    assert proposals[0].roi_weeks > 0
    assert proposals[0].roi_weeks < 100
    print("PASS: test_proposal_roi_positive")
|
|
|
|
|
|
def test_proposals_sorted_by_impact():
    """Proposals should be sorted by monthly hours saved (descending)."""
    sessions = [
        _make_session("s1", errors=["Minor warning"] * 3, duration=5),
        _make_session("s2", errors=["Critical failure: deploy crashed"] * 5, duration=60),
    ]
    # Add more sessions to cross threshold
    sessions += [
        _make_session(f"s{i}", errors=["Critical failure: deploy crashed"])
        for i in range(3, 7)
    ]

    proposals = generate_proposals(analyze_sessions(sessions))
    if len(proposals) >= 2:
        savings = [p.estimated_monthly_hours_saved for p in proposals]
        # Adjacent-pair check: each entry saves at least as much as the next.
        for current, following in zip(savings, savings[1:]):
            assert current >= following
    print("PASS: test_proposals_sorted_by_impact")
|
|
|
|
|
|
def test_format_markdown():
    """Markdown output should contain expected sections."""
    patterns = [mod.WastePattern(
        pattern_type="repeated_error", description="Test", occurrences=5,
        total_time_hours=2.5, affected_repos=["repo"],
    )]
    md = format_proposals_markdown(
        generate_proposals(patterns), patterns, "2026-04-15T00:00:00Z"
    )
    expected_sections = (
        "# Improvement Proposals",
        "## Summary",
        "### Problem",
        "### ROI Estimate",
        "## Appendix",
    )
    for section in expected_sections:
        assert section in md
    print("PASS: test_format_markdown")
|
|
|
|
|
|
def test_format_json():
    """JSON output should be valid and parseable."""
    patterns = [mod.WastePattern(
        pattern_type="slow_tool", description="Slow", occurrences=10,
        total_time_hours=3.0, affected_repos=["global"],
    )]
    proposals = generate_proposals(patterns)
    parsed = json.loads(format_proposals_json(proposals))
    assert isinstance(parsed, list)
    assert len(parsed) == len(proposals)
    assert "title" in parsed[0]
    assert "roi_weeks" in parsed[0]
    print("PASS: test_format_json")
|
|
|
|
|
|
def test_normalize_error():
    """Error normalization should remove paths and hashes."""
    masked = _normalize_error("Failed to clone /Users/apayne/repo with token abc123def456")
    assert "/PATH" in masked
    assert "HASH" in masked
    assert "/Users/apayne" not in masked

    # Empty input passes straight through.
    assert _normalize_error("") == ""

    # Plain text survives (lowercased by normalization).
    assert "simple error" in _normalize_error("Simple error message")
    print("PASS: test_normalize_error")
|
|
|
|
|
|
def test_cli_integration():
    """End-to-end test: write input JSON, run script, check output."""
    import subprocess

    sessions = [
        _make_session(f"s{i}", errors=["Connection refused: port 8080"])
        for i in range(5)
    ]

    with tempfile.TemporaryDirectory() as tmpdir:
        input_path = os.path.join(tmpdir, "analytics.json")
        output_path = os.path.join(tmpdir, "proposals.md")

        with open(input_path, "w") as fh:
            json.dump({"sessions": sessions}, fh)

        script = os.path.join(os.path.dirname(__file__) or ".", "improvement_proposals.py")
        proc = subprocess.run(
            [sys.executable, script, "--input", input_path, "--output", output_path],
            capture_output=True, text=True, timeout=10,
        )

        assert proc.returncode == 0, f"CLI failed: {proc.stderr}"
        assert os.path.exists(output_path)

        with open(output_path) as fh:
            report = fh.read()
        assert "# Improvement Proposals" in report
    print("PASS: test_cli_integration")
|
|
|
|
|
|
def run_all():
    """Run every test in declaration order; each prints its own PASS line."""
    tests = (
        test_empty_sessions,
        test_no_patterns_on_clean_sessions,
        test_repeated_error_detection,
        test_repeated_error_threshold,
        test_slow_tool_detection,
        test_fast_tool_not_flagged,
        test_failed_retry_detection,
        test_manual_process_detection,
        test_generate_proposals_from_patterns,
        test_proposal_roi_positive,
        test_proposals_sorted_by_impact,
        test_format_markdown,
        test_format_json,
        test_normalize_error,
        test_cli_integration,
    )
    for test in tests:
        test()
    print("\nAll 15 tests passed!")
|
|
|
|
|
|
# Script entry point: run the full suite when executed directly.
if __name__ == "__main__":
    run_all()
|