This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/timmy/test_loop_qa.py

444 lines
14 KiB
Python
Raw Normal View History

"""Tests for timmy.loop_qa — capability self-test framework.
TDD: these tests are written before the implementation. They validate:
- Capability enum and status mapping
- Six self-test probes (T1–T6)
- Round-robin orchestrator with throttling
- Failure counter logic and upgrade proposal filing
- Health snapshot derivation
"""
from datetime import UTC, datetime
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Model tests
# ---------------------------------------------------------------------------
def test_capability_enum_has_all_members():
    """The Capability StrEnum must expose exactly the six expected values."""
    from timmy.loop_qa import Capability
    wanted = {
        "tool_use",
        "multistep_planning",
        "memory_read",
        "memory_write",
        "self_coding",
        "lightning_econ",
    }
    assert wanted == {member.value for member in Capability}
def test_status_for_failures_mapping():
    """Statuses map as: 0–1 failures -> green, 2 -> yellow, >= 3 -> red."""
    from timmy.loop_qa import LoopQAOrchestrator
    expected_by_count = {0: "green", 1: "green", 2: "yellow", 3: "red", 10: "red"}
    for failures, expected in expected_by_count.items():
        assert LoopQAOrchestrator.status_for_failures(failures) == expected
def test_probe_registry_has_six_entries():
    """Every capability must appear in the probe sequence, once each."""
    from timmy.loop_qa import TEST_SEQUENCE, Capability
    assert len(TEST_SEQUENCE) == 6
    assert {entry[0] for entry in TEST_SEQUENCE} == set(Capability)
# ---------------------------------------------------------------------------
# Self-test probe tests (T1–T6)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_t1_tool_use_success():
    """T1 passes when the shell hand returns a non-empty stdout."""
    from timmy.loop_qa import Capability, probe_tool_use
    fake_run_result = MagicMock(success=True, stdout="file1.py\nfile2.py\n")
    hand = AsyncMock()
    hand.run = AsyncMock(return_value=fake_run_result)
    with patch("timmy.loop_qa._get_shell_hand", return_value=hand):
        outcome = await probe_tool_use()
    assert outcome["success"] is True
    assert outcome["capability"] == Capability.TOOL_USE
@pytest.mark.asyncio
async def test_t1_tool_use_failure():
    """T1 reports failure (with the error type) when shell_hand.run raises."""
    from timmy.loop_qa import Capability, probe_tool_use
    hand = AsyncMock()
    hand.run = AsyncMock(side_effect=RuntimeError("shell unavailable"))
    with patch("timmy.loop_qa._get_shell_hand", return_value=hand):
        outcome = await probe_tool_use()
    assert outcome["success"] is False
    assert outcome["capability"] == Capability.TOOL_USE
    assert outcome["error_type"] == "RuntimeError"
@pytest.mark.asyncio
async def test_t2_multistep_planning(tmp_path):
    """T2 writes a vault note and then verifies it exists."""
    from timmy.loop_qa import probe_multistep_planning
    target = tmp_path / "test_note.md"
    # The fake must persist the probe's own content so the marker
    # verification succeeds when the probe reads the note back.
    def _fake_write(name, content, folder):
        target.write_text(content)
        return target
    vault = MagicMock()
    vault.write_note = MagicMock(side_effect=_fake_write)
    with patch("timmy.loop_qa._get_vault", return_value=vault):
        outcome = await probe_multistep_planning()
    assert outcome["success"] is True
@pytest.mark.asyncio
async def test_t3_memory_write():
    """T3 stores a fact via brain store_fact_sync and reports success."""
    from timmy.loop_qa import probe_memory_write
    memory = MagicMock()
    memory.store_fact_sync = MagicMock(return_value=None)
    with patch("timmy.loop_qa._get_brain_memory", return_value=memory):
        outcome = await probe_memory_write()
    assert outcome["success"] is True
    # The fact must be filed under the "self_test_marker" category.
    memory.store_fact_sync.assert_called_once()
    assert memory.store_fact_sync.call_args[0][0] == "self_test_marker"
@pytest.mark.asyncio
async def test_t4_memory_read():
    """T4 succeeds when stored marker facts come back from memory."""
    from timmy.loop_qa import probe_memory_read
    memory = MagicMock()
    memory.get_facts_sync = MagicMock(
        return_value=[{"content": "test_marker_123", "category": "self_test_marker"}]
    )
    with patch("timmy.loop_qa._get_brain_memory", return_value=memory):
        outcome = await probe_memory_read()
    assert outcome["success"] is True
@pytest.mark.asyncio
async def test_t4_memory_read_empty():
    """T4 must report failure when memory returns no facts at all."""
    from timmy.loop_qa import probe_memory_read
    memory = MagicMock()
    memory.get_facts_sync = MagicMock(return_value=[])
    with patch("timmy.loop_qa._get_brain_memory", return_value=memory):
        outcome = await probe_memory_read()
    assert outcome["success"] is False
@pytest.mark.asyncio
async def test_t5_self_coding(tmp_path):
    """T5 succeeds once the improvement-sketch note exists on disk."""
    from timmy.loop_qa import probe_self_coding
    note = tmp_path / "self_test_note.md"
    note.write_text("# Self-Test Note\n\nImprovement sketch.")
    vault = MagicMock()
    vault.write_note = MagicMock(return_value=note)
    with patch("timmy.loop_qa._get_vault", return_value=vault):
        outcome = await probe_self_coding()
    assert outcome["success"] is True
@pytest.mark.asyncio
async def test_t6_lightning_econ_placeholder():
    """T6 is a stub for now and must always report success."""
    from timmy.loop_qa import probe_lightning_econ
    outcome = await probe_lightning_econ()
    assert outcome["success"] is True
    details = outcome["details"].lower()
    assert "pending" in details or "v2" in details
# ---------------------------------------------------------------------------
# Orchestrator tests
# ---------------------------------------------------------------------------
def _make_orchestrator():
    """Return a fresh LoopQAOrchestrator.

    NOTE(review): despite the original wording, nothing is patched here —
    callers must patch external services themselves. This helper is not
    referenced by any test in this file; confirm before removing.
    """
    from timmy.loop_qa import LoopQAOrchestrator
    return LoopQAOrchestrator()
@pytest.mark.asyncio
async def test_run_next_test_round_robin():
    """Successive calls should walk the probe sequence in order."""
    from timmy.loop_qa import TEST_SEQUENCE, LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    collected = []
    # Stub each probe to succeed instantly, one per round-robin slot.
    with patch("timmy.loop_qa.log_event"):
        for capability, _probe in TEST_SEQUENCE:
            target = f"timmy.loop_qa.probe_{capability.value}"
            stub_result = {
                "success": True,
                "capability": capability,
                "details": "ok",
                "error_type": None,
            }
            with patch(target, new_callable=AsyncMock, return_value=stub_result):
                collected.append(await orch.run_next_test())
    # Every one of the 6 slots must have produced a result.
    assert len(collected) == 6
    assert all(item is not None for item in collected)
@pytest.mark.asyncio
async def test_run_next_test_disabled():
    """A disabled loop_qa_enabled setting must short-circuit to None."""
    from timmy.loop_qa import LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    with patch("timmy.loop_qa.settings") as fake_settings:
        fake_settings.loop_qa_enabled = False
        assert await orch.run_next_test() is None
@pytest.mark.asyncio
async def test_run_next_test_throttle():
    """Hitting the hourly cap must make run_next_test return None."""
    from timmy.loop_qa import LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    # Force the counter far beyond any plausible max_per_hour setting,
    # pinned to the current hour so the throttle window is active.
    orch._hourly_count = 100
    orch._hour_marker = datetime.now(UTC).hour
    assert await orch.run_next_test() is None
@pytest.mark.asyncio
async def test_failure_counter_increments():
    """A failing probe bumps the capability's consecutive-failure count."""
    from timmy.loop_qa import Capability, LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    capability = Capability.TOOL_USE
    failing = {
        "success": False,
        "capability": capability,
        "details": "empty stdout",
        "error_type": "AssertionError",
    }
    with patch("timmy.loop_qa.log_event"), patch(
        "timmy.loop_qa.probe_tool_use",
        new_callable=AsyncMock,
        return_value=failing,
    ):
        await orch.run_next_test()
    assert orch._failure_counts[capability] == 1
@pytest.mark.asyncio
async def test_failure_counter_resets_on_success():
    """A passing probe zeroes the counter and clears any filed proposal."""
    from timmy.loop_qa import Capability, LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    capability = Capability.TOOL_USE
    # Seed prior failure state so the reset is observable.
    orch._failure_counts[capability] = 5
    orch._proposal_filed.add(capability)
    passing = {
        "success": True,
        "capability": capability,
        "details": "ok",
        "error_type": None,
    }
    with patch("timmy.loop_qa.log_event"), patch(
        "timmy.loop_qa.probe_tool_use",
        new_callable=AsyncMock,
        return_value=passing,
    ):
        await orch.run_next_test()
    assert orch._failure_counts[capability] == 0
    assert capability not in orch._proposal_filed
@pytest.mark.asyncio
async def test_upgrade_proposal_filed_at_threshold():
    """When failures reach the red threshold, one upgrade task is filed.

    Seeds the counter at 2 so a single additional failure crosses the
    threshold of 3, then asserts create_task ran exactly once with a
    TOOL_USE-referencing payload and the capability is marked as filed.
    """
    from timmy.loop_qa import Capability, LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    cap = Capability.TOOL_USE
    orch._failure_counts[cap] = 2  # One more failure hits threshold of 3
    with patch("timmy.loop_qa.log_event"):
        with patch("timmy.loop_qa.create_task") as mock_create:
            with patch(
                "timmy.loop_qa.probe_tool_use",
                new_callable=AsyncMock,
                return_value={
                    "success": False,
                    "capability": cap,
                    "details": "empty stdout",
                    "error_type": "AssertionError",
                },
            ):
                await orch.run_next_test()
    mock_create.assert_called_once()
    # BUG FIX: the original evaluated call_args[1]["title"] on the left of
    # an `or`, which raises KeyError (instead of falling through to the
    # str() check) whenever the title is passed positionally. Use
    # .kwargs.get() so the fallback branch is actually reachable.
    call = mock_create.call_args
    assert "TOOL_USE" in call.kwargs.get("title", "") or "TOOL_USE" in str(call)
    assert cap in orch._proposal_filed
@pytest.mark.asyncio
async def test_upgrade_proposal_not_refiled():
    """A capability with an already-filed proposal must not file another."""
    from timmy.loop_qa import Capability, LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    capability = Capability.TOOL_USE
    orch._failure_counts[capability] = 5
    orch._proposal_filed.add(capability)  # Proposal already on record
    still_failing = {
        "success": False,
        "capability": capability,
        "details": "still broken",
        "error_type": "RuntimeError",
    }
    with patch("timmy.loop_qa.log_event"), patch(
        "timmy.loop_qa.create_task"
    ) as task_spy, patch(
        "timmy.loop_qa.probe_tool_use",
        new_callable=AsyncMock,
        return_value=still_failing,
    ):
        await orch.run_next_test()
    task_spy.assert_not_called()
@pytest.mark.asyncio
async def test_graceful_on_probe_crash():
    """An unexpected probe exception becomes a failure result, not a raise."""
    from timmy.loop_qa import LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    with patch("timmy.loop_qa.log_event"), patch("timmy.loop_qa.capture_error"):
        with patch(
            "timmy.loop_qa.probe_tool_use",
            new_callable=AsyncMock,
            side_effect=Exception("probe exploded"),
        ):
            outcome = await orch.run_next_test()
    # The orchestrator must absorb the crash and report a failure.
    assert outcome is not None
    assert outcome["success"] is False
# ---------------------------------------------------------------------------
# Health snapshot tests
# ---------------------------------------------------------------------------
def test_health_snapshot_all_green():
    """With zero recorded failures, every capability reports green."""
    from timmy.loop_qa import LoopQAOrchestrator
    snapshot = LoopQAOrchestrator().get_health_snapshot()
    assert snapshot["overall_status"] == "green"
    for entry in snapshot["capabilities"]:
        assert entry["status"] == "green"
def test_health_snapshot_mixed_statuses():
    """Per-capability statuses must reflect their individual failure counts."""
    from timmy.loop_qa import Capability, LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    orch._failure_counts[Capability.TOOL_USE] = 2  # -> yellow
    orch._failure_counts[Capability.MEMORY_READ] = 5  # -> red
    snapshot = orch.get_health_snapshot()
    statuses = {entry["capability"]: entry["status"] for entry in snapshot["capabilities"]}
    assert statuses[Capability.TOOL_USE] == "yellow"
    assert statuses[Capability.MEMORY_READ] == "red"
    assert statuses[Capability.LIGHTNING_ECON] == "green"
def test_health_snapshot_overall_worst():
    """overall_status is the worst status across all capabilities."""
    from timmy.loop_qa import Capability, LoopQAOrchestrator
    orch = LoopQAOrchestrator()
    orch._failure_counts[Capability.TOOL_USE] = 2  # yellow outranks green
    assert orch.get_health_snapshot()["overall_status"] == "yellow"
    orch._failure_counts[Capability.MEMORY_WRITE] = 5  # red outranks yellow
    assert orch.get_health_snapshot()["overall_status"] == "red"
# ---------------------------------------------------------------------------
# Dashboard route tests
# ---------------------------------------------------------------------------
def test_loop_qa_health_json(client):
    """GET /health/loop-qa returns a 200 with a six-capability snapshot."""
    response = client.get("/health/loop-qa")
    assert response.status_code == 200
    payload = response.json()
    assert "overall_status" in payload
    assert "capabilities" in payload
    assert len(payload["capabilities"]) == 6
def test_loop_qa_health_partial(client):
    """GET /health/loop-qa/partial serves an HTML fragment."""
    response = client.get("/health/loop-qa/partial")
    assert response.status_code == 200
    assert "text/html" in response.headers["content-type"]