"""Tests for timmy.loop_qa — capability self-test framework. TDD: these tests are written before the implementation. They validate: - Capability enum and status mapping - Six self-test probes (T1–T6) - Round-robin orchestrator with throttling - Failure counter logic and upgrade proposal filing - Health snapshot derivation """ from datetime import UTC, datetime from unittest.mock import AsyncMock, MagicMock, patch import pytest # --------------------------------------------------------------------------- # Model tests # --------------------------------------------------------------------------- def test_capability_enum_has_all_members(): """Capability StrEnum should have exactly 6 members.""" from timmy.loop_qa import Capability expected = { "tool_use", "multistep_planning", "memory_read", "memory_write", "self_coding", "lightning_econ", } assert {c.value for c in Capability} == expected def test_status_for_failures_mapping(): """green for 0–1, yellow for 2, red for >= threshold.""" from timmy.loop_qa import LoopQAOrchestrator assert LoopQAOrchestrator.status_for_failures(0) == "green" assert LoopQAOrchestrator.status_for_failures(1) == "green" assert LoopQAOrchestrator.status_for_failures(2) == "yellow" assert LoopQAOrchestrator.status_for_failures(3) == "red" assert LoopQAOrchestrator.status_for_failures(10) == "red" def test_probe_registry_has_six_entries(): """The test sequence should cover all 6 capabilities.""" from timmy.loop_qa import TEST_SEQUENCE, Capability capabilities_covered = {cap for cap, _ in TEST_SEQUENCE} assert capabilities_covered == set(Capability) assert len(TEST_SEQUENCE) == 6 # --------------------------------------------------------------------------- # Self-test probe tests (T1–T6) # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_t1_tool_use_success(): """T1 should succeed when shell_hand.run returns non-empty stdout.""" from timmy.loop_qa import Capability, probe_tool_use mock_result = MagicMock(success=True, stdout="file1.py\nfile2.py\n") with patch("timmy.loop_qa._get_shell_hand") as mock_get: mock_hand = AsyncMock() mock_hand.run = AsyncMock(return_value=mock_result) mock_get.return_value = mock_hand result = await probe_tool_use() assert result["success"] is True assert result["capability"] == Capability.TOOL_USE @pytest.mark.asyncio async def test_t1_tool_use_failure(): """T1 should fail when shell_hand.run raises.""" from timmy.loop_qa import Capability, probe_tool_use with patch("timmy.loop_qa._get_shell_hand") as mock_get: mock_hand = AsyncMock() mock_hand.run = AsyncMock(side_effect=RuntimeError("shell unavailable")) mock_get.return_value = mock_hand result = await probe_tool_use() assert result["success"] is False assert result["capability"] == Capability.TOOL_USE assert result["error_type"] == "RuntimeError" @pytest.mark.asyncio async def test_t2_multistep_planning(tmp_path): """T2 should write a vault note and verify it exists.""" from timmy.loop_qa import probe_multistep_planning written_path = tmp_path / "test_note.md" # Mock write_note to actually write the content passed by the probe, # so the marker verification succeeds when the probe reads back. def fake_write_note(name, content, folder): written_path.write_text(content) return written_path mock_vault = MagicMock() mock_vault.write_note = MagicMock(side_effect=fake_write_note) with patch("timmy.loop_qa._get_vault", return_value=mock_vault): result = await probe_multistep_planning() assert result["success"] is True @pytest.mark.asyncio async def test_t3_memory_write(): """T3 should call brain store_fact_sync and succeed.""" from timmy.loop_qa import probe_memory_write mock_mem = MagicMock() mock_mem.store_fact_sync = MagicMock(return_value=None) with patch("timmy.loop_qa._get_brain_memory", return_value=mock_mem): result = await probe_memory_write() assert result["success"] is True # Verify store_fact_sync was called with "self_test_marker" category mock_mem.store_fact_sync.assert_called_once() call_args = mock_mem.store_fact_sync.call_args assert call_args[0][0] == "self_test_marker" @pytest.mark.asyncio async def test_t4_memory_read(): """T4 should verify facts are retrievable.""" from timmy.loop_qa import probe_memory_read mock_mem = MagicMock() mock_mem.get_facts_sync = MagicMock( return_value=[{"content": "test_marker_123", "category": "self_test_marker"}] ) with patch("timmy.loop_qa._get_brain_memory", return_value=mock_mem): result = await probe_memory_read() assert result["success"] is True @pytest.mark.asyncio async def test_t4_memory_read_empty(): """T4 should fail when no facts are returned.""" from timmy.loop_qa import probe_memory_read mock_mem = MagicMock() mock_mem.get_facts_sync = MagicMock(return_value=[]) with patch("timmy.loop_qa._get_brain_memory", return_value=mock_mem): result = await probe_memory_read() assert result["success"] is False @pytest.mark.asyncio async def test_t5_self_coding(tmp_path): """T5 should write a self-test note and verify it exists.""" from timmy.loop_qa import probe_self_coding written_path = tmp_path / "self_test_note.md" written_path.write_text("# Self-Test Note\n\nImprovement sketch.") mock_vault = MagicMock() mock_vault.write_note = MagicMock(return_value=written_path) with patch("timmy.loop_qa._get_vault", return_value=mock_vault): result = await probe_self_coding() assert result["success"] is True @pytest.mark.asyncio async def test_t6_lightning_econ_placeholder(): """T6 should always succeed as a placeholder.""" from timmy.loop_qa import probe_lightning_econ result = await probe_lightning_econ() assert result["success"] is True assert "pending" in result["details"].lower() or "v2" in result["details"].lower() # --------------------------------------------------------------------------- # Orchestrator tests # --------------------------------------------------------------------------- def _make_orchestrator(): """Create an orchestrator with patched external services.""" from timmy.loop_qa import LoopQAOrchestrator return LoopQAOrchestrator() @pytest.mark.asyncio async def test_run_next_test_round_robin(): """Orchestrator should cycle through probes in order.""" from timmy.loop_qa import TEST_SEQUENCE, LoopQAOrchestrator orch = LoopQAOrchestrator() results = [] # Patch all probes to return success quickly with patch("timmy.loop_qa.log_event"): for cap, _ in TEST_SEQUENCE: probe_name = f"timmy.loop_qa.probe_{cap.value}" with patch(probe_name, new_callable=AsyncMock) as mock_probe: mock_probe.return_value = { "success": True, "capability": cap, "details": "ok", "error_type": None, } result = await orch.run_next_test() results.append(result) # All 6 should run assert len(results) == 6 assert all(r is not None for r in results) @pytest.mark.asyncio async def test_run_next_test_disabled(): """run_next_test should return None when loop_qa_enabled is False.""" from timmy.loop_qa import LoopQAOrchestrator orch = LoopQAOrchestrator() with patch("timmy.loop_qa.settings") as mock_settings: mock_settings.loop_qa_enabled = False result = await orch.run_next_test() assert result is None @pytest.mark.asyncio async def test_run_next_test_throttle(): """Should return None when max_per_hour is reached.""" from timmy.loop_qa import LoopQAOrchestrator orch = LoopQAOrchestrator() orch._hourly_count = 100 # Well above any threshold orch._hour_marker = datetime.now(UTC).hour result = await orch.run_next_test() assert result is None @pytest.mark.asyncio async def test_failure_counter_increments(): """Consecutive failure count should increment on failure.""" from timmy.loop_qa import Capability, LoopQAOrchestrator orch = LoopQAOrchestrator() cap = Capability.TOOL_USE with patch("timmy.loop_qa.log_event"): with patch( "timmy.loop_qa.probe_tool_use", new_callable=AsyncMock, return_value={ "success": False, "capability": cap, "details": "empty stdout", "error_type": "AssertionError", }, ): await orch.run_next_test() assert orch._failure_counts[cap] == 1 @pytest.mark.asyncio async def test_failure_counter_resets_on_success(): """Consecutive failure count should reset to 0 on success.""" from timmy.loop_qa import Capability, LoopQAOrchestrator orch = LoopQAOrchestrator() cap = Capability.TOOL_USE orch._failure_counts[cap] = 5 orch._proposal_filed.add(cap) with patch("timmy.loop_qa.log_event"): with patch( "timmy.loop_qa.probe_tool_use", new_callable=AsyncMock, return_value={ "success": True, "capability": cap, "details": "ok", "error_type": None, }, ): await orch.run_next_test() assert orch._failure_counts[cap] == 0 assert cap not in orch._proposal_filed @pytest.mark.asyncio async def test_upgrade_proposal_filed_at_threshold(): """When failures reach threshold, create_task should be called.""" from timmy.loop_qa import Capability, LoopQAOrchestrator orch = LoopQAOrchestrator() cap = Capability.TOOL_USE orch._failure_counts[cap] = 2 # One more failure hits threshold of 3 with patch("timmy.loop_qa.log_event"): with patch("timmy.loop_qa.create_task") as mock_create: with patch( "timmy.loop_qa.probe_tool_use", new_callable=AsyncMock, return_value={ "success": False, "capability": cap, "details": "empty stdout", "error_type": "AssertionError", }, ): await orch.run_next_test() mock_create.assert_called_once() call_kwargs = mock_create.call_args assert "TOOL_USE" in call_kwargs[1]["title"] or "TOOL_USE" in str(call_kwargs) assert cap in orch._proposal_filed @pytest.mark.asyncio async def test_upgrade_proposal_not_refiled(): """Once a proposal is filed, it should not be filed again.""" from timmy.loop_qa import Capability, LoopQAOrchestrator orch = LoopQAOrchestrator() cap = Capability.TOOL_USE orch._failure_counts[cap] = 5 orch._proposal_filed.add(cap) # Already filed with patch("timmy.loop_qa.log_event"): with patch("timmy.loop_qa.create_task") as mock_create: with patch( "timmy.loop_qa.probe_tool_use", new_callable=AsyncMock, return_value={ "success": False, "capability": cap, "details": "still broken", "error_type": "RuntimeError", }, ): await orch.run_next_test() mock_create.assert_not_called() @pytest.mark.asyncio async def test_graceful_on_probe_crash(): """If a probe raises unexpectedly, orchestrator should not crash.""" from timmy.loop_qa import LoopQAOrchestrator orch = LoopQAOrchestrator() with patch("timmy.loop_qa.log_event"): with patch("timmy.loop_qa.capture_error"): with patch( "timmy.loop_qa.probe_tool_use", new_callable=AsyncMock, side_effect=Exception("probe exploded"), ): result = await orch.run_next_test() # Should return a failure result, not raise assert result is not None assert result["success"] is False # --------------------------------------------------------------------------- # Health snapshot tests # --------------------------------------------------------------------------- def test_health_snapshot_all_green(): """Snapshot should show green when all counters are 0.""" from timmy.loop_qa import LoopQAOrchestrator orch = LoopQAOrchestrator() snapshot = orch.get_health_snapshot() assert snapshot["overall_status"] == "green" assert all(c["status"] == "green" for c in snapshot["capabilities"]) def test_health_snapshot_mixed_statuses(): """Snapshot should correctly map different failure counts.""" from timmy.loop_qa import Capability, LoopQAOrchestrator orch = LoopQAOrchestrator() orch._failure_counts[Capability.TOOL_USE] = 2 # yellow orch._failure_counts[Capability.MEMORY_READ] = 5 # red snapshot = orch.get_health_snapshot() by_cap = {c["capability"]: c["status"] for c in snapshot["capabilities"]} assert by_cap[Capability.TOOL_USE] == "yellow" assert by_cap[Capability.MEMORY_READ] == "red" assert by_cap[Capability.LIGHTNING_ECON] == "green" def test_health_snapshot_overall_worst(): """overall_status should be the worst of all capabilities.""" from timmy.loop_qa import Capability, LoopQAOrchestrator orch = LoopQAOrchestrator() orch._failure_counts[Capability.TOOL_USE] = 2 # yellow snapshot = orch.get_health_snapshot() assert snapshot["overall_status"] == "yellow" orch._failure_counts[Capability.MEMORY_WRITE] = 5 # red snapshot = orch.get_health_snapshot() assert snapshot["overall_status"] == "red" # --------------------------------------------------------------------------- # Dashboard route tests # --------------------------------------------------------------------------- def test_loop_qa_health_json(client): """GET /health/loop-qa should return 200 with snapshot JSON.""" resp = client.get("/health/loop-qa") assert resp.status_code == 200 data = resp.json() assert "overall_status" in data assert "capabilities" in data assert len(data["capabilities"]) == 6 def test_loop_qa_health_partial(client): """GET /health/loop-qa/partial should return 200 with HTML.""" resp = client.get("/health/loop-qa/partial") assert resp.status_code == 200 assert "text/html" in resp.headers["content-type"]