refactor: Phase 3 — reorganize tests into module-mirroring subdirectories

Move 97 test files from flat tests/ into 13 subdirectories: tests/dashboard/ (8 files — routes, mobile, mission control) tests/swarm/ (17 files — coordinator, docker, routing, tasks) tests/timmy/ (12 files — agent, backends, CLI, tools) tests/self_coding/ (14 files — git safety, indexer, self-modify) tests/lightning/ (3 files — L402, LND, interface) tests/creative/ (8 files — assembler, director, image/music/video) tests/integrations/ (10 files — chat bridge, telegram, voice, websocket) tests/mcp/ (4 files — bootstrap, discovery, executor) tests/spark/ (3 files — engine, tools, events) tests/hands/ (3 files — registry, oracle, phase5) tests/scripture/ (1 file) tests/infrastructure/ (3 files — router cascade, API) tests/security/ (3 files — XSS, regression) Fix Path(__file__) reference in test_mobile_scenarios.py for new depth. Add __init__.py to all test subdirectories. Tests: 1503 passed, 9 failed (pre-existing), 53 errors (pre-existing) https://claude.ai/code/session_019oMFNvD8uSGSSmBMGkBfQN
2026-02-26 21:21:28 +00:00
parent 6045077144
commit 4e11dd2490
104 changed files with 57 additions and 3 deletions
--- a/tests/timmy/test_reflection.py
+++ b/tests/timmy/test_reflection.py
@@ -0,0 +1,243 @@
+"""Tests for Reflection Service.
+
+Tests fallback and LLM-based reflection generation.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from self_coding.modification_journal import ModificationAttempt, Outcome
+from self_coding.reflection import ReflectionService
+
+
+class MockLLMResponse:
+    """Mock LLM response."""
+    def __init__(self, content: str, provider_used: str = "mock"):
+        self.content = content
+        self.provider_used = provider_used
+        self.latency_ms = 100.0
+        self.fallback_used = False
+
+
+@pytest.mark.asyncio
+class TestReflectionServiceFallback:
+    """Fallback reflections without LLM."""
+    
+    async def test_fallback_success(self):
+        """Should generate fallback reflection for success."""
+        service = ReflectionService(llm_adapter=None)
+        
+        attempt = ModificationAttempt(
+            task_description="Add error handling",
+            files_modified=["src/app.py"],
+            outcome=Outcome.SUCCESS,
+        )
+        
+        reflection = await service.reflect_on_attempt(attempt)
+        
+        assert "What went well" in reflection
+        assert "successfully completed" in reflection.lower()
+        assert "src/app.py" in reflection
+    
+    async def test_fallback_failure(self):
+        """Should generate fallback reflection for failure."""
+        service = ReflectionService(llm_adapter=None)
+        
+        attempt = ModificationAttempt(
+            task_description="Refactor database",
+            files_modified=["src/db.py", "src/models.py"],
+            outcome=Outcome.FAILURE,
+            failure_analysis="Circular dependency",
+            retry_count=2,
+        )
+        
+        reflection = await service.reflect_on_attempt(attempt)
+        
+        assert "What went well" in reflection
+        assert "What could be improved" in reflection
+        assert "circular dependency" in reflection.lower()
+        assert "2 retries" in reflection
+    
+    async def test_fallback_rollback(self):
+        """Should generate fallback reflection for rollback."""
+        service = ReflectionService(llm_adapter=None)
+        
+        attempt = ModificationAttempt(
+            task_description="Update API",
+            files_modified=["src/api.py"],
+            outcome=Outcome.ROLLBACK,
+        )
+        
+        reflection = await service.reflect_on_attempt(attempt)
+        
+        assert "What went well" in reflection
+        assert "rollback" in reflection.lower()
+        assert "preferable to shipping broken code" in reflection.lower()
+
+
+@pytest.mark.asyncio
+class TestReflectionServiceWithLLM:
+    """Reflections with mock LLM."""
+    
+    async def test_llm_reflection_success(self):
+        """Should use LLM for reflection when available."""
+        mock_adapter = AsyncMock()
+        mock_adapter.chat.return_value = MockLLMResponse(
+            "**What went well:** Clean implementation\n"
+            "**What could be improved:** More tests\n"
+            "**Next time:** Add edge cases\n"
+            "**General lesson:** Always test errors"
+        )
+        
+        service = ReflectionService(llm_adapter=mock_adapter)
+        
+        attempt = ModificationAttempt(
+            task_description="Add validation",
+            approach="Use Pydantic",
+            files_modified=["src/validation.py"],
+            outcome=Outcome.SUCCESS,
+            test_results="5 passed",
+        )
+        
+        reflection = await service.reflect_on_attempt(attempt)
+        
+        assert "Clean implementation" in reflection
+        assert mock_adapter.chat.called
+        
+        # Check the prompt was formatted correctly
+        call_args = mock_adapter.chat.call_args
+        assert "Add validation" in call_args.kwargs["message"]
+        assert "SUCCESS" in call_args.kwargs["message"]
+    
+    async def test_llm_reflection_failure_fallback(self):
+        """Should fallback when LLM fails."""
+        mock_adapter = AsyncMock()
+        mock_adapter.chat.side_effect = Exception("LLM timeout")
+        
+        service = ReflectionService(llm_adapter=mock_adapter)
+        
+        attempt = ModificationAttempt(
+            task_description="Fix bug",
+            outcome=Outcome.FAILURE,
+        )
+        
+        reflection = await service.reflect_on_attempt(attempt)
+        
+        # Should still return a reflection (fallback)
+        assert "What went well" in reflection
+        assert "What could be improved" in reflection
+
+
+@pytest.mark.asyncio
+class TestReflectionServiceWithContext:
+    """Reflections with similar past attempts."""
+    
+    async def test_reflect_with_context(self):
+        """Should include past attempts in reflection."""
+        mock_adapter = AsyncMock()
+        mock_adapter.chat.return_value = MockLLMResponse(
+            "Reflection with historical context"
+        )
+        
+        service = ReflectionService(llm_adapter=mock_adapter)
+        
+        current = ModificationAttempt(
+            task_description="Add auth middleware",
+            outcome=Outcome.SUCCESS,
+        )
+        
+        past = ModificationAttempt(
+            task_description="Add logging middleware",
+            outcome=Outcome.SUCCESS,
+            reflection="Good pattern: use decorators",
+        )
+        
+        reflection = await service.reflect_with_context(current, [past])
+        
+        assert reflection == "Reflection with historical context"
+        
+        # Check context was included
+        call_args = mock_adapter.chat.call_args
+        assert "logging middleware" in call_args.kwargs["message"]
+        assert "Good pattern: use decorators" in call_args.kwargs["message"]
+    
+    async def test_reflect_with_context_fallback(self):
+        """Should fallback when LLM fails with context."""
+        mock_adapter = AsyncMock()
+        mock_adapter.chat.side_effect = Exception("LLM error")
+        
+        service = ReflectionService(llm_adapter=mock_adapter)
+        
+        current = ModificationAttempt(
+            task_description="Add feature",
+            outcome=Outcome.SUCCESS,
+        )
+        past = ModificationAttempt(
+            task_description="Past feature",
+            outcome=Outcome.SUCCESS,
+            reflection="Past lesson",
+        )
+        
+        # Should fallback to regular reflection
+        reflection = await service.reflect_with_context(current, [past])
+        
+        assert "What went well" in reflection
+
+
+@pytest.mark.asyncio
+class TestReflectionServiceEdgeCases:
+    """Edge cases and error handling."""
+    
+    async def test_empty_files_list(self):
+        """Should handle empty files list."""
+        service = ReflectionService(llm_adapter=None)
+        
+        attempt = ModificationAttempt(
+            task_description="Test task",
+            files_modified=[],
+            outcome=Outcome.SUCCESS,
+        )
+        
+        reflection = await service.reflect_on_attempt(attempt)
+        
+        assert "What went well" in reflection
+        assert "N/A" in reflection or "these files" in reflection
+    
+    async def test_long_test_results_truncated(self):
+        """Should truncate long test results in prompt."""
+        mock_adapter = AsyncMock()
+        mock_adapter.chat.return_value = MockLLMResponse("Short reflection")
+        
+        service = ReflectionService(llm_adapter=mock_adapter)
+        
+        attempt = ModificationAttempt(
+            task_description="Big refactor",
+            outcome=Outcome.FAILURE,
+            test_results="Error\n" * 1000,  # Very long
+        )
+        
+        await service.reflect_on_attempt(attempt)
+        
+        # Check that test results were truncated in the prompt
+        call_args = mock_adapter.chat.call_args
+        prompt = call_args.kwargs["message"]
+        assert len(prompt) < 10000  # Should be truncated
+    
+    async def test_no_approach_documented(self):
+        """Should handle missing approach."""
+        service = ReflectionService(llm_adapter=None)
+        
+        attempt = ModificationAttempt(
+            task_description="Quick fix",
+            approach="",  # Empty
+            outcome=Outcome.SUCCESS,
+        )
+        
+        reflection = await service.reflect_on_attempt(attempt)
+        
+        assert "What went well" in reflection
+        assert "No approach documented" not in reflection  # Should use fallback