Files
Timmy-time-dashboard/tests/test_reflection.py
Alexander Payne 18bc64b36d feat: Self-Coding Foundation (Phase 1)
Implements the foundational infrastructure for Timmy's self-modification capability:

## New Services

1. **GitSafety** (src/self_coding/git_safety.py)
   - Atomic git operations with rollback capability
   - Snapshot/restore for safe experimentation
   - Feature branch management (timmy/self-edit/{timestamp})
   - Merge to main only after tests pass

2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py)
   - AST-based parsing of Python source files
   - Extracts classes, functions, imports, docstrings
   - Builds dependency graph for blast radius analysis
   - SQLite storage with hash-based incremental indexing
   - get_summary() for LLM context (<4000 tokens)
   - get_relevant_files() for task-based file discovery

3. **ModificationJournal** (src/self_coding/modification_journal.py)
   - Persistent log of all self-modification attempts
   - Tracks outcomes: success, failure, rollback
   - find_similar() for learning from past attempts
   - Success rate metrics and recent failure tracking
   - Supports vector embeddings (Phase 2)

4. **ReflectionService** (src/self_coding/reflection.py)
   - LLM-powered analysis of modification attempts
   - Generates lessons learned from successes and failures
   - Fallback templates when LLM unavailable
   - Supports context from similar past attempts

## Test Coverage

- 104 new tests across 7 test files
- 95% code coverage on self_coding module
- Green path tests: full workflow integration
- Red path tests: errors, rollbacks, edge cases
- Safety constraint tests: test coverage requirements, protected files

## Usage

    from self_coding import GitSafety, CodebaseIndexer, ModificationJournal

    git = GitSafety(repo_path="/path/to/repo")
    indexer = CodebaseIndexer(repo_path="/path/to/repo")
    journal = ModificationJournal()

Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
2026-02-26 11:08:05 -05:00

244 lines
8.2 KiB
Python

"""Tests for Reflection Service.
Tests fallback and LLM-based reflection generation.
"""
from __future__ import annotations
from datetime import datetime, timezone
from unittest.mock import AsyncMock, MagicMock
import pytest
from self_coding.modification_journal import ModificationAttempt, Outcome
from self_coding.reflection import ReflectionService
class MockLLMResponse:
    """Minimal stand-in for the response object an LLM adapter returns.

    Exposes the attributes the ReflectionService reads: the generated
    text, which provider produced it, call latency, and a fallback flag.
    """

    # Fixed values for every mock response; tests never vary these.
    latency_ms = 100.0
    fallback_used = False

    def __init__(self, content: str, provider_used: str = "mock"):
        self.content = content
        self.provider_used = provider_used
@pytest.mark.asyncio
class TestReflectionServiceFallback:
    """Template-based reflections produced when no LLM adapter is configured."""

    async def test_fallback_success(self):
        """A successful attempt renders the success template with its files."""
        svc = ReflectionService(llm_adapter=None)
        success_attempt = ModificationAttempt(
            task_description="Add error handling",
            files_modified=["src/app.py"],
            outcome=Outcome.SUCCESS,
        )
        text = await svc.reflect_on_attempt(success_attempt)
        assert "What went well" in text
        assert "successfully completed" in text.lower()
        assert "src/app.py" in text

    async def test_fallback_failure(self):
        """A failed attempt surfaces its failure analysis and retry count."""
        svc = ReflectionService(llm_adapter=None)
        failed_attempt = ModificationAttempt(
            task_description="Refactor database",
            files_modified=["src/db.py", "src/models.py"],
            outcome=Outcome.FAILURE,
            failure_analysis="Circular dependency",
            retry_count=2,
        )
        text = await svc.reflect_on_attempt(failed_attempt)
        assert "What went well" in text
        assert "What could be improved" in text
        assert "circular dependency" in text.lower()
        assert "2 retries" in text

    async def test_fallback_rollback(self):
        """A rolled-back attempt frames the rollback as the safe outcome."""
        svc = ReflectionService(llm_adapter=None)
        rolled_back = ModificationAttempt(
            task_description="Update API",
            files_modified=["src/api.py"],
            outcome=Outcome.ROLLBACK,
        )
        text = await svc.reflect_on_attempt(rolled_back)
        assert "What went well" in text
        assert "rollback" in text.lower()
        assert "preferable to shipping broken code" in text.lower()
@pytest.mark.asyncio
class TestReflectionServiceWithLLM:
    """LLM-backed reflection generation via a mocked adapter."""

    async def test_llm_reflection_success(self):
        """The adapter's reply is returned and the prompt carries attempt details."""
        adapter = AsyncMock()
        adapter.chat.return_value = MockLLMResponse(
            "**What went well:** Clean implementation\n"
            "**What could be improved:** More tests\n"
            "**Next time:** Add edge cases\n"
            "**General lesson:** Always test errors"
        )
        svc = ReflectionService(llm_adapter=adapter)
        attempt = ModificationAttempt(
            task_description="Add validation",
            approach="Use Pydantic",
            files_modified=["src/validation.py"],
            outcome=Outcome.SUCCESS,
            test_results="5 passed",
        )
        text = await svc.reflect_on_attempt(attempt)
        assert "Clean implementation" in text
        assert adapter.chat.called
        # Inspect the prompt the service handed to the adapter.
        prompt = adapter.chat.call_args.kwargs["message"]
        assert "Add validation" in prompt
        assert "SUCCESS" in prompt

    async def test_llm_reflection_failure_fallback(self):
        """An adapter error degrades gracefully to the template reflection."""
        adapter = AsyncMock()
        adapter.chat.side_effect = Exception("LLM timeout")
        svc = ReflectionService(llm_adapter=adapter)
        attempt = ModificationAttempt(
            task_description="Fix bug",
            outcome=Outcome.FAILURE,
        )
        text = await svc.reflect_on_attempt(attempt)
        # Fallback template is produced despite the LLM raising.
        assert "What went well" in text
        assert "What could be improved" in text
@pytest.mark.asyncio
class TestReflectionServiceWithContext:
    """Reflection prompts enriched with similar past attempts."""

    async def test_reflect_with_context(self):
        """Past attempt details and their reflections appear in the prompt."""
        adapter = AsyncMock()
        adapter.chat.return_value = MockLLMResponse(
            "Reflection with historical context"
        )
        svc = ReflectionService(llm_adapter=adapter)
        current_attempt = ModificationAttempt(
            task_description="Add auth middleware",
            outcome=Outcome.SUCCESS,
        )
        prior_attempt = ModificationAttempt(
            task_description="Add logging middleware",
            outcome=Outcome.SUCCESS,
            reflection="Good pattern: use decorators",
        )
        result = await svc.reflect_with_context(current_attempt, [prior_attempt])
        assert result == "Reflection with historical context"
        # The prior attempt's description and lesson must reach the LLM.
        prompt = adapter.chat.call_args.kwargs["message"]
        assert "logging middleware" in prompt
        assert "Good pattern: use decorators" in prompt

    async def test_reflect_with_context_fallback(self):
        """An adapter error with context still yields a template reflection."""
        adapter = AsyncMock()
        adapter.chat.side_effect = Exception("LLM error")
        svc = ReflectionService(llm_adapter=adapter)
        current_attempt = ModificationAttempt(
            task_description="Add feature",
            outcome=Outcome.SUCCESS,
        )
        prior_attempt = ModificationAttempt(
            task_description="Past feature",
            outcome=Outcome.SUCCESS,
            reflection="Past lesson",
        )
        result = await svc.reflect_with_context(current_attempt, [prior_attempt])
        assert "What went well" in result
@pytest.mark.asyncio
class TestReflectionServiceEdgeCases:
    """Boundary inputs: empty fields and oversized test output."""

    async def test_empty_files_list(self):
        """An empty files_modified list must not break the fallback template."""
        svc = ReflectionService(llm_adapter=None)
        attempt = ModificationAttempt(
            task_description="Test task",
            files_modified=[],
            outcome=Outcome.SUCCESS,
        )
        text = await svc.reflect_on_attempt(attempt)
        assert "What went well" in text
        assert "N/A" in text or "these files" in text

    async def test_long_test_results_truncated(self):
        """Very long test output is truncated before being put in the prompt."""
        adapter = AsyncMock()
        adapter.chat.return_value = MockLLMResponse("Short reflection")
        svc = ReflectionService(llm_adapter=adapter)
        attempt = ModificationAttempt(
            task_description="Big refactor",
            outcome=Outcome.FAILURE,
            test_results="Error\n" * 1000,  # Very long
        )
        await svc.reflect_on_attempt(attempt)
        # The ~6000-char test output must have been cut down in the prompt.
        prompt = adapter.chat.call_args.kwargs["message"]
        assert len(prompt) < 10000  # Should be truncated

    async def test_no_approach_documented(self):
        """An empty approach string falls back without a placeholder message."""
        svc = ReflectionService(llm_adapter=None)
        attempt = ModificationAttempt(
            task_description="Quick fix",
            approach="",  # Empty
            outcome=Outcome.SUCCESS,
        )
        text = await svc.reflect_on_attempt(attempt)
        assert "What went well" in text
        assert "No approach documented" not in text  # Should use fallback