Files
Timmy-time-dashboard/tests/test_codebase_indexer_errors.py
Alexander Payne 18bc64b36d feat: Self-Coding Foundation (Phase 1)
Implements the foundational infrastructure for Timmy's self-modification capability:

## New Services

1. **GitSafety** (src/self_coding/git_safety.py)
   - Atomic git operations with rollback capability
   - Snapshot/restore for safe experimentation
   - Feature branch management (timmy/self-edit/{timestamp})
   - Merge to main only after tests pass

2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py)
   - AST-based parsing of Python source files
   - Extracts classes, functions, imports, docstrings
   - Builds dependency graph for blast radius analysis
   - SQLite storage with hash-based incremental indexing
   - get_summary() for LLM context (<4000 tokens)
   - get_relevant_files() for task-based file discovery

3. **ModificationJournal** (src/self_coding/modification_journal.py)
   - Persistent log of all self-modification attempts
   - Tracks outcomes: success, failure, rollback
   - find_similar() for learning from past attempts
   - Success rate metrics and recent failure tracking
   - Supports vector embeddings (Phase 2)

4. **ReflectionService** (src/self_coding/reflection.py)
   - LLM-powered analysis of modification attempts
   - Generates lessons learned from successes and failures
   - Fallback templates when LLM unavailable
   - Supports context from similar past attempts

## Test Coverage

- 104 new tests across 7 test files
- 95% code coverage on self_coding module
- Green path tests: full workflow integration
- Red path tests: errors, rollbacks, edge cases
- Safety constraint tests: test coverage requirements, protected files

## Usage

    from self_coding import GitSafety, CodebaseIndexer, ModificationJournal

    git = GitSafety(repo_path=/path/to/repo)
    indexer = CodebaseIndexer(repo_path=/path/to/repo)
    journal = ModificationJournal()

Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
2026-02-26 11:08:05 -05:00

442 lines
14 KiB
Python

"""Error path tests for Codebase Indexer.
Tests syntax errors, encoding issues, circular imports, and edge cases.
"""
from __future__ import annotations
import tempfile
from pathlib import Path
import pytest
from self_coding.codebase_indexer import CodebaseIndexer, ModuleInfo
@pytest.mark.asyncio
class TestCodebaseIndexerErrors:
"""Indexing error handling."""
async def test_syntax_error_file(self):
"""Should skip files with syntax errors."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
# Valid file
(src_path / "good.py").write_text("def good(): pass")
# File with syntax error
(src_path / "bad.py").write_text("def bad(:\n pass")
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
stats = await indexer.index_all()
assert stats["indexed"] == 1
assert stats["failed"] == 1
async def test_unicode_in_source(self):
"""Should handle Unicode in source files."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
# File with Unicode
(src_path / "unicode.py").write_text(
'# -*- coding: utf-8 -*-\n'
'"""Module with Unicode: ñ 中文 🎉"""\n'
'def hello():\n'
' """Returns 👋"""\n'
' return "hello"\n',
encoding="utf-8",
)
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
stats = await indexer.index_all()
assert stats["indexed"] == 1
assert stats["failed"] == 0
info = await indexer.get_module_info("src/unicode.py")
assert "中文" in info.docstring
async def test_empty_file(self):
"""Should handle empty Python files."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
# Empty file
(src_path / "empty.py").write_text("")
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
stats = await indexer.index_all()
assert stats["indexed"] == 1
info = await indexer.get_module_info("src/empty.py")
assert info is not None
assert info.functions == []
assert info.classes == []
async def test_large_file(self):
"""Should handle large Python files."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
# Large file with many functions
content = ['"""Large module."""']
for i in range(100):
content.append(f'def function_{i}(x: int) -> int:')
content.append(f' """Function {i}."""')
content.append(f' return x + {i}')
content.append('')
(src_path / "large.py").write_text("\n".join(content))
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
stats = await indexer.index_all()
assert stats["indexed"] == 1
info = await indexer.get_module_info("src/large.py")
assert len(info.functions) == 100
async def test_nested_classes(self):
"""Should handle nested classes."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
(src_path / "nested.py").write_text('''
"""Module with nested classes."""
class Outer:
"""Outer class."""
class Inner:
"""Inner class."""
def inner_method(self):
pass
def outer_method(self):
pass
''')
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
await indexer.index_all()
info = await indexer.get_module_info("src/nested.py")
# Should find Outer class (top-level)
assert len(info.classes) == 1
assert info.classes[0].name == "Outer"
# Outer should have outer_method
assert len(info.classes[0].methods) == 1
assert info.classes[0].methods[0].name == "outer_method"
async def test_complex_type_annotations(self):
"""Should handle complex type annotations."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
(src_path / "types.py").write_text('''
"""Module with complex types."""
from typing import Dict, List, Optional, Union, Callable
def complex_function(
items: List[Dict[str, Union[int, str]]],
callback: Callable[[int], bool],
optional: Optional[str] = None,
) -> Dict[str, List[int]]:
"""Function with complex types."""
return {}
class TypedClass:
"""Class with type annotations."""
def method(self, x: int | str) -> list[int]:
"""Method with union type (Python 3.10+)."""
return []
''')
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
await indexer.index_all()
info = await indexer.get_module_info("src/types.py")
# Should parse without error
assert len(info.functions) == 1
assert len(info.classes) == 1
async def test_import_variations(self):
"""Should handle various import styles."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
(src_path / "imports.py").write_text('''
"""Module with various imports."""
# Regular imports
import os
import sys as system
from pathlib import Path
# From imports
from typing import Dict, List
from collections import OrderedDict as OD
# Relative imports (may not resolve)
from . import sibling
from .subpackage import module
# Dynamic imports (won't be caught by AST)
try:
import optional_dep
except ImportError:
pass
''')
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
await indexer.index_all()
info = await indexer.get_module_info("src/imports.py")
# Should capture static imports
assert "os" in info.imports
assert "typing.Dict" in info.imports or "Dict" in str(info.imports)
async def test_no_src_directory(self):
"""Should handle missing src directory gracefully."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src", "tests"],
)
stats = await indexer.index_all()
assert stats["indexed"] == 0
assert stats["failed"] == 0
async def test_permission_error(self):
"""Should handle permission errors gracefully."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
# Create file
file_path = src_path / "locked.py"
file_path.write_text("def test(): pass")
# Remove read permission (if on Unix)
import os
try:
os.chmod(file_path, 0o000)
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
stats = await indexer.index_all()
# Should count as failed
assert stats["failed"] == 1
finally:
# Restore permission for cleanup
os.chmod(file_path, 0o644)
async def test_circular_imports_in_dependency_graph(self):
"""Should handle circular imports in dependency analysis."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
# Create circular imports
(src_path / "a.py").write_text('''
"""Module A."""
from b import B
class A:
def get_b(self):
return B()
''')
(src_path / "b.py").write_text('''
"""Module B."""
from a import A
class B:
def get_a(self):
return A()
''')
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
await indexer.index_all()
# Both should have each other as dependencies
a_deps = await indexer.get_dependency_chain("src/a.py")
b_deps = await indexer.get_dependency_chain("src/b.py")
# Note: Due to import resolution, this might not be perfect
# but it shouldn't crash
assert isinstance(a_deps, list)
assert isinstance(b_deps, list)
async def test_summary_with_no_modules(self):
"""Summary should handle empty codebase."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
await indexer.index_all()
summary = await indexer.get_summary()
assert "Codebase Summary" in summary
assert "Total modules: 0" in summary
async def test_get_relevant_files_with_special_chars(self):
"""Should handle special characters in search query."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
(src_path / "test.py").write_text('def test(): pass')
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
await indexer.index_all()
# Search with special chars shouldn't crash
files = await indexer.get_relevant_files("test!@#$%^&*()", limit=5)
assert isinstance(files, list)
async def test_concurrent_indexing(self):
"""Should handle concurrent indexing attempts."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
(src_path / "file.py").write_text("def test(): pass")
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
# Multiple rapid indexing calls
import asyncio
tasks = [
indexer.index_all(),
indexer.index_all(),
indexer.index_all(),
]
results = await asyncio.gather(*tasks)
# All should complete without error
for stats in results:
assert stats["indexed"] >= 0
assert stats["failed"] >= 0
async def test_binary_file_in_src(self):
"""Should skip binary files in src directory."""
with tempfile.TemporaryDirectory() as tmpdir:
repo_path = Path(tmpdir)
src_path = repo_path / "src"
src_path.mkdir()
# Binary file
(src_path / "data.bin").write_bytes(b"\x00\x01\x02\x03")
# Python file
(src_path / "script.py").write_text("def test(): pass")
indexer = CodebaseIndexer(
repo_path=repo_path,
db_path=repo_path / "index.db",
src_dirs=["src"],
)
stats = await indexer.index_all()
# Should only index .py file
assert stats["indexed"] == 1
assert stats["failed"] == 0 # Binary files are skipped, not failed