Merge pull request #54 from AlexanderWhitestone/feature/self-coding-rebased

Feature/self coding rebased
This commit is contained in:
Alexander Whitestone
2026-02-26 12:36:37 -05:00
committed by GitHub
13 changed files with 2220 additions and 0 deletions

View File

@@ -35,6 +35,7 @@ from dashboard.routes.upgrades import router as upgrades_router
from dashboard.routes.work_orders import router as work_orders_router
from dashboard.routes.tasks import router as tasks_router
from dashboard.routes.scripture import router as scripture_router
from dashboard.routes.self_coding import router as self_coding_router
from router.api import router as cascade_router
logging.basicConfig(
@@ -199,6 +200,7 @@ app.include_router(upgrades_router)
app.include_router(work_orders_router)
app.include_router(tasks_router)
app.include_router(scripture_router)
app.include_router(self_coding_router)
app.include_router(cascade_router)

View File

@@ -0,0 +1,368 @@
"""Self-Coding Dashboard Routes.
API endpoints and HTMX views for the self-coding system:
- Journal viewer with filtering
- Stats dashboard
- Manual task execution
- Real-time status updates
"""
from __future__ import annotations
import logging
from typing import Optional
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
from self_coding import (
CodebaseIndexer,
ModificationJournal,
Outcome,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/self-coding", tags=["self_coding"])
# ── API Models ────────────────────────────────────────────────────────────
class JournalEntryResponse(BaseModel):
"""A journal entry for API response."""
id: int
timestamp: str
task_description: str
approach: str
files_modified: list[str]
outcome: str
retry_count: int
has_reflection: bool
class StatsResponse(BaseModel):
    """Self-coding stats for API response.

    Aggregate counters plus the most recent failed attempts.
    """

    # Total number of recorded attempts
    total_attempts: int
    # Count of attempts per outcome
    success_count: int
    failure_count: int
    rollback_count: int
    # Success fraction in 0.0–1.0 (the dashboard renders it as a percentage)
    success_rate: float
    # The latest failed attempts
    recent_failures: list[JournalEntryResponse]
class ExecuteRequest(BaseModel):
    """Request to execute a self-edit task."""

    # Natural-language description of the change to make
    task_description: str
class ExecuteResponse(BaseModel):
    """Response from executing a self-edit task."""

    # Whether the self-edit completed successfully
    success: bool
    # Human-readable result summary
    message: str
    # Journal row id for the attempt, when one was logged
    attempt_id: Optional[int] = None
    # Files changed by the edit
    files_modified: list[str] = []
    # Commit hash of the merged change, on success
    commit_hash: Optional[str] = None
# ── Services (initialized lazily) ─────────────────────────────────────────
_journal: Optional[ModificationJournal] = None
_indexer: Optional[CodebaseIndexer] = None
def get_journal() -> ModificationJournal:
    """Return the process-wide ModificationJournal, creating it on first use."""
    global _journal
    if _journal is not None:
        return _journal
    _journal = ModificationJournal()
    return _journal
def get_indexer() -> CodebaseIndexer:
    """Return the process-wide CodebaseIndexer, creating it on first use."""
    global _indexer
    if _indexer is not None:
        return _indexer
    _indexer = CodebaseIndexer()
    return _indexer
# ── API Endpoints ─────────────────────────────────────────────────────────
@router.get("/api/journal", response_model=list[JournalEntryResponse])
async def api_journal_list(
limit: int = 50,
outcome: Optional[str] = None,
):
"""Get modification journal entries.
Args:
limit: Maximum number of entries to return
outcome: Filter by outcome (success, failure, rollback)
"""
journal = get_journal()
# Build query based on filters
if outcome:
try:
outcome_enum = Outcome(outcome)
# Get recent and filter
from self_coding.modification_journal import ModificationAttempt
# Note: This is a simplified query - in production you'd add
# proper filtering to the journal class
entries = []
# Placeholder for filtered query
except ValueError:
return JSONResponse(
status_code=400,
content={"error": f"Invalid outcome: {outcome}"},
)
# For now, return recent failures mixed with successes
recent = await journal.get_recent_failures(limit=limit)
# Also get some successes
# Note: We'd need to add a method to journal for this
# For now, return what we have
response = []
for entry in recent:
response.append(JournalEntryResponse(
id=entry.id or 0,
timestamp=entry.timestamp.isoformat() if entry.timestamp else "",
task_description=entry.task_description,
approach=entry.approach,
files_modified=entry.files_modified,
outcome=entry.outcome.value,
retry_count=entry.retry_count,
has_reflection=bool(entry.reflection),
))
return response
@router.get("/api/journal/{attempt_id}", response_model=dict)
async def api_journal_detail(attempt_id: int):
"""Get detailed information about a specific attempt."""
journal = get_journal()
entry = await journal.get_by_id(attempt_id)
if not entry:
return JSONResponse(
status_code=404,
content={"error": "Attempt not found"},
)
return {
"id": entry.id,
"timestamp": entry.timestamp.isoformat() if entry.timestamp else "",
"task_description": entry.task_description,
"approach": entry.approach,
"files_modified": entry.files_modified,
"diff": entry.diff,
"test_results": entry.test_results,
"outcome": entry.outcome.value,
"failure_analysis": entry.failure_analysis,
"reflection": entry.reflection,
"retry_count": entry.retry_count,
}
@router.get("/api/stats", response_model=StatsResponse)
async def api_stats():
"""Get self-coding statistics."""
journal = get_journal()
metrics = await journal.get_success_rate()
recent_failures = await journal.get_recent_failures(limit=5)
return StatsResponse(
total_attempts=metrics["total"],
success_count=metrics["success"],
failure_count=metrics["failure"],
rollback_count=metrics["rollback"],
success_rate=metrics["overall"],
recent_failures=[
JournalEntryResponse(
id=f.id or 0,
timestamp=f.timestamp.isoformat() if f.timestamp else "",
task_description=f.task_description,
approach=f.approach,
files_modified=f.files_modified,
outcome=f.outcome.value,
retry_count=f.retry_count,
has_reflection=bool(f.reflection),
)
for f in recent_failures
],
)
@router.post("/api/execute", response_model=ExecuteResponse)
async def api_execute(request: ExecuteRequest):
"""Execute a self-edit task.
This is the API endpoint for manual task execution.
In production, this should require authentication and confirmation.
"""
from tools.self_edit import SelfEditTool
tool = SelfEditTool()
result = await tool.execute(request.task_description)
return ExecuteResponse(
success=result.success,
message=result.message,
attempt_id=result.attempt_id,
files_modified=result.files_modified,
commit_hash=result.commit_hash,
)
@router.get("/api/codebase/summary")
async def api_codebase_summary():
"""Get codebase summary for LLM context."""
indexer = get_indexer()
await indexer.index_changed()
summary = await indexer.get_summary(max_tokens=3000)
return {
"summary": summary,
"generated_at": "",
}
@router.post("/api/codebase/reindex")
async def api_codebase_reindex():
"""Trigger a full codebase reindex."""
indexer = get_indexer()
stats = await indexer.index_all()
return {
"indexed": stats["indexed"],
"failed": stats["failed"],
"skipped": stats["skipped"],
}
# ── HTMX Page Routes ──────────────────────────────────────────────────────
@router.get("", response_class=HTMLResponse)
async def self_coding_page(request: Request):
"""Main self-coding dashboard page."""
from dashboard.app import templates
return templates.TemplateResponse(
"self_coding.html",
{
"request": request,
"title": "Self-Coding",
},
)
@router.get("/journal", response_class=HTMLResponse)
async def journal_partial(
request: Request,
outcome: Optional[str] = None,
limit: int = 20,
):
"""HTMX partial for journal entries."""
from dashboard.app import templates
journal = get_journal()
# Get entries (simplified - in production, add proper filtering)
if outcome == "failure":
entries = await journal.get_recent_failures(limit=limit)
else:
# Get all recent
entries = await journal.get_recent_failures(limit=limit)
# TODO: Add method to get successes too
return templates.TemplateResponse(
"partials/journal_entries.html",
{
"request": request,
"entries": entries,
"outcome_filter": outcome,
},
)
@router.get("/stats", response_class=HTMLResponse)
async def stats_partial(request: Request):
"""HTMX partial for stats cards."""
from dashboard.app import templates
journal = get_journal()
metrics = await journal.get_success_rate()
return templates.TemplateResponse(
"partials/self_coding_stats.html",
{
"request": request,
"metrics": metrics,
},
)
@router.get("/execute-form", response_class=HTMLResponse)
async def execute_form_partial(request: Request):
"""HTMX partial for execute task form."""
from dashboard.app import templates
return templates.TemplateResponse(
"partials/execute_form.html",
{
"request": request,
},
)
@router.post("/execute", response_class=HTMLResponse)
async def execute_task(
request: Request,
task_description: str = Form(...),
):
"""HTMX endpoint to execute a task."""
from dashboard.app import templates
from tools.self_edit import SelfEditTool
tool = SelfEditTool()
result = await tool.execute(task_description)
return templates.TemplateResponse(
"partials/execute_result.html",
{
"request": request,
"result": result,
},
)
@router.get("/journal/{attempt_id}/detail", response_class=HTMLResponse)
async def journal_entry_detail(request: Request, attempt_id: int):
"""HTMX partial for journal entry detail."""
from dashboard.app import templates
journal = get_journal()
entry = await journal.get_by_id(attempt_id)
if not entry:
return templates.TemplateResponse(
"partials/error.html",
{
"request": request,
"message": "Attempt not found",
},
)
return templates.TemplateResponse(
"partials/journal_entry_detail.html",
{
"request": request,
"entry": entry,
},
)

View File

@@ -40,6 +40,7 @@
<a href="/memory" class="mc-test-link">MEMORY</a>
<a href="/router/status" class="mc-test-link">ROUTER</a>
<a href="/self-modify/queue" class="mc-test-link">UPGRADES</a>
<a href="/self-coding" class="mc-test-link">SELF-CODING</a>
<a href="/work-orders/queue" class="mc-test-link">WORK ORDERS</a>
<a href="/creative/ui" class="mc-test-link">CREATIVE</a>
<a href="/mobile" class="mc-test-link" title="Mobile-optimized view">MOBILE</a>
@@ -71,6 +72,7 @@
<a href="/lightning/ledger" class="mc-mobile-link">LEDGER</a>
<a href="/memory" class="mc-mobile-link">MEMORY</a>
<a href="/work-orders/queue" class="mc-mobile-link">WORK ORDERS</a>
<a href="/self-coding" class="mc-mobile-link">SELF-CODING</a>
<a href="/creative/ui" class="mc-mobile-link">CREATIVE</a>
<a href="/voice/button" class="mc-mobile-link">VOICE</a>
<a href="/mobile" class="mc-mobile-link">MOBILE</a>

View File

@@ -0,0 +1,7 @@
{# Error partial: rendered in place of a detail view when a lookup fails.
   Expects `message` in the template context. #}
<div class="alert alert-danger mb-0">
<div class="d-flex align-items-center gap-2">
<span>⚠️</span>
<span>{{ message }}</span>
</div>
</div>

View File

@@ -0,0 +1,45 @@
{# Execute task form partial — loaded via HTMX into #execute-modal-content.
   Posts to /self-coding/execute and renders the result into #execute-result. #}
{# NOTE(review): the submit button's onclick closes the #execute-modal dialog
   immediately, and #execute-loading / #execute-result live inside the modal
   content — closing on submit will hide the indicator and the result.
   Confirm this is intended. #}
<form hx-post="/self-coding/execute" hx-target="#execute-result" hx-indicator="#execute-loading">
<div class="mb-3">
<label for="task-description" class="form-label">Task Description</label>
<textarea
class="form-control form-control-sm bg-dark text-light border-secondary"
id="task-description"
name="task_description"
rows="4"
placeholder="Describe what you want Timmy to do...
Example: Add error handling to the /health endpoint that returns 503 when Ollama is unreachable."
required
></textarea>
<div class="form-text">
Be specific. Include what to change and what the expected behavior should be.
</div>
</div>
<div class="alert alert-warning d-flex align-items-start gap-2 py-2">
<span>⚠️</span>
<small>
<strong>Warning:</strong> This will modify source code. Changes will be tested and committed.
Safety constraints: max 3 files, only files with tests, protected files cannot be modified.
</small>
</div>
<div class="d-flex justify-content-end gap-2">
<button type="button" class="btn btn-sm btn-outline-secondary" onclick="document.getElementById('execute-modal').close()">
Cancel
</button>
<button type="submit" class="btn btn-sm btn-primary" onclick="document.getElementById('execute-modal').close()">
Execute Task
</button>
</div>
</form>
{# Spinner shown by HTMX while the POST is in flight #}
<div id="execute-loading" class="htmx-indicator mt-3">
<div class="d-flex align-items-center gap-2 text-muted">
<div class="spinner-border spinner-border-sm" role="status"></div>
<small>Executing self-edit task... This may take a few minutes.</small>
</div>
</div>
{# Target for the execute_result partial #}
<div id="execute-result" class="mt-3"></div>

View File

@@ -0,0 +1,58 @@
{# Execute task result partial — expects `result` in context (a SelfEditResult
   from tools.self_edit: success, message, files_modified, commit_hash,
   attempt_id, test_results). #}
<div class="alert {% if result.success %}alert-success{% else %}alert-danger{% endif %} mb-0">
<div class="d-flex align-items-start gap-2">
<span class="fs-5">{% if result.success %}✅{% else %}❌{% endif %}</span>
<div>
<h6 class="alert-heading mb-1">
{% if result.success %}Success!{% else %}Failed{% endif %}
</h6>
<p class="mb-0 small">{{ result.message }}</p>
{% if result.success %}
{% if result.files_modified %}
<div class="mt-2">
<small class="text-muted">Files modified:</small>
<ul class="list-unstyled mb-0 small">
{% for file in result.files_modified %}
<li><code>{{ file }}</code></li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if result.commit_hash %}
<div class="mt-2 small">
<span class="text-muted">Commit:</span>
<code>{{ result.commit_hash[:8] }}</code>
</div>
{% endif %}
{% if result.attempt_id %}
<div class="mt-2">
<a href="/self-coding#journal-{{ result.attempt_id }}" class="btn btn-sm btn-outline-success">
View in Journal
</a>
</div>
{% endif %}
{% else %}
{# Failure path: show (truncated) test runner output when available #}
{% if result.test_results %}
<div class="mt-2">
<small class="text-muted">Test output:</small>
<pre class="small bg-black bg-opacity-25 p-2 rounded mb-0 mt-1 overflow-auto" style="max-height: 150px;"><code>{{ result.test_results[:500] }}{% if result.test_results|length > 500 %}...{% endif %}</code></pre>
</div>
{% endif %}
{% endif %}
</div>
</div>
</div>
{# Refresh journal and stats after execution #}
{% if result.success %}
<script>
// Refresh journal and stats after successful execution
setTimeout(() => {
htmx.ajax('GET', '/self-coding/journal', { target: '#journal-container' });
htmx.ajax('GET', '/self-coding/stats', { target: '#stats-container' });
}, 500);
</script>
{% endif %}

View File

@@ -0,0 +1,64 @@
{# Journal entries list partial — expects `entries` (modification attempts with
   id, timestamp, task_description, outcome, files_modified, retry_count,
   reflection) in context. Each row lazily loads its detail via HTMX on click. #}
{# NOTE(review): the route also passes `outcome_filter`, which this partial
   does not currently use. #}
{% if entries %}
<div class="list-group list-group-flush">
{% for entry in entries %}
{# The outcome value doubles as a CSS class (success/failure/rollback) #}
<div class="list-group-item journal-entry {{ entry.outcome.value }} p-3"
hx-get="/self-coding/journal/{{ entry.id }}/detail"
hx-target="#journal-detail-{{ entry.id }}"
hx-swap="innerHTML"
style="cursor: pointer;">
<div class="d-flex justify-content-between align-items-start mb-2">
<div class="d-flex align-items-center gap-2">
{# Outcome icon #}
{% if entry.outcome.value == 'success' %}
<span class="badge bg-success"></span>
{% elif entry.outcome.value == 'failure' %}
<span class="badge bg-danger"></span>
{% else %}
<span class="badge bg-warning text-dark"></span>
{% endif %}
<span class="text-muted small">
#{{ entry.id }}
</span>
</div>
<small class="text-muted">
{{ entry.timestamp.strftime('%Y-%m-%d %H:%M') if entry.timestamp else 'Unknown' }}
</small>
</div>
<p class="mb-1 fw-medium">{{ entry.task_description }}</p>
<div class="d-flex justify-content-between align-items-center">
<div class="small text-muted">
{% if entry.files_modified %}
<span class="me-2">📁 {{ entry.files_modified|length }} file(s)</span>
{% endif %}
{% if entry.retry_count > 0 %}
<span class="me-2">🔄 {{ entry.retry_count }} retries</span>
{% endif %}
{% if entry.reflection %}
<span title="Has reflection">💡</span>
{% endif %}
</div>
<span class="badge {% if entry.outcome.value == 'success' %}bg-success{% elif entry.outcome.value == 'failure' %}bg-danger{% else %}bg-warning text-dark{% endif %}">
{{ entry.outcome.value|upper }}
</span>
</div>
{# Detail container - populated on click #}
<div id="journal-detail-{{ entry.id }}" class="mt-3"></div>
</div>
{% endfor %}
</div>
{% else %}
<div class="text-center py-5 text-muted">
<p class="mb-0">No journal entries found.</p>
<small>Self-edit attempts will appear here.</small>
</div>
{% endif %}

View File

@@ -0,0 +1,54 @@
{# Journal entry detail partial — expects `entry` in context; every section is
   optional and only rendered when the corresponding field is set. #}
<div class="card mt-3 bg-dark-subtle border-0">
<div class="card-body">
<h6 class="card-subtitle mb-3 text-muted">Attempt Details</h6>
{% if entry.approach %}
<div class="mb-3">
<small class="text-muted">Approach:</small>
<p class="mb-0">{{ entry.approach }}</p>
</div>
{% endif %}
{% if entry.files_modified %}
<div class="mb-3">
<small class="text-muted">Files Modified:</small>
<ul class="list-unstyled mb-0">
{% for file in entry.files_modified %}
<li><code class="small">{{ file }}</code></li>
{% endfor %}
</ul>
</div>
{% endif %}
{# Diff and test output are truncated to 500 characters for display #}
{% if entry.diff %}
<div class="mb-3">
<small class="text-muted">Diff:</small>
<pre class="small bg-black p-2 rounded overflow-auto" style="max-height: 200px;"><code>{{ entry.diff[:500] }}{% if entry.diff|length > 500 %}...{% endif %}</code></pre>
</div>
{% endif %}
{% if entry.test_results %}
<div class="mb-3">
<small class="text-muted">Test Results:</small>
<pre class="small bg-black p-2 rounded overflow-auto" style="max-height: 150px;"><code>{{ entry.test_results[:500] }}{% if entry.test_results|length > 500 %}...{% endif %}</code></pre>
</div>
{% endif %}
{% if entry.failure_analysis %}
<div class="mb-3">
<small class="text-danger">Failure Analysis:</small>
<p class="mb-0 text-danger-emphasis">{{ entry.failure_analysis }}</p>
</div>
{% endif %}
{% if entry.reflection %}
<div class="mb-0">
<small class="text-info">Reflection:</small>
<div class="p-2 bg-info-subtle rounded">
{# NOTE(review): assumes a custom `markdown` Jinja filter is registered in
   the template environment — confirm, otherwise this raises at render time #}
{{ entry.reflection|markdown }}
</div>
</div>
{% endif %}
</div>
</div>

View File

@@ -0,0 +1,71 @@
{# Stats cards partial for self-coding dashboard.
   Expects `metrics` in context with keys: total, overall (fraction 0–1),
   success, failure, rollback — as returned by the journal's
   get_success_rate() and used by the /self-coding/stats route. #}
<div class="row g-3">
<!-- Total Attempts -->
<div class="col-md-3 col-6">
<div class="card border-0 shadow-sm stat-card h-100">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start">
<div>
<h6 class="text-muted small mb-1">Total Attempts</h6>
<h3 class="mb-0">{{ metrics.total }}</h3>
</div>
<span class="fs-4">📝</span>
</div>
</div>
</div>
</div>
<!-- Success Rate (color thresholds: >=70% green, >=40% amber, else red) -->
<div class="col-md-3 col-6">
<div class="card border-0 shadow-sm stat-card h-100">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start">
<div>
<h6 class="text-muted small mb-1">Success Rate</h6>
<h3 class="mb-0 {% if metrics.overall >= 0.7 %}text-success{% elif metrics.overall >= 0.4 %}text-warning{% else %}text-danger{% endif %}">
{{ "%.0f"|format(metrics.overall * 100) }}%
</h3>
</div>
<span class="fs-4">📊</span>
</div>
<div class="progress mt-2" style="height: 4px;">
<div class="progress-bar {% if metrics.overall >= 0.7 %}bg-success{% elif metrics.overall >= 0.4 %}bg-warning{% else %}bg-danger{% endif %}"
style="width: {{ metrics.overall * 100 }}%"></div>
</div>
</div>
</div>
</div>
<!-- Successes -->
<div class="col-md-3 col-6">
<div class="card border-0 shadow-sm stat-card h-100 border-start border-3 border-success">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start">
<div>
<h6 class="text-muted small mb-1">Successes</h6>
<h3 class="mb-0 text-success">{{ metrics.success }}</h3>
</div>
<span class="fs-4"></span>
</div>
</div>
</div>
</div>
<!-- Failures (failures and rollbacks combined in the headline number) -->
<div class="col-md-3 col-6">
<div class="card border-0 shadow-sm stat-card h-100 border-start border-3 border-danger">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start">
<div>
<h6 class="text-muted small mb-1">Failures</h6>
<h3 class="mb-0 text-danger">{{ metrics.failure + metrics.rollback }}</h3>
</div>
<span class="fs-4"></span>
</div>
<small class="text-muted">
{{ metrics.failure }} fail / {{ metrics.rollback }} rollback
</small>
</div>
</div>
</div>
</div>

View File

@@ -0,0 +1,184 @@
{% extends "base.html" %}
{% block title %}Self-Coding — Timmy Time{% endblock %}
{% block content %}
<div class="container-fluid py-4">
<!-- Header -->
<div class="d-flex justify-content-between align-items-center mb-4">
<div>
<h1 class="h3 mb-0">Self-Coding</h1>
<p class="text-muted small mb-0">Timmy's ability to modify its own source code</p>
</div>
<div class="d-flex gap-2">
<button class="btn btn-sm btn-outline-info" hx-get="/self-coding/stats" hx-target="#stats-container" hx-indicator="#stats-loading">
Refresh Stats
</button>
<button class="btn btn-sm btn-primary" hx-get="/self-coding/execute-form" hx-target="#execute-modal-content" onclick="document.getElementById('execute-modal').showModal()">
+ New Task
</button>
</div>
</div>
<!-- Stats Cards -->
<div id="stats-container" hx-get="/self-coding/stats" hx-trigger="load">
<div id="stats-loading" class="htmx-indicator">
<div class="d-flex justify-content-center py-4">
<div class="spinner-border text-info" role="status">
<span class="visually-hidden">Loading stats...</span>
</div>
</div>
</div>
</div>
<!-- Main Content Grid -->
<div class="row g-4 mt-2">
<!-- Left Column: Journal -->
<div class="col-lg-8">
<div class="card border-0 shadow-sm">
<div class="card-header bg-transparent border-secondary d-flex justify-content-between align-items-center">
<h5 class="mb-0">Modification Journal</h5>
<div class="btn-group btn-group-sm">
<button class="btn btn-outline-secondary active" hx-get="/self-coding/journal" hx-target="#journal-container">All</button>
<button class="btn btn-outline-secondary" hx-get="/self-coding/journal?outcome=success" hx-target="#journal-container">Success</button>
<button class="btn btn-outline-secondary" hx-get="/self-coding/journal?outcome=failure" hx-target="#journal-container">Failed</button>
</div>
</div>
<div class="card-body p-0">
<div id="journal-container" hx-get="/self-coding/journal" hx-trigger="load" class="journal-list">
<div class="d-flex justify-content-center py-5">
<div class="spinner-border text-info" role="status">
<span class="visually-hidden">Loading journal...</span>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Right Column: Quick Actions & Info -->
<div class="col-lg-4">
<!-- Quick Actions -->
<div class="card border-0 shadow-sm mb-4">
<div class="card-header bg-transparent border-secondary">
<h5 class="mb-0">Quick Actions</h5>
</div>
<div class="card-body">
<div class="d-grid gap-2">
<button class="btn btn-outline-info" hx-post="/self-coding/api/codebase/reindex" hx-swap="none" hx-confirm="Reindex codebase? This may take a moment.">
🔄 Reindex Codebase
</button>
<a href="/self-coding/api/codebase/summary" target="_blank" class="btn btn-outline-secondary">
📄 View Codebase Summary
</a>
</div>
</div>
</div>
<!-- Safety Info -->
<div class="card border-0 shadow-sm mb-4">
<div class="card-header bg-transparent border-secondary">
<h5 class="mb-0">Safety Constraints</h5>
</div>
<div class="card-body">
<ul class="list-unstyled small mb-0">
<li class="mb-2">✓ Max 3 files per commit</li>
<li class="mb-2">✓ Max 100 lines changed</li>
<li class="mb-2">✓ Only files with test coverage</li>
<li class="mb-2">✓ Max 3 retries on failure</li>
<li class="mb-2">✓ Protected files cannot be modified</li>
<li>✓ All changes on feature branches</li>
</ul>
</div>
</div>
<!-- How It Works -->
<div class="card border-0 shadow-sm">
<div class="card-header bg-transparent border-secondary">
<h5 class="mb-0">How It Works</h5>
</div>
<div class="card-body">
<ol class="small mb-0">
<li class="mb-2">Receive task description</li>
<li class="mb-2">Find relevant files via indexer</li>
<li class="mb-2">Check journal for similar attempts</li>
<li class="mb-2">Create feature branch</li>
<li class="mb-2">Plan edit with LLM</li>
<li class="mb-2">Execute via Aider or direct edit</li>
<li class="mb-2">Run tests</li>
<li class="mb-2">Commit on success, rollback on failure</li>
<li>Log attempt and reflect</li>
</ol>
</div>
</div>
</div>
</div>
</div>
<!-- Execute Modal -->
<dialog id="execute-modal" class="rounded border-0 shadow-lg" style="max-width: 600px; width: 90%; background: var(--bs-body-bg);">
<div class="p-4">
<div class="d-flex justify-content-between align-items-center mb-3">
<h5 class="mb-0">Execute Self-Edit Task</h5>
<button type="button" class="btn-close" onclick="document.getElementById('execute-modal').close()"></button>
</div>
<div id="execute-modal-content">
<!-- Form loaded via HTMX -->
</div>
</div>
</dialog>
<style>
.journal-list {
max-height: 600px;
overflow-y: auto;
}
.journal-entry {
border-left: 3px solid transparent;
transition: all 0.2s ease;
}
.journal-entry:hover {
background-color: rgba(255, 255, 255, 0.03);
}
.journal-entry.success {
border-left-color: #198754;
}
.journal-entry.failure {
border-left-color: #dc3545;
}
.journal-entry.rollback {
border-left-color: #fd7e14;
}
.stat-card {
transition: transform 0.2s ease;
}
.stat-card:hover {
transform: translateY(-2px);
}
/* Custom scrollbar for journal */
.journal-list::-webkit-scrollbar {
width: 6px;
}
.journal-list::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.05);
}
.journal-list::-webkit-scrollbar-thumb {
background: rgba(255, 255, 255, 0.2);
border-radius: 3px;
}
.journal-list::-webkit-scrollbar-thumb:hover {
background: rgba(255, 255, 255, 0.3);
}
</style>
{% endblock %}

824
src/tools/self_edit.py Normal file
View File

@@ -0,0 +1,824 @@
"""Self-Edit MCP Tool — Timmy's ability to modify its own source code.
This is the core self-modification orchestrator that:
1. Receives task descriptions
2. Queries codebase indexer for relevant files
3. Queries modification journal for similar past attempts
4. Creates feature branches via GitSafety
5. Plans changes with LLM
6. Executes via Aider (preferred) or direct editing (fallback)
7. Runs tests via pytest
8. Commits on success, rolls back on failure
9. Logs outcomes to ModificationJournal
10. Generates reflections
Usage:
from tools.self_edit import self_edit_tool
from mcp.registry import tool_registry
# Register with MCP
tool_registry.register("self_edit", self_edit_schema, self_edit_tool)
# Invoke
result = await tool_registry.execute("self_edit", {
"task_description": "Add error handling to health endpoint"
})
"""
from __future__ import annotations
import ast
import asyncio
import logging
import os
import subprocess
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from config import settings
# Phase 1 imports
from self_coding import (
CodebaseIndexer,
GitSafety,
ModificationAttempt,
ModificationJournal,
Outcome,
ReflectionService,
)
logger = logging.getLogger(__name__)
# Safety constraints (Phase 1 hard limits)
# A single self-edit may touch at most this many files (enforced when
# selecting relevant files; presumably also in _validate_plan — confirm).
MAX_FILES_PER_COMMIT = 3
# Cap on total changed lines per commit — TODO confirm where this is enforced
# (likely _validate_plan, whose body is elsewhere in this file).
MAX_LINES_CHANGED = 100
# Files the tool must never modify: the self-edit machinery itself, so a bad
# self-edit cannot disable the safety rails that would catch it.
PROTECTED_FILES = {
    "src/tools/self_edit.py",
    "src/self_coding/git_safety.py",
    "src/self_coding/codebase_indexer.py",
    "src/self_coding/modification_journal.py",
    "src/self_coding/reflection.py",
}
# Additional execution attempts after the first one fails (see execute())
MAX_RETRIES = 3
@dataclass
class SelfEditResult:
    """Result of a self-edit operation.

    Returned by SelfEditTool.execute() on both success and failure paths.
    """

    # True when the edit executed, tests passed, and the change was merged
    success: bool
    # Human-readable summary of what happened
    message: str
    # Journal row id of the logged attempt, when one was recorded
    attempt_id: Optional[int] = None
    # Paths of files changed by the edit
    files_modified: list[str] = field(default_factory=list)
    # Commit hash of the merged change (set on success)
    commit_hash: Optional[str] = None
    # Raw test runner output, when captured
    test_results: str = ""
    # Unified diff of the change, when captured
    diff: str = ""
@dataclass
class EditPlan:
    """Plan for a self-edit operation.

    Produced by SelfEditTool._plan_edit and validated against the safety
    constraints before execution.
    """

    # Short description of how the task will be tackled
    approach: str
    # Existing files the edit will change
    files_to_modify: list[str]
    # New files the edit will add
    files_to_create: list[str]
    # Test files to add alongside the change
    tests_to_add: list[str]
    # Free-form rationale for the plan
    explanation: str
class SelfEditTool:
"""Self-modification orchestrator.
This class encapsulates the complete self-edit workflow:
- Pre-flight checks
- Context gathering (indexer + journal)
- Branch creation
- Edit planning (LLM)
- Execution (Aider or direct)
- Testing
- Commit/rollback
- Logging and reflection
Usage:
tool = SelfEditTool(repo_path="/path/to/repo")
result = await tool.execute("Add error handling to health endpoint")
"""
    def __init__(
        self,
        repo_path: Optional[Path] = None,
        llm_adapter: Optional[object] = None,
    ) -> None:
        """Initialize SelfEditTool.

        Args:
            repo_path: Path to repository. Defaults to current directory.
            llm_adapter: LLM adapter for planning and reflection
        """
        self.repo_path = Path(repo_path) if repo_path else Path.cwd()
        self.llm_adapter = llm_adapter
        # Initialize Phase 1 services: git safety rails, code index, journal,
        # and the reflection service (which shares the LLM adapter).
        self.git = GitSafety(repo_path=self.repo_path)
        self.indexer = CodebaseIndexer(repo_path=self.repo_path)
        self.journal = ModificationJournal()
        self.reflection = ReflectionService(llm_adapter=llm_adapter)
        # Indexing is deferred until first use — see _ensure_indexed()
        self._indexing_done = False
        logger.info("SelfEditTool initialized for %s", self.repo_path)
async def _ensure_indexed(self) -> None:
"""Ensure codebase is indexed."""
if not self._indexing_done:
await self.indexer.index_changed()
self._indexing_done = True
    async def execute(
        self,
        task_description: str,
        context: Optional[dict] = None,
    ) -> SelfEditResult:
        """Execute a self-edit task.

        This is the main entry point for self-modification. Workflow:
        pre-flight checks -> gather context -> feature branch -> snapshot ->
        plan -> validate -> execute (with retries) -> commit & merge ->
        journal the outcome.

        Args:
            task_description: What to do (e.g., "Add error handling")
            context: Optional additional context (not read in this method)

        Returns:
            SelfEditResult with success/failure details
        """
        logger.info("Starting self-edit: %s", task_description[:50])
        try:
            # Step 1: Pre-flight checks (clean tree, on main, feature enabled)
            if not await self._preflight_checks():
                return SelfEditResult(
                    success=False,
                    message="Pre-flight checks failed. See logs for details.",
                )
            # Step 2: Gather context from the indexer and the journal
            await self._ensure_indexed()
            relevant_files = await self._get_relevant_files(task_description)
            similar_attempts = await self._get_similar_attempts(task_description)
            # Step 3: Create feature branch (timestamped for uniqueness)
            branch_name = f"timmy/self-edit/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
            await self.git.create_branch(branch_name)
            logger.info("Created branch: %s", branch_name)
            # Step 4: Take snapshot for rollback
            snapshot = await self.git.snapshot(run_tests=False)
            # Step 5: Plan the edit
            plan = await self._plan_edit(
                task_description,
                relevant_files,
                similar_attempts,
            )
            # Validate plan against safety constraints
            if not self._validate_plan(plan):
                return SelfEditResult(
                    success=False,
                    message=f"Plan violates safety constraints: {plan.files_to_modify}",
                )
            # Step 6: Execute the edit
            execution_result = await self._execute_edit(plan, task_description)
            if not execution_result["success"]:
                # Attempt retries, rolling back to the snapshot before each one
                for retry in range(MAX_RETRIES):
                    logger.info("Retry %d/%d", retry + 1, MAX_RETRIES)
                    # Rollback to clean state
                    await self.git.rollback(snapshot)
                    # Try again with adjusted approach
                    execution_result = await self._execute_edit(
                        plan,
                        task_description,
                        retry_count=retry + 1,
                    )
                    if execution_result["success"]:
                        break
            if not execution_result["success"]:
                # Final rollback and log failure
                await self.git.rollback(snapshot)
                # NOTE(review): reaches into GitSafety's private _run_git to
                # switch branches — consider exposing a public checkout API.
                await self.git._run_git("checkout", "main")  # Return to main
                # NOTE(review): uses ["test_output"] here but .get(...) with a
                # default below — confirm the key is always present on failure.
                attempt_id = await self._log_failure(
                    task_description,
                    plan,
                    execution_result["test_output"],
                    execution_result.get("error", "Unknown error"),
                )
                return SelfEditResult(
                    success=False,
                    message=f"Failed after {MAX_RETRIES} retries",
                    attempt_id=attempt_id,
                    test_results=execution_result.get("test_output", ""),
                )
            # Step 7: Commit and merge
            commit_hash = await self.git.commit(
                message=f"Self-edit: {task_description[:50]}",
                files=plan.files_to_modify + plan.files_to_create + plan.tests_to_add,
            )
            # Merge to main (tests already passed in execution)
            await self.git.merge_to_main(branch_name, require_tests=False)
            # Step 8: Log success
            diff = await self.git.get_diff(snapshot.commit_hash, commit_hash)
            attempt_id = await self._log_success(
                task_description,
                plan,
                commit_hash,
                execution_result.get("test_output", ""),
                diff,
            )
            return SelfEditResult(
                success=True,
                message=f"Successfully modified {len(plan.files_to_modify)} files",
                attempt_id=attempt_id,
                files_modified=plan.files_to_modify,
                commit_hash=commit_hash,
                test_results=execution_result.get("test_output", ""),
                diff=diff,
            )
        except Exception as e:
            # Top-level boundary: log with traceback and report failure rather
            # than letting the exception propagate to the caller.
            logger.exception("Self-edit failed with exception")
            return SelfEditResult(
                success=False,
                message=f"Exception: {str(e)}",
            )
async def _preflight_checks(self) -> bool:
"""Run pre-flight safety checks.
Returns:
True if all checks pass
"""
# Check if repo is clean
if not await self.git.is_clean():
logger.error("Pre-flight failed: Working directory not clean")
return False
# Check if we're on main
current_branch = await self.git.get_current_branch()
if current_branch != self.git.main_branch:
logger.error("Pre-flight failed: Not on %s branch (on %s)",
self.git.main_branch, current_branch)
return False
# Check if self-modification is enabled
if not getattr(settings, 'self_modify_enabled', True):
logger.error("Pre-flight failed: Self-modification disabled in config")
return False
return True
async def _get_relevant_files(self, task_description: str) -> list[str]:
    """Return task-relevant files, keeping only those with test coverage.

    Args:
        task_description: Task to find relevant files for

    Returns:
        At most MAX_FILES_PER_COMMIT file paths, each backed by tests
    """
    candidates = await self.indexer.get_relevant_files(task_description, limit=10)

    # Only files with tests are safe targets: the edit loop relies on the
    # suite to detect regressions.
    covered: list[str] = []
    for path in candidates:
        if await self.indexer.has_test_coverage(path):
            covered.append(path)

    logger.info("Found %d relevant files (%d with tests)",
                len(candidates), len(covered))
    return covered[:MAX_FILES_PER_COMMIT]
async def _get_similar_attempts(
    self,
    task_description: str,
) -> list[ModificationAttempt]:
    """Look up past modification attempts that resemble this task.

    Args:
        task_description: Task to find similar attempts for

    Returns:
        Up to five similar past attempts from the journal
    """
    matches = await self.journal.find_similar(task_description, limit=5)
    logger.info("Found %d similar past attempts", len(matches))
    return matches
async def _plan_edit(
    self,
    task_description: str,
    relevant_files: list[str],
    similar_attempts: list[ModificationAttempt],
) -> EditPlan:
    """Plan the edit using LLM.

    Builds a prompt from the codebase summary, candidate files, and
    lessons from similar past attempts, then parses the LLM's structured
    reply into an EditPlan. Degrades to a heuristic plan when no LLM is
    configured or when the LLM call fails.

    Args:
        task_description: What to do
        relevant_files: Files that might need modification
        similar_attempts: Similar past attempts for context

    Returns:
        EditPlan with approach and file list
    """
    if not self.llm_adapter:
        # Fallback: simple plan — just edit the top candidate files.
        return EditPlan(
            approach=f"Edit files to implement: {task_description}",
            files_to_modify=relevant_files[:MAX_FILES_PER_COMMIT],
            files_to_create=[],
            tests_to_add=[],
            explanation="No LLM available, using heuristic plan",
        )

    # Build prompt with context (summary is token-capped to keep the
    # prompt within model limits).
    codebase_summary = await self.indexer.get_summary(max_tokens=2000)

    similar_context = ""
    if similar_attempts:
        similar_context = "\n\nSimilar past attempts:\n"
        for attempt in similar_attempts:
            similar_context += f"- {attempt.task_description} ({attempt.outcome.value})\n"
            if attempt.reflection:
                # Truncate lessons so the prompt stays small.
                similar_context += f" Lesson: {attempt.reflection[:100]}...\n"

    prompt = f"""You are planning a code modification for a Python project.
Task: {task_description}
Codebase Summary:
{codebase_summary}
Potentially relevant files (all have test coverage):
{chr(10).join(f"- {f}" for f in relevant_files)}
{similar_context}
Create a plan for implementing this task. You can modify at most {MAX_FILES_PER_COMMIT} files.
Respond in this format:
APPROACH: <brief description of the approach>
FILES_TO_MODIFY: <comma-separated list of file paths>
FILES_TO_CREATE: <comma-separated list of new file paths (if any)>
TESTS_TO_ADD: <comma-separated list of test files to add/modify>
EXPLANATION: <brief explanation of why this approach>
"""
    try:
        response = await self.llm_adapter.chat(message=prompt)
        content = response.content

        # Parse the line-oriented response into structured plan fields.
        approach = self._extract_field(content, "APPROACH")
        files_to_modify = self._parse_list(self._extract_field(content, "FILES_TO_MODIFY"))
        files_to_create = self._parse_list(self._extract_field(content, "FILES_TO_CREATE"))
        tests_to_add = self._parse_list(self._extract_field(content, "TESTS_TO_ADD"))
        explanation = self._extract_field(content, "EXPLANATION")

        return EditPlan(
            approach=approach or "No approach specified",
            # Hard cap even if the model ignores the instruction.
            files_to_modify=files_to_modify[:MAX_FILES_PER_COMMIT],
            files_to_create=files_to_create,
            tests_to_add=tests_to_add,
            explanation=explanation or "No explanation provided",
        )
    except Exception as e:
        logger.error("LLM planning failed: %s", e)
        # Degrade gracefully: heuristic plan over the candidate files.
        return EditPlan(
            approach=f"Fallback: Modify relevant files for {task_description}",
            files_to_modify=relevant_files[:MAX_FILES_PER_COMMIT],
            files_to_create=[],
            tests_to_add=[],
            explanation=f"LLM failed, using fallback: {e}",
        )
def _extract_field(self, content: str, field_name: str) -> str:
"""Extract a field from LLM response."""
for line in content.split("\n"):
if line.startswith(f"{field_name}:"):
return line.split(":", 1)[1].strip()
return ""
def _parse_list(self, text: str) -> list[str]:
"""Parse comma-separated list."""
if not text or text.lower() in ("none", "n/a", ""):
return []
return [item.strip() for item in text.split(",") if item.strip()]
def _validate_plan(self, plan: EditPlan) -> bool:
    """Validate plan against safety constraints.

    Args:
        plan: EditPlan to validate

    Returns:
        True if plan is valid
    """
    # Constraint 1: bounded blast radius per commit.
    if len(plan.files_to_modify) > MAX_FILES_PER_COMMIT:
        logger.error("Plan modifies too many files: %d > %d",
                     len(plan.files_to_modify), MAX_FILES_PER_COMMIT)
        return False

    # Constraint 2: never touch files on the protected list.
    for file_path in plan.files_to_modify:
        if file_path in PROTECTED_FILES:
            logger.error("Plan tries to modify protected file: %s", file_path)
            return False

    # Test-coverage checks are async and therefore enforced upstream in
    # _get_relevant_files; the previous no-op loop here was removed.
    return True
async def _execute_edit(
    self,
    plan: EditPlan,
    task_description: str,
    retry_count: int = 0,
) -> dict:
    """Apply the plan, preferring Aider with a direct-edit fallback.

    Args:
        plan: EditPlan to execute
        task_description: Original task description
        retry_count: Current retry attempt

    Returns:
        Dict with success, test_output, error
    """
    targets = plan.files_to_modify + plan.files_to_create
    if not targets:
        return {"success": False, "error": "No files to modify"}

    # Aider (when installed) handles editing plus auto-testing; otherwise
    # fall back to LLM-driven whole-file rewrites.
    use_aider = await self._aider_available()
    if use_aider:
        return await self._execute_with_aider(plan, task_description, targets)
    return await self._execute_direct_edit(plan, task_description)
async def _aider_available(self) -> bool:
"""Check if Aider is available."""
try:
result = await asyncio.create_subprocess_exec(
"aider", "--version",
stdout=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.DEVNULL,
)
await result.wait()
return result.returncode == 0
except FileNotFoundError:
return False
async def _execute_with_aider(
    self,
    plan: EditPlan,
    task_description: str,
    files: list[str],
) -> dict:
    """Execute edit using Aider.

    Runs the Aider CLI against the given files with auto-testing enabled,
    so the returned success flag already reflects a pytest run.

    Args:
        plan: EditPlan
        task_description: Task description
        files: Files to edit

    Returns:
        Dict with success, test_output (plus error on timeout/failure)
    """
    cmd = [
        "aider",
        "--model", "ollama_chat/qwen2.5-coder:14b-instruct",
        "--auto-test",
        "--test-cmd", "python -m pytest tests/ -xvs",
        "--yes",     # non-interactive: accept Aider's edits automatically
        "--no-git",  # branching/commits are managed by self.git, not Aider
        "--message", f"{task_description}\n\nApproach: {plan.approach}",
    ] + files
    logger.info("Running Aider: %s", " ".join(cmd))
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            # Interleave stderr into one captured stream.
            stderr=asyncio.subprocess.STDOUT,
            cwd=self.repo_path,
        )
        stdout, _ = await asyncio.wait_for(
            proc.communicate(),
            timeout=300.0,  # hard cap on a single Aider run
        )
        output = stdout.decode() if stdout else ""
        # Check if tests passed: exit 0 alone is not trusted — also require
        # pytest's "passed" marker in the captured output.
        success = proc.returncode == 0 and "passed" in output.lower()
        return {
            "success": success,
            "test_output": output,
        }
    except asyncio.TimeoutError:
        logger.error("Aider timed out after 300s")
        return {
            "success": False,
            "error": "Timeout",
            "test_output": "Aider timed out after 300s",
        }
    except Exception as e:
        logger.error("Aider execution failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "test_output": "",
        }
async def _execute_direct_edit(
    self,
    plan: EditPlan,
    task_description: str,
) -> dict:
    """Execute edit via direct file modification (fallback).

    Asks the LLM to rewrite each planned file in full, validates the
    result with ast.parse before writing, then runs the test suite.

    NOTE(review): plan.files_to_create is not handled in this path —
    confirm whether direct editing is meant to create new files.

    Args:
        plan: EditPlan
        task_description: Task description

    Returns:
        Dict with success, test_output (plus error on failure)
    """
    if not self.llm_adapter:
        return {
            "success": False,
            "error": "No LLM adapter for direct editing",
        }

    # Edit each file
    for file_path in plan.files_to_modify:
        full_path = self.repo_path / file_path
        if not full_path.exists():
            # Skip missing files rather than aborting the whole edit.
            logger.error("File does not exist: %s", file_path)
            continue
        try:
            content = full_path.read_text()

            # Build edit prompt
            edit_prompt = f"""Edit this Python file to implement the task.
Task: {task_description}
Approach: {plan.approach}
Current file content:
```python
{content}
```
Provide the complete new file content. Only return the code, no explanation.
"""
            response = await self.llm_adapter.chat(message=edit_prompt)
            new_content = response.content

            # Strip code fences if present
            new_content = self._strip_code_fences(new_content)

            # Validate with AST: never write code that does not parse.
            try:
                ast.parse(new_content)
            except SyntaxError as e:
                logger.error("Generated code has syntax error: %s", e)
                return {
                    "success": False,
                    "error": f"Syntax error in generated code: {e}",
                }

            # Write file
            full_path.write_text(new_content)
            logger.info("Modified: %s", file_path)
        except Exception as e:
            logger.error("Failed to edit %s: %s", file_path, e)
            return {
                "success": False,
                "error": f"Failed to edit {file_path}: {e}",
            }

    # Run tests over everything that was written.
    return await self._run_tests()
def _strip_code_fences(self, content: str) -> str:
"""Strip markdown code fences from content."""
lines = content.split("\n")
# Remove opening fence
if lines and lines[0].startswith("```"):
lines = lines[1:]
# Remove closing fence
if lines and lines[-1].startswith("```"):
lines = lines[:-1]
return "\n".join(lines)
async def _run_tests(self) -> dict:
    """Run the project test suite under pytest.

    Returns:
        Dict with success flag and captured test_output
    """
    pytest_cmd = ["python", "-m", "pytest", "tests/", "-x", "--tb=short"]
    try:
        proc = await asyncio.create_subprocess_exec(
            *pytest_cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=self.repo_path,
        )
        # Cap the whole suite at two minutes.
        raw, _ = await asyncio.wait_for(proc.communicate(), timeout=120.0)
        captured = raw.decode() if raw else ""
        return {
            "success": proc.returncode == 0,
            "test_output": captured,
        }
    except asyncio.TimeoutError:
        return {
            "success": False,
            "error": "Tests timed out",
            "test_output": "Timeout after 120s",
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "test_output": "",
        }
async def _log_success(
    self,
    task_description: str,
    plan: EditPlan,
    commit_hash: str,
    test_results: str,
    diff: str,
) -> int:
    """Record a successful attempt in the journal and attach a reflection.

    NOTE(review): commit_hash is accepted but not stored on the attempt —
    confirm whether ModificationAttempt should carry it.

    Returns:
        Attempt ID
    """
    record = ModificationAttempt(
        task_description=task_description,
        approach=plan.approach,
        files_modified=plan.files_to_modify + plan.files_to_create,
        diff=diff[:5000],  # Truncate for storage
        test_results=test_results,
        outcome=Outcome.SUCCESS,
    )
    entry_id = await self.journal.log_attempt(record)

    # Distill a lesson from the attempt and attach it to the entry.
    lesson = await self.reflection.reflect_on_attempt(record)
    await self.journal.update_reflection(entry_id, lesson)
    return entry_id
async def _log_failure(
    self,
    task_description: str,
    plan: EditPlan,
    test_results: str,
    error: str,
) -> int:
    """Record a failed attempt (post-retry-exhaustion) in the journal.

    Returns:
        Attempt ID
    """
    record = ModificationAttempt(
        task_description=task_description,
        approach=plan.approach,
        files_modified=plan.files_to_modify,
        test_results=test_results,
        outcome=Outcome.FAILURE,
        failure_analysis=error,
        # Only called after the retry loop is exhausted.
        retry_count=MAX_RETRIES,
    )
    entry_id = await self.journal.log_attempt(record)

    # Generate reflection even for failures — failed attempts are the
    # most valuable lessons for future planning.
    lesson = await self.reflection.reflect_on_attempt(record)
    await self.journal.update_reflection(entry_id, lesson)
    return entry_id
# MCP Tool Schema
# JSON Schema advertised to MCP clients describing self_edit's arguments.
self_edit_schema = {
    "type": "object",
    "properties": {
        "task_description": {
            "type": "string",
            "description": "Description of the code modification to make",
        },
        "context": {
            "type": "object",
            "description": "Optional additional context for the modification",
        },
    },
    "required": ["task_description"],
}

# Global tool instance (singleton pattern).
# Lazily created by self_edit_tool() on first call; replaced outright by
# register_self_edit_tool() so the handler picks up the configured adapter.
_self_edit_tool: Optional[SelfEditTool] = None
async def self_edit_tool(task_description: str, context: Optional[dict] = None) -> dict:
    """MCP tool entry point for self-edit.

    Lazily instantiates the shared SelfEditTool singleton on first use,
    then serializes the result into a plain dict for the MCP transport.

    Args:
        task_description: What to modify
        context: Optional context

    Returns:
        Dict with result
    """
    global _self_edit_tool
    if _self_edit_tool is None:
        _self_edit_tool = SelfEditTool()

    outcome = await _self_edit_tool.execute(task_description, context)
    return {
        "success": outcome.success,
        "message": outcome.message,
        "attempt_id": outcome.attempt_id,
        "files_modified": outcome.files_modified,
        "commit_hash": outcome.commit_hash,
        "test_results": outcome.test_results,
    }
def register_self_edit_tool(registry: Any, llm_adapter: Optional[object] = None) -> None:
    """Register the self-edit tool with MCP registry.

    Replaces the module-level singleton so the registered handler uses
    the supplied adapter.

    Args:
        registry: MCP ToolRegistry
        llm_adapter: Optional LLM adapter
    """
    global _self_edit_tool
    tool = SelfEditTool(llm_adapter=llm_adapter)
    _self_edit_tool = tool

    registry.register(
        name="self_edit",
        schema=self_edit_schema,
        handler=self_edit_tool,
        category="self_coding",
        requires_confirmation=True,  # Safety: require user approval
        tags=["self-modification", "code-generation"],
        source_module="tools.self_edit",
    )
    logger.info("Self-edit tool registered with MCP")

View File

@@ -0,0 +1,143 @@
"""Tests for Self-Coding Dashboard Routes.
Tests API endpoints and HTMX views.
"""
from __future__ import annotations
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client():
    """Create test client.

    The dashboard app is imported lazily so it (and its routers) only
    load when a test actually requests the client.
    """
    from dashboard.app import app
    return TestClient(app)
class TestSelfCodingPageRoutes:
    """Page route tests.

    Each test hits a page/HTMX endpoint and checks for expected markup.
    """

    def test_main_page_loads(self, client):
        """Main self-coding page should load."""
        response = client.get("/self-coding")
        assert response.status_code == 200
        assert "Self-Coding" in response.text

    def test_journal_partial(self, client):
        """Journal partial should return HTML."""
        response = client.get("/self-coding/journal")
        assert response.status_code == 200
        # Should contain journal list or empty message (journal may be empty).
        assert "journal" in response.text.lower() or "no entries" in response.text.lower()

    def test_stats_partial(self, client):
        """Stats partial should return HTML."""
        response = client.get("/self-coding/stats")
        assert response.status_code == 200
        # Should contain stats cards
        assert "Total Attempts" in response.text or "success rate" in response.text.lower()

    def test_execute_form_partial(self, client):
        """Execute form partial should return HTML."""
        response = client.get("/self-coding/execute-form")
        assert response.status_code == 200
        assert "Task Description" in response.text
        assert "textarea" in response.text
class TestSelfCodingAPIRoutes:
    """API route tests.

    JSON endpoint checks; these assert response shape rather than specific
    journal content, since the store may be empty in a fresh environment.
    """

    def test_api_journal_list(self, client):
        """API should return journal entries."""
        response = client.get("/self-coding/api/journal")
        assert response.status_code == 200
        data = response.json()
        assert isinstance(data, list)

    def test_api_journal_list_with_limit(self, client):
        """API should respect limit parameter."""
        response = client.get("/self-coding/api/journal?limit=5")
        assert response.status_code == 200
        data = response.json()
        assert isinstance(data, list)
        assert len(data) <= 5

    def test_api_journal_detail_not_found(self, client):
        """API should return 404 for non-existent entry."""
        response = client.get("/self-coding/api/journal/99999")
        assert response.status_code == 404

    def test_api_stats(self, client):
        """API should return stats."""
        response = client.get("/self-coding/api/stats")
        assert response.status_code == 200
        data = response.json()
        # Keys mirror StatsResponse.
        assert "total_attempts" in data
        assert "success_rate" in data
        assert "recent_failures" in data

    def test_api_codebase_summary(self, client):
        """API should return codebase summary."""
        response = client.get("/self-coding/api/codebase/summary")
        assert response.status_code == 200
        data = response.json()
        assert "summary" in data

    def test_api_codebase_reindex(self, client):
        """API should trigger reindex."""
        response = client.post("/self-coding/api/codebase/reindex")
        assert response.status_code == 200
        data = response.json()
        assert "indexed" in data
        assert "failed" in data
        assert "skipped" in data
class TestSelfCodingExecuteEndpoint:
    """Execute endpoint tests."""

    def test_execute_api_endpoint(self, client):
        """Execute API endpoint should accept task."""
        # Note: This will actually try to execute, which may fail.
        # In production, this should be mocked or require auth.
        response = client.post(
            "/self-coding/api/execute",
            json={"task_description": "Test task that will fail preflight"}
        )
        # Should return a structured response either way (success or failure).
        assert response.status_code == 200
        data = response.json()
        assert "success" in data
        assert "message" in data

    def test_execute_htmx_endpoint(self, client):
        """Execute HTMX endpoint should accept form data."""
        response = client.post(
            "/self-coding/execute",
            data={"task_description": "Test task that will fail preflight"}
        )
        # Should return HTML response (HTMX partial, not JSON)
        assert response.status_code == 200
        assert "text/html" in response.headers["content-type"]
class TestSelfCodingNavigation:
    """Navigation integration tests."""

    def test_nav_link_in_header(self, client):
        """Self-coding link should be in header."""
        response = client.get("/")
        assert response.status_code == 200
        # Both the href and the visible label should be present.
        assert "/self-coding" in response.text
        assert "SELF-CODING" in response.text

View File

@@ -0,0 +1,398 @@
"""Tests for Self-Edit MCP Tool.
Tests the complete self-edit workflow with mocked dependencies.
"""
from __future__ import annotations
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from tools.self_edit import (
MAX_FILES_PER_COMMIT,
MAX_RETRIES,
PROTECTED_FILES,
EditPlan,
SelfEditResult,
SelfEditTool,
register_self_edit_tool,
self_edit_tool,
)
@pytest.fixture
def temp_repo():
    """Create a temporary git repository.

    Lays out a minimal src/ package with one module, a matching test,
    and an initial commit on a `main` branch. Yields the repo root; the
    directory is removed when the fixture context exits.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        repo_path = Path(tmpdir)

        # Initialize git with a local identity (required for commits in CI).
        import subprocess
        subprocess.run(["git", "init"], cwd=repo_path, check=True, capture_output=True)
        subprocess.run(
            ["git", "config", "user.email", "test@test.com"],
            cwd=repo_path, check=True, capture_output=True,
        )
        subprocess.run(
            ["git", "config", "user.name", "Test"],
            cwd=repo_path, check=True, capture_output=True,
        )

        # Create src structure
        src_path = repo_path / "src" / "myproject"
        src_path.mkdir(parents=True)
        (src_path / "__init__.py").write_text("")
        (src_path / "app.py").write_text('''
"""Main application."""
def hello():
    return "Hello"
''')

        # Create tests
        tests_path = repo_path / "tests"
        tests_path.mkdir()
        (tests_path / "test_app.py").write_text('''
"""Tests for app."""
from myproject.app import hello
def test_hello():
    assert hello() == "Hello"
''')

        # Initial commit
        subprocess.run(["git", "add", "."], cwd=repo_path, check=True, capture_output=True)
        subprocess.run(
            ["git", "commit", "-m", "Initial"],
            cwd=repo_path, check=True, capture_output=True,
        )
        # Normalize the default branch name to `main` regardless of git config.
        subprocess.run(
            ["git", "branch", "-M", "main"],
            cwd=repo_path, check=True, capture_output=True,
        )

        yield repo_path
@pytest.fixture(autouse=True)
def mock_settings():
    """Mock settings to enable self-modification.

    Autouse: every test in this module runs with self_modify_enabled=True
    so pre-flight checks do not fail on configuration.
    """
    with patch('tools.self_edit.settings') as mock_settings:
        mock_settings.self_modify_enabled = True
        yield mock_settings
@pytest.fixture
def mock_llm():
    """Create mock LLM adapter.

    chat() returns a canned planning response in the APPROACH/FILES
    format that SelfEditTool._plan_edit expects to parse.
    """
    mock = AsyncMock()
    mock.chat.return_value = MagicMock(
        content="""APPROACH: Add error handling
FILES_TO_MODIFY: src/myproject/app.py
FILES_TO_CREATE:
TESTS_TO_ADD: tests/test_app.py
EXPLANATION: Wrap function in try/except"""
    )
    return mock
@pytest.mark.asyncio
class TestSelfEditToolBasics:
    """Basic functionality tests."""

    async def test_initialization(self, temp_repo):
        """Should initialize with services."""
        tool = SelfEditTool(repo_path=temp_repo)
        assert tool.repo_path == temp_repo
        # All collaborating services are constructed eagerly.
        assert tool.git is not None
        assert tool.indexer is not None
        assert tool.journal is not None
        assert tool.reflection is not None

    async def test_preflight_checks_clean_repo(self, temp_repo):
        """Should pass preflight on clean repo."""
        tool = SelfEditTool(repo_path=temp_repo)
        assert await tool._preflight_checks() is True

    async def test_preflight_checks_dirty_repo(self, temp_repo):
        """Should fail preflight on dirty repo."""
        tool = SelfEditTool(repo_path=temp_repo)
        # Make uncommitted change
        (temp_repo / "dirty.txt").write_text("dirty")
        assert await tool._preflight_checks() is False

    async def test_preflight_checks_wrong_branch(self, temp_repo):
        """Should fail preflight when not on main."""
        tool = SelfEditTool(repo_path=temp_repo)
        # Create and checkout feature branch
        import subprocess
        subprocess.run(
            ["git", "checkout", "-b", "feature"],
            cwd=temp_repo, check=True, capture_output=True,
        )
        assert await tool._preflight_checks() is False
@pytest.mark.asyncio
class TestSelfEditToolPlanning:
    """Edit planning tests."""

    async def test_plan_edit_with_llm(self, temp_repo, mock_llm):
        """Should generate plan using LLM."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await tool._ensure_indexed()
        plan = await tool._plan_edit(
            task_description="Add error handling",
            relevant_files=["src/myproject/app.py"],
            similar_attempts=[],
        )
        assert isinstance(plan, EditPlan)
        # Values come straight from the canned mock_llm response.
        assert plan.approach == "Add error handling"
        assert "src/myproject/app.py" in plan.files_to_modify

    async def test_plan_edit_without_llm(self, temp_repo):
        """Should generate fallback plan without LLM."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=None)
        await tool._ensure_indexed()
        plan = await tool._plan_edit(
            task_description="Add feature",
            relevant_files=["src/myproject/app.py"],
            similar_attempts=[],
        )
        # Heuristic fallback still produces a usable plan.
        assert isinstance(plan, EditPlan)
        assert len(plan.files_to_modify) > 0

    async def test_plan_respects_max_files(self, temp_repo, mock_llm):
        """Plan should respect MAX_FILES_PER_COMMIT."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await tool._ensure_indexed()
        # Mock LLM to return many files (more than the cap).
        mock_llm.chat.return_value = MagicMock(
            content="FILES_TO_MODIFY: " + ",".join([f"file{i}.py" for i in range(10)])
        )
        plan = await tool._plan_edit(
            task_description="Test",
            relevant_files=[f"file{i}.py" for i in range(10)],
            similar_attempts=[],
        )
        assert len(plan.files_to_modify) <= MAX_FILES_PER_COMMIT
@pytest.mark.asyncio
class TestSelfEditToolValidation:
    """Safety constraint validation tests."""

    async def test_validate_plan_too_many_files(self, temp_repo):
        """Should reject plan with too many files."""
        tool = SelfEditTool(repo_path=temp_repo)
        # One file more than the allowed maximum.
        plan = EditPlan(
            approach="Test",
            files_to_modify=[f"file{i}.py" for i in range(MAX_FILES_PER_COMMIT + 1)],
            files_to_create=[],
            tests_to_add=[],
            explanation="Test",
        )
        assert tool._validate_plan(plan) is False

    async def test_validate_plan_protected_file(self, temp_repo):
        """Should reject plan modifying protected files."""
        tool = SelfEditTool(repo_path=temp_repo)
        # The self-edit tool itself is on the protected list.
        plan = EditPlan(
            approach="Test",
            files_to_modify=["src/tools/self_edit.py"],
            files_to_create=[],
            tests_to_add=[],
            explanation="Test",
        )
        assert tool._validate_plan(plan) is False

    async def test_validate_plan_valid(self, temp_repo):
        """Should accept valid plan."""
        tool = SelfEditTool(repo_path=temp_repo)
        plan = EditPlan(
            approach="Test",
            files_to_modify=["src/myproject/app.py"],
            files_to_create=[],
            tests_to_add=[],
            explanation="Test",
        )
        assert tool._validate_plan(plan) is True
@pytest.mark.asyncio
class TestSelfEditToolExecution:
    """Edit execution tests."""

    async def test_strip_code_fences(self, temp_repo):
        """Should strip markdown code fences."""
        tool = SelfEditTool(repo_path=temp_repo)
        content = "```python\ndef test(): pass\n```"
        result = tool._strip_code_fences(content)
        assert "```" not in result
        assert "def test(): pass" in result

    async def test_parse_list(self, temp_repo):
        """Should parse comma-separated lists."""
        tool = SelfEditTool(repo_path=temp_repo)
        assert tool._parse_list("a, b, c") == ["a", "b", "c"]
        # "none"/"n/a"/empty are treated as no items, case-insensitively.
        assert tool._parse_list("none") == []
        assert tool._parse_list("") == []
        assert tool._parse_list("N/A") == []
@pytest.mark.asyncio
class TestSelfEditToolIntegration:
    """Integration tests with mocked dependencies."""

    async def test_successful_edit_flow(self, temp_repo, mock_llm):
        """Test complete successful edit flow."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        # Force the non-Aider path, then stub the direct edit to succeed
        # so the flow proceeds to commit/merge/journal.
        with patch.object(tool, '_aider_available', return_value=False):
            with patch.object(tool, '_execute_direct_edit') as mock_exec:
                mock_exec.return_value = {
                    "success": True,
                    "test_output": "1 passed",
                }
                result = await tool.execute("Add error handling")
                assert result.success is True
                assert result.attempt_id is not None

    async def test_failed_edit_with_rollback(self, temp_repo, mock_llm):
        """Test failed edit with rollback."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        # Mock execution to always fail so every retry is exhausted.
        with patch.object(tool, '_execute_edit') as mock_exec:
            mock_exec.return_value = {
                "success": False,
                "error": "Tests failed",
                "test_output": "1 failed",
            }
            result = await tool.execute("Add broken feature")
            assert result.success is False
            assert result.attempt_id is not None
            assert "failed" in result.message.lower() or "retry" in result.message.lower()

    async def test_preflight_failure(self, temp_repo):
        """Should fail early if preflight checks fail."""
        tool = SelfEditTool(repo_path=temp_repo)
        # Make repo dirty
        (temp_repo / "dirty.txt").write_text("dirty")
        result = await tool.execute("Some task")
        assert result.success is False
        assert "pre-flight" in result.message.lower()
@pytest.mark.asyncio
class TestSelfEditMCPRegistration:
    """MCP tool registration tests."""

    async def test_register_self_edit_tool(self):
        """Should register with MCP registry."""
        mock_registry = MagicMock()
        mock_llm = AsyncMock()
        register_self_edit_tool(mock_registry, mock_llm)
        mock_registry.register.assert_called_once()
        call_args = mock_registry.register.call_args
        assert call_args.kwargs["name"] == "self_edit"
        # Self-modification must stay behind explicit user confirmation.
        assert call_args.kwargs["requires_confirmation"] is True
        assert "self_coding" in call_args.kwargs["category"]
@pytest.mark.asyncio
class TestSelfEditGlobalTool:
    """Global tool instance tests."""

    async def test_self_edit_tool_singleton(self, temp_repo):
        """Should use singleton pattern."""
        from tools import self_edit as self_edit_module
        # Reset singleton so this test controls first-use initialization.
        self_edit_module._self_edit_tool = None
        # First call should initialize exactly one SelfEditTool.
        with patch.object(SelfEditTool, '__init__', return_value=None) as mock_init:
            mock_init.return_value = None
            with patch.object(SelfEditTool, 'execute') as mock_execute:
                mock_execute.return_value = SelfEditResult(
                    success=True,
                    message="Test",
                )
                await self_edit_tool("Test task")
                mock_init.assert_called_once()
                mock_execute.assert_called_once()
@pytest.mark.asyncio
class TestSelfEditErrorHandling:
    """Error handling tests."""

    async def test_exception_handling(self, temp_repo):
        """Should handle exceptions gracefully."""
        tool = SelfEditTool(repo_path=temp_repo)
        # Mock preflight to raise: execute() must catch and report, not crash.
        with patch.object(tool, '_preflight_checks', side_effect=Exception("Unexpected")):
            result = await tool.execute("Test task")
            assert result.success is False
            assert "exception" in result.message.lower()

    async def test_llm_failure_fallback(self, temp_repo, mock_llm):
        """Should fallback when LLM fails."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await tool._ensure_indexed()
        # Mock LLM to fail
        mock_llm.chat.side_effect = Exception("LLM timeout")
        plan = await tool._plan_edit(
            task_description="Test",
            relevant_files=["src/app.py"],
            similar_attempts=[],
        )
        # Should return the heuristic fallback plan, never raise.
        assert isinstance(plan, EditPlan)
        assert len(plan.files_to_modify) > 0