Merge pull request #54 from AlexanderWhitestone/feature/self-coding-rebased
Feature/self coding rebased
This commit is contained in:
@@ -35,6 +35,7 @@ from dashboard.routes.upgrades import router as upgrades_router
|
||||
from dashboard.routes.work_orders import router as work_orders_router
|
||||
from dashboard.routes.tasks import router as tasks_router
|
||||
from dashboard.routes.scripture import router as scripture_router
|
||||
from dashboard.routes.self_coding import router as self_coding_router
|
||||
from router.api import router as cascade_router
|
||||
|
||||
logging.basicConfig(
|
||||
@@ -199,6 +200,7 @@ app.include_router(upgrades_router)
|
||||
app.include_router(work_orders_router)
|
||||
app.include_router(tasks_router)
|
||||
app.include_router(scripture_router)
|
||||
app.include_router(self_coding_router)
|
||||
app.include_router(cascade_router)
|
||||
|
||||
|
||||
|
||||
368
src/dashboard/routes/self_coding.py
Normal file
368
src/dashboard/routes/self_coding.py
Normal file
@@ -0,0 +1,368 @@
|
||||
"""Self-Coding Dashboard Routes.
|
||||
|
||||
API endpoints and HTMX views for the self-coding system:
|
||||
- Journal viewer with filtering
|
||||
- Stats dashboard
|
||||
- Manual task execution
|
||||
- Real-time status updates
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Form, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from self_coding import (
|
||||
CodebaseIndexer,
|
||||
ModificationJournal,
|
||||
Outcome,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/self-coding", tags=["self_coding"])
|
||||
|
||||
|
||||
# ── API Models ────────────────────────────────────────────────────────────
|
||||
|
||||
class JournalEntryResponse(BaseModel):
    """A single modification-journal entry, serialized for API responses."""

    id: int  # journal row id (0 when the entry was never persisted)
    timestamp: str  # ISO-8601 string; "" when the attempt has no timestamp
    task_description: str  # what the self-edit attempt was asked to do
    approach: str  # how the attempt planned to make the change
    files_modified: list[str]  # repo-relative paths touched by the attempt
    outcome: str  # Outcome enum value ("success", "failure", or "rollback")
    retry_count: int  # number of retries performed for this attempt
    has_reflection: bool  # True when a post-hoc reflection was recorded
|
||||
|
||||
|
||||
class StatsResponse(BaseModel):
    """Aggregate self-coding statistics for API responses."""

    total_attempts: int  # total self-edit attempts recorded in the journal
    success_count: int  # attempts that ended in Outcome "success"
    failure_count: int  # attempts that ended in Outcome "failure"
    rollback_count: int  # attempts that were rolled back
    success_rate: float  # overall ratio from the journal metrics (rendered as a percentage in templates)
    recent_failures: list[JournalEntryResponse]  # most recent failed attempts
|
||||
|
||||
|
||||
class ExecuteRequest(BaseModel):
    """Request body for POST /self-coding/api/execute."""

    task_description: str  # natural-language description of the desired change
|
||||
|
||||
|
||||
class ExecuteResponse(BaseModel):
    """Response body for POST /self-coding/api/execute."""

    success: bool  # whether the self-edit completed successfully
    message: str  # human-readable summary of the result
    attempt_id: Optional[int] = None  # journal id of the attempt, if logged
    # NOTE: pydantic copies list defaults per-instance, so `[]` here is safe
    # (unlike a plain-function mutable default).
    files_modified: list[str] = []
    commit_hash: Optional[str] = None  # git commit hash when a commit was made
|
||||
|
||||
|
||||
# ── Services (initialized lazily) ─────────────────────────────────────────
|
||||
|
||||
_journal: Optional[ModificationJournal] = None
|
||||
_indexer: Optional[CodebaseIndexer] = None
|
||||
|
||||
|
||||
def get_journal() -> ModificationJournal:
    """Return the process-wide ModificationJournal, creating it on first use."""
    global _journal
    if _journal is not None:
        return _journal
    _journal = ModificationJournal()
    return _journal
|
||||
|
||||
|
||||
def get_indexer() -> CodebaseIndexer:
    """Return the process-wide CodebaseIndexer, creating it on first use."""
    global _indexer
    if _indexer is not None:
        return _indexer
    _indexer = CodebaseIndexer()
    return _indexer
|
||||
|
||||
|
||||
# ── API Endpoints ─────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/api/journal", response_model=list[JournalEntryResponse])
async def api_journal_list(
    limit: int = 50,
    outcome: Optional[str] = None,
):
    """Get modification journal entries.

    Args:
        limit: Maximum number of entries to return.
        outcome: Filter by outcome ("success", "failure", "rollback").
            Currently only validated — the journal does not yet expose a
            filtered query, so results are recent failures regardless.

    Returns:
        A list of JournalEntryResponse items, or a 400 JSON error when
        ``outcome`` is not a valid Outcome value.
    """
    journal = get_journal()

    # Validate the filter up front so bad input produces a clear 400
    # instead of silently returning unfiltered results.
    if outcome is not None:
        try:
            Outcome(outcome)
        except ValueError:
            return JSONResponse(
                status_code=400,
                content={"error": f"Invalid outcome: {outcome}"},
            )

    # TODO: the journal currently exposes only recent failures; add a
    # filtered/mixed query method and honor `outcome` here.
    recent = await journal.get_recent_failures(limit=limit)

    return [
        JournalEntryResponse(
            id=entry.id or 0,
            timestamp=entry.timestamp.isoformat() if entry.timestamp else "",
            task_description=entry.task_description,
            approach=entry.approach,
            files_modified=entry.files_modified,
            outcome=entry.outcome.value,
            retry_count=entry.retry_count,
            has_reflection=bool(entry.reflection),
        )
        for entry in recent
    ]
|
||||
|
||||
|
||||
@router.get("/api/journal/{attempt_id}", response_model=dict)
async def api_journal_detail(attempt_id: int):
    """Return full details for a single journal attempt, or a 404 JSON error."""
    entry = await get_journal().get_by_id(attempt_id)

    if not entry:
        return JSONResponse(
            status_code=404,
            content={"error": "Attempt not found"},
        )

    # Normalize the timestamp once; "" signals an unknown time to callers.
    timestamp = entry.timestamp.isoformat() if entry.timestamp else ""

    return {
        "id": entry.id,
        "timestamp": timestamp,
        "task_description": entry.task_description,
        "approach": entry.approach,
        "files_modified": entry.files_modified,
        "diff": entry.diff,
        "test_results": entry.test_results,
        "outcome": entry.outcome.value,
        "failure_analysis": entry.failure_analysis,
        "reflection": entry.reflection,
        "retry_count": entry.retry_count,
    }
|
||||
|
||||
|
||||
@router.get("/api/stats", response_model=StatsResponse)
async def api_stats():
    """Return aggregate self-coding statistics plus the latest failures."""
    journal = get_journal()
    metrics = await journal.get_success_rate()
    failures = await journal.get_recent_failures(limit=5)

    def _to_response(attempt) -> JournalEntryResponse:
        # Convert one journal attempt into its API representation.
        return JournalEntryResponse(
            id=attempt.id or 0,
            timestamp=attempt.timestamp.isoformat() if attempt.timestamp else "",
            task_description=attempt.task_description,
            approach=attempt.approach,
            files_modified=attempt.files_modified,
            outcome=attempt.outcome.value,
            retry_count=attempt.retry_count,
            has_reflection=bool(attempt.reflection),
        )

    return StatsResponse(
        total_attempts=metrics["total"],
        success_count=metrics["success"],
        failure_count=metrics["failure"],
        rollback_count=metrics["rollback"],
        success_rate=metrics["overall"],
        recent_failures=[_to_response(f) for f in failures],
    )
|
||||
|
||||
|
||||
@router.post("/api/execute", response_model=ExecuteResponse)
async def api_execute(request: ExecuteRequest):
    """Execute a self-edit task.

    API endpoint for manual task execution. In production, this should
    require authentication and confirmation.
    """
    # Imported here (as in the rest of this module) rather than at the top
    # of the file; the tool is only needed by the execute endpoints.
    from tools.self_edit import SelfEditTool

    result = await SelfEditTool().execute(request.task_description)

    return ExecuteResponse(
        success=result.success,
        message=result.message,
        attempt_id=result.attempt_id,
        files_modified=result.files_modified,
        commit_hash=result.commit_hash,
    )
|
||||
|
||||
|
||||
@router.get("/api/codebase/summary")
async def api_codebase_summary():
    """Get codebase summary for LLM context.

    Reindexes any changed files first, then returns the summary together
    with the UTC time at which it was generated.
    """
    from datetime import datetime, timezone

    indexer = get_indexer()
    await indexer.index_changed()

    summary = await indexer.get_summary(max_tokens=3000)

    return {
        "summary": summary,
        # Was a placeholder empty string; report the actual generation time.
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
|
||||
|
||||
|
||||
@router.post("/api/codebase/reindex")
async def api_codebase_reindex():
    """Trigger a full codebase reindex and report per-file counts."""
    stats = await get_indexer().index_all()

    # Expose only the three documented counters from the indexer stats.
    return {key: stats[key] for key in ("indexed", "failed", "skipped")}
|
||||
|
||||
|
||||
# ── HTMX Page Routes ──────────────────────────────────────────────────────
|
||||
|
||||
@router.get("", response_class=HTMLResponse)
async def self_coding_page(request: Request):
    """Render the main self-coding dashboard page."""
    from dashboard.app import templates

    context = {"request": request, "title": "Self-Coding"}
    return templates.TemplateResponse("self_coding.html", context)
|
||||
|
||||
|
||||
@router.get("/journal", response_class=HTMLResponse)
async def journal_partial(
    request: Request,
    outcome: Optional[str] = None,
    limit: int = 20,
):
    """HTMX partial for journal entries.

    Args:
        request: Incoming request (required by TemplateResponse).
        outcome: Requested outcome filter; currently only echoed back to the
            template, since the journal exposes just recent failures.
        limit: Maximum number of entries to fetch.
    """
    from dashboard.app import templates

    journal = get_journal()

    # The original "failure" branch and its fallback ran the identical
    # query, so the dead branch is collapsed. TODO: add a journal method
    # that also returns successes and honor `outcome` here.
    entries = await journal.get_recent_failures(limit=limit)

    return templates.TemplateResponse(
        "partials/journal_entries.html",
        {
            "request": request,
            "entries": entries,
            "outcome_filter": outcome,
        },
    )
|
||||
|
||||
|
||||
@router.get("/stats", response_class=HTMLResponse)
async def stats_partial(request: Request):
    """HTMX partial rendering the stats cards."""
    from dashboard.app import templates

    metrics = await get_journal().get_success_rate()
    context = {"request": request, "metrics": metrics}
    return templates.TemplateResponse(
        "partials/self_coding_stats.html", context
    )
|
||||
|
||||
|
||||
@router.get("/execute-form", response_class=HTMLResponse)
async def execute_form_partial(request: Request):
    """HTMX partial rendering the execute-task form."""
    from dashboard.app import templates

    return templates.TemplateResponse(
        "partials/execute_form.html", {"request": request}
    )
|
||||
|
||||
|
||||
@router.post("/execute", response_class=HTMLResponse)
async def execute_task(
    request: Request,
    task_description: str = Form(...),
):
    """HTMX endpoint that runs a self-edit task and renders the result partial."""
    from dashboard.app import templates
    from tools.self_edit import SelfEditTool

    result = await SelfEditTool().execute(task_description)

    context = {"request": request, "result": result}
    return templates.TemplateResponse("partials/execute_result.html", context)
|
||||
|
||||
|
||||
@router.get("/journal/{attempt_id}/detail", response_class=HTMLResponse)
async def journal_entry_detail(request: Request, attempt_id: int):
    """HTMX partial showing full detail for one journal entry."""
    from dashboard.app import templates

    entry = await get_journal().get_by_id(attempt_id)

    if not entry:
        # Render the shared error partial rather than a bare 404 so the
        # HTMX swap still produces visible feedback.
        return templates.TemplateResponse(
            "partials/error.html",
            {"request": request, "message": "Attempt not found"},
        )

    return templates.TemplateResponse(
        "partials/journal_entry_detail.html",
        {"request": request, "entry": entry},
    )
|
||||
@@ -40,6 +40,7 @@
|
||||
<a href="/memory" class="mc-test-link">MEMORY</a>
|
||||
<a href="/router/status" class="mc-test-link">ROUTER</a>
|
||||
<a href="/self-modify/queue" class="mc-test-link">UPGRADES</a>
|
||||
<a href="/self-coding" class="mc-test-link">SELF-CODING</a>
|
||||
<a href="/work-orders/queue" class="mc-test-link">WORK ORDERS</a>
|
||||
<a href="/creative/ui" class="mc-test-link">CREATIVE</a>
|
||||
<a href="/mobile" class="mc-test-link" title="Mobile-optimized view">MOBILE</a>
|
||||
@@ -71,6 +72,7 @@
|
||||
<a href="/lightning/ledger" class="mc-mobile-link">LEDGER</a>
|
||||
<a href="/memory" class="mc-mobile-link">MEMORY</a>
|
||||
<a href="/work-orders/queue" class="mc-mobile-link">WORK ORDERS</a>
|
||||
<a href="/self-coding" class="mc-mobile-link">SELF-CODING</a>
|
||||
<a href="/creative/ui" class="mc-mobile-link">CREATIVE</a>
|
||||
<a href="/voice/button" class="mc-mobile-link">VOICE</a>
|
||||
<a href="/mobile" class="mc-mobile-link">MOBILE</a>
|
||||
|
||||
7
src/dashboard/templates/partials/error.html
Normal file
7
src/dashboard/templates/partials/error.html
Normal file
@@ -0,0 +1,7 @@
|
||||
{# Error partial #}
|
||||
<div class="alert alert-danger mb-0">
|
||||
<div class="d-flex align-items-center gap-2">
|
||||
<span>⚠️</span>
|
||||
<span>{{ message }}</span>
|
||||
</div>
|
||||
</div>
|
||||
45
src/dashboard/templates/partials/execute_form.html
Normal file
45
src/dashboard/templates/partials/execute_form.html
Normal file
@@ -0,0 +1,45 @@
|
||||
{# Execute task form partial #}
|
||||
<form hx-post="/self-coding/execute" hx-target="#execute-result" hx-indicator="#execute-loading">
|
||||
<div class="mb-3">
|
||||
<label for="task-description" class="form-label">Task Description</label>
|
||||
<textarea
|
||||
class="form-control form-control-sm bg-dark text-light border-secondary"
|
||||
id="task-description"
|
||||
name="task_description"
|
||||
rows="4"
|
||||
placeholder="Describe what you want Timmy to do...
|
||||
|
||||
Example: Add error handling to the /health endpoint that returns 503 when Ollama is unreachable."
|
||||
required
|
||||
></textarea>
|
||||
<div class="form-text">
|
||||
Be specific. Include what to change and what the expected behavior should be.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="alert alert-warning d-flex align-items-start gap-2 py-2">
|
||||
<span>⚠️</span>
|
||||
<small>
|
||||
<strong>Warning:</strong> This will modify source code. Changes will be tested and committed.
|
||||
Safety constraints: max 3 files, only files with tests, protected files cannot be modified.
|
||||
</small>
|
||||
</div>
|
||||
|
||||
<div class="d-flex justify-content-end gap-2">
|
||||
<button type="button" class="btn btn-sm btn-outline-secondary" onclick="document.getElementById('execute-modal').close()">
|
||||
Cancel
|
||||
</button>
|
||||
<button type="submit" class="btn btn-sm btn-primary" onclick="document.getElementById('execute-modal').close()">
|
||||
Execute Task
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<div id="execute-loading" class="htmx-indicator mt-3">
|
||||
<div class="d-flex align-items-center gap-2 text-muted">
|
||||
<div class="spinner-border spinner-border-sm" role="status"></div>
|
||||
<small>Executing self-edit task... This may take a few minutes.</small>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="execute-result" class="mt-3"></div>
|
||||
58
src/dashboard/templates/partials/execute_result.html
Normal file
58
src/dashboard/templates/partials/execute_result.html
Normal file
@@ -0,0 +1,58 @@
|
||||
{# Execute task result partial #}
|
||||
<div class="alert {% if result.success %}alert-success{% else %}alert-danger{% endif %} mb-0">
|
||||
<div class="d-flex align-items-start gap-2">
|
||||
<span class="fs-5">{% if result.success %}✅{% else %}❌{% endif %}</span>
|
||||
<div>
|
||||
<h6 class="alert-heading mb-1">
|
||||
{% if result.success %}Success!{% else %}Failed{% endif %}
|
||||
</h6>
|
||||
<p class="mb-0 small">{{ result.message }}</p>
|
||||
|
||||
{% if result.success %}
|
||||
{% if result.files_modified %}
|
||||
<div class="mt-2">
|
||||
<small class="text-muted">Files modified:</small>
|
||||
<ul class="list-unstyled mb-0 small">
|
||||
{% for file in result.files_modified %}
|
||||
<li><code>{{ file }}</code></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if result.commit_hash %}
|
||||
<div class="mt-2 small">
|
||||
<span class="text-muted">Commit:</span>
|
||||
<code>{{ result.commit_hash[:8] }}</code>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if result.attempt_id %}
|
||||
<div class="mt-2">
|
||||
<a href="/self-coding#journal-{{ result.attempt_id }}" class="btn btn-sm btn-outline-success">
|
||||
View in Journal
|
||||
</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{% if result.test_results %}
|
||||
<div class="mt-2">
|
||||
<small class="text-muted">Test output:</small>
|
||||
<pre class="small bg-black bg-opacity-25 p-2 rounded mb-0 mt-1 overflow-auto" style="max-height: 150px;"><code>{{ result.test_results[:500] }}{% if result.test_results|length > 500 %}...{% endif %}</code></pre>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{# Refresh journal and stats after execution #}
|
||||
{% if result.success %}
|
||||
<script>
|
||||
// Refresh journal and stats after successful execution
|
||||
setTimeout(() => {
|
||||
htmx.ajax('GET', '/self-coding/journal', { target: '#journal-container' });
|
||||
htmx.ajax('GET', '/self-coding/stats', { target: '#stats-container' });
|
||||
}, 500);
|
||||
</script>
|
||||
{% endif %}
|
||||
64
src/dashboard/templates/partials/journal_entries.html
Normal file
64
src/dashboard/templates/partials/journal_entries.html
Normal file
@@ -0,0 +1,64 @@
|
||||
{# Journal entries list partial #}
|
||||
{% if entries %}
|
||||
<div class="list-group list-group-flush">
|
||||
{% for entry in entries %}
|
||||
<div class="list-group-item journal-entry {{ entry.outcome.value }} p-3"
|
||||
hx-get="/self-coding/journal/{{ entry.id }}/detail"
|
||||
hx-target="#journal-detail-{{ entry.id }}"
|
||||
hx-swap="innerHTML"
|
||||
style="cursor: pointer;">
|
||||
|
||||
<div class="d-flex justify-content-between align-items-start mb-2">
|
||||
<div class="d-flex align-items-center gap-2">
|
||||
{# Outcome icon #}
|
||||
{% if entry.outcome.value == 'success' %}
|
||||
<span class="badge bg-success">✓</span>
|
||||
{% elif entry.outcome.value == 'failure' %}
|
||||
<span class="badge bg-danger">✗</span>
|
||||
{% else %}
|
||||
<span class="badge bg-warning text-dark">↩</span>
|
||||
{% endif %}
|
||||
|
||||
<span class="text-muted small">
|
||||
#{{ entry.id }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<small class="text-muted">
|
||||
{{ entry.timestamp.strftime('%Y-%m-%d %H:%M') if entry.timestamp else 'Unknown' }}
|
||||
</small>
|
||||
</div>
|
||||
|
||||
<p class="mb-1 fw-medium">{{ entry.task_description }}</p>
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center">
|
||||
<div class="small text-muted">
|
||||
{% if entry.files_modified %}
|
||||
<span class="me-2">📁 {{ entry.files_modified|length }} file(s)</span>
|
||||
{% endif %}
|
||||
|
||||
{% if entry.retry_count > 0 %}
|
||||
<span class="me-2">🔄 {{ entry.retry_count }} retries</span>
|
||||
{% endif %}
|
||||
|
||||
{% if entry.reflection %}
|
||||
<span title="Has reflection">💡</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<span class="badge {% if entry.outcome.value == 'success' %}bg-success{% elif entry.outcome.value == 'failure' %}bg-danger{% else %}bg-warning text-dark{% endif %}">
|
||||
{{ entry.outcome.value|upper }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{# Detail container - populated on click #}
|
||||
<div id="journal-detail-{{ entry.id }}" class="mt-3"></div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="text-center py-5 text-muted">
|
||||
<p class="mb-0">No journal entries found.</p>
|
||||
<small>Self-edit attempts will appear here.</small>
|
||||
</div>
|
||||
{% endif %}
|
||||
54
src/dashboard/templates/partials/journal_entry_detail.html
Normal file
54
src/dashboard/templates/partials/journal_entry_detail.html
Normal file
@@ -0,0 +1,54 @@
|
||||
{# Journal entry detail partial #}
|
||||
<div class="card mt-3 bg-dark-subtle border-0">
|
||||
<div class="card-body">
|
||||
<h6 class="card-subtitle mb-3 text-muted">Attempt Details</h6>
|
||||
|
||||
{% if entry.approach %}
|
||||
<div class="mb-3">
|
||||
<small class="text-muted">Approach:</small>
|
||||
<p class="mb-0">{{ entry.approach }}</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if entry.files_modified %}
|
||||
<div class="mb-3">
|
||||
<small class="text-muted">Files Modified:</small>
|
||||
<ul class="list-unstyled mb-0">
|
||||
{% for file in entry.files_modified %}
|
||||
<li><code class="small">{{ file }}</code></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if entry.diff %}
|
||||
<div class="mb-3">
|
||||
<small class="text-muted">Diff:</small>
|
||||
<pre class="small bg-black p-2 rounded overflow-auto" style="max-height: 200px;"><code>{{ entry.diff[:500] }}{% if entry.diff|length > 500 %}...{% endif %}</code></pre>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if entry.test_results %}
|
||||
<div class="mb-3">
|
||||
<small class="text-muted">Test Results:</small>
|
||||
<pre class="small bg-black p-2 rounded overflow-auto" style="max-height: 150px;"><code>{{ entry.test_results[:500] }}{% if entry.test_results|length > 500 %}...{% endif %}</code></pre>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if entry.failure_analysis %}
|
||||
<div class="mb-3">
|
||||
<small class="text-danger">Failure Analysis:</small>
|
||||
<p class="mb-0 text-danger-emphasis">{{ entry.failure_analysis }}</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if entry.reflection %}
|
||||
<div class="mb-0">
|
||||
<small class="text-info">Reflection:</small>
|
||||
<div class="p-2 bg-info-subtle rounded">
|
||||
{{ entry.reflection|markdown }}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
71
src/dashboard/templates/partials/self_coding_stats.html
Normal file
71
src/dashboard/templates/partials/self_coding_stats.html
Normal file
@@ -0,0 +1,71 @@
|
||||
{# Stats cards partial for self-coding dashboard #}
|
||||
<div class="row g-3">
|
||||
<!-- Total Attempts -->
|
||||
<div class="col-md-3 col-6">
|
||||
<div class="card border-0 shadow-sm stat-card h-100">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-start">
|
||||
<div>
|
||||
<h6 class="text-muted small mb-1">Total Attempts</h6>
|
||||
<h3 class="mb-0">{{ metrics.total }}</h3>
|
||||
</div>
|
||||
<span class="fs-4">📝</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Success Rate -->
|
||||
<div class="col-md-3 col-6">
|
||||
<div class="card border-0 shadow-sm stat-card h-100">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-start">
|
||||
<div>
|
||||
<h6 class="text-muted small mb-1">Success Rate</h6>
|
||||
<h3 class="mb-0 {% if metrics.overall >= 0.7 %}text-success{% elif metrics.overall >= 0.4 %}text-warning{% else %}text-danger{% endif %}">
|
||||
{{ "%.0f"|format(metrics.overall * 100) }}%
|
||||
</h3>
|
||||
</div>
|
||||
<span class="fs-4">📊</span>
|
||||
</div>
|
||||
<div class="progress mt-2" style="height: 4px;">
|
||||
<div class="progress-bar {% if metrics.overall >= 0.7 %}bg-success{% elif metrics.overall >= 0.4 %}bg-warning{% else %}bg-danger{% endif %}"
|
||||
style="width: {{ metrics.overall * 100 }}%"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Successes -->
|
||||
<div class="col-md-3 col-6">
|
||||
<div class="card border-0 shadow-sm stat-card h-100 border-start border-3 border-success">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-start">
|
||||
<div>
|
||||
<h6 class="text-muted small mb-1">Successes</h6>
|
||||
<h3 class="mb-0 text-success">{{ metrics.success }}</h3>
|
||||
</div>
|
||||
<span class="fs-4">✅</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Failures -->
|
||||
<div class="col-md-3 col-6">
|
||||
<div class="card border-0 shadow-sm stat-card h-100 border-start border-3 border-danger">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-start">
|
||||
<div>
|
||||
<h6 class="text-muted small mb-1">Failures</h6>
|
||||
<h3 class="mb-0 text-danger">{{ metrics.failure + metrics.rollback }}</h3>
|
||||
</div>
|
||||
<span class="fs-4">❌</span>
|
||||
</div>
|
||||
<small class="text-muted">
|
||||
{{ metrics.failure }} fail / {{ metrics.rollback }} rollback
|
||||
</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
184
src/dashboard/templates/self_coding.html
Normal file
184
src/dashboard/templates/self_coding.html
Normal file
@@ -0,0 +1,184 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Self-Coding — Timmy Time{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container-fluid py-4">
|
||||
<!-- Header -->
|
||||
<div class="d-flex justify-content-between align-items-center mb-4">
|
||||
<div>
|
||||
<h1 class="h3 mb-0">Self-Coding</h1>
|
||||
<p class="text-muted small mb-0">Timmy's ability to modify its own source code</p>
|
||||
</div>
|
||||
<div class="d-flex gap-2">
|
||||
<button class="btn btn-sm btn-outline-info" hx-get="/self-coding/stats" hx-target="#stats-container" hx-indicator="#stats-loading">
|
||||
Refresh Stats
|
||||
</button>
|
||||
<button class="btn btn-sm btn-primary" hx-get="/self-coding/execute-form" hx-target="#execute-modal-content" onclick="document.getElementById('execute-modal').showModal()">
|
||||
+ New Task
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Stats Cards -->
|
||||
<div id="stats-container" hx-get="/self-coding/stats" hx-trigger="load">
|
||||
<div id="stats-loading" class="htmx-indicator">
|
||||
<div class="d-flex justify-content-center py-4">
|
||||
<div class="spinner-border text-info" role="status">
|
||||
<span class="visually-hidden">Loading stats...</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Main Content Grid -->
|
||||
<div class="row g-4 mt-2">
|
||||
<!-- Left Column: Journal -->
|
||||
<div class="col-lg-8">
|
||||
<div class="card border-0 shadow-sm">
|
||||
<div class="card-header bg-transparent border-secondary d-flex justify-content-between align-items-center">
|
||||
<h5 class="mb-0">Modification Journal</h5>
|
||||
<div class="btn-group btn-group-sm">
|
||||
<button class="btn btn-outline-secondary active" hx-get="/self-coding/journal" hx-target="#journal-container">All</button>
|
||||
<button class="btn btn-outline-secondary" hx-get="/self-coding/journal?outcome=success" hx-target="#journal-container">Success</button>
|
||||
<button class="btn btn-outline-secondary" hx-get="/self-coding/journal?outcome=failure" hx-target="#journal-container">Failed</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body p-0">
|
||||
<div id="journal-container" hx-get="/self-coding/journal" hx-trigger="load" class="journal-list">
|
||||
<div class="d-flex justify-content-center py-5">
|
||||
<div class="spinner-border text-info" role="status">
|
||||
<span class="visually-hidden">Loading journal...</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Right Column: Quick Actions & Info -->
|
||||
<div class="col-lg-4">
|
||||
<!-- Quick Actions -->
|
||||
<div class="card border-0 shadow-sm mb-4">
|
||||
<div class="card-header bg-transparent border-secondary">
|
||||
<h5 class="mb-0">Quick Actions</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="d-grid gap-2">
|
||||
<button class="btn btn-outline-info" hx-post="/self-coding/api/codebase/reindex" hx-swap="none" hx-confirm="Reindex codebase? This may take a moment.">
|
||||
🔄 Reindex Codebase
|
||||
</button>
|
||||
<a href="/self-coding/api/codebase/summary" target="_blank" class="btn btn-outline-secondary">
|
||||
📄 View Codebase Summary
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Safety Info -->
|
||||
<div class="card border-0 shadow-sm mb-4">
|
||||
<div class="card-header bg-transparent border-secondary">
|
||||
<h5 class="mb-0">Safety Constraints</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<ul class="list-unstyled small mb-0">
|
||||
<li class="mb-2">✓ Max 3 files per commit</li>
|
||||
<li class="mb-2">✓ Max 100 lines changed</li>
|
||||
<li class="mb-2">✓ Only files with test coverage</li>
|
||||
<li class="mb-2">✓ Max 3 retries on failure</li>
|
||||
<li class="mb-2">✓ Protected files cannot be modified</li>
|
||||
<li>✓ All changes on feature branches</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- How It Works -->
|
||||
<div class="card border-0 shadow-sm">
|
||||
<div class="card-header bg-transparent border-secondary">
|
||||
<h5 class="mb-0">How It Works</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<ol class="small mb-0">
|
||||
<li class="mb-2">Receive task description</li>
|
||||
<li class="mb-2">Find relevant files via indexer</li>
|
||||
<li class="mb-2">Check journal for similar attempts</li>
|
||||
<li class="mb-2">Create feature branch</li>
|
||||
<li class="mb-2">Plan edit with LLM</li>
|
||||
<li class="mb-2">Execute via Aider or direct edit</li>
|
||||
<li class="mb-2">Run tests</li>
|
||||
<li class="mb-2">Commit on success, rollback on failure</li>
|
||||
<li>Log attempt and reflect</li>
|
||||
</ol>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Execute Modal -->
|
||||
<dialog id="execute-modal" class="rounded border-0 shadow-lg" style="max-width: 600px; width: 90%; background: var(--bs-body-bg);">
|
||||
<div class="p-4">
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
<h5 class="mb-0">Execute Self-Edit Task</h5>
|
||||
<button type="button" class="btn-close" onclick="document.getElementById('execute-modal').close()"></button>
|
||||
</div>
|
||||
<div id="execute-modal-content">
|
||||
<!-- Form loaded via HTMX -->
|
||||
</div>
|
||||
</div>
|
||||
</dialog>
|
||||
|
||||
<style>
|
||||
.journal-list {
|
||||
max-height: 600px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.journal-entry {
|
||||
border-left: 3px solid transparent;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.journal-entry:hover {
|
||||
background-color: rgba(255, 255, 255, 0.03);
|
||||
}
|
||||
|
||||
.journal-entry.success {
|
||||
border-left-color: #198754;
|
||||
}
|
||||
|
||||
.journal-entry.failure {
|
||||
border-left-color: #dc3545;
|
||||
}
|
||||
|
||||
.journal-entry.rollback {
|
||||
border-left-color: #fd7e14;
|
||||
}
|
||||
|
||||
.stat-card {
|
||||
transition: transform 0.2s ease;
|
||||
}
|
||||
|
||||
.stat-card:hover {
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* Custom scrollbar for journal */
|
||||
.journal-list::-webkit-scrollbar {
|
||||
width: 6px;
|
||||
}
|
||||
|
||||
.journal-list::-webkit-scrollbar-track {
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
}
|
||||
|
||||
.journal-list::-webkit-scrollbar-thumb {
|
||||
background: rgba(255, 255, 255, 0.2);
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.journal-list::-webkit-scrollbar-thumb:hover {
|
||||
background: rgba(255, 255, 255, 0.3);
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
824
src/tools/self_edit.py
Normal file
824
src/tools/self_edit.py
Normal file
@@ -0,0 +1,824 @@
|
||||
"""Self-Edit MCP Tool — Timmy's ability to modify its own source code.
|
||||
|
||||
This is the core self-modification orchestrator that:
|
||||
1. Receives task descriptions
|
||||
2. Queries codebase indexer for relevant files
|
||||
3. Queries modification journal for similar past attempts
|
||||
4. Creates feature branches via GitSafety
|
||||
5. Plans changes with LLM
|
||||
6. Executes via Aider (preferred) or direct editing (fallback)
|
||||
7. Runs tests via pytest
|
||||
8. Commits on success, rolls back on failure
|
||||
9. Logs outcomes to ModificationJournal
|
||||
10. Generates reflections
|
||||
|
||||
Usage:
|
||||
from tools.self_edit import self_edit_tool
|
||||
from mcp.registry import tool_registry
|
||||
|
||||
# Register with MCP
|
||||
tool_registry.register("self_edit", self_edit_schema, self_edit_tool)
|
||||
|
||||
# Invoke
|
||||
result = await tool_registry.execute("self_edit", {
|
||||
"task_description": "Add error handling to health endpoint"
|
||||
})
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
from config import settings
|
||||
|
||||
# Phase 1 imports
|
||||
from self_coding import (
|
||||
CodebaseIndexer,
|
||||
GitSafety,
|
||||
ModificationAttempt,
|
||||
ModificationJournal,
|
||||
Outcome,
|
||||
ReflectionService,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Safety constraints (Phase 1 hard limits)
MAX_FILES_PER_COMMIT = 3  # upper bound on files a single self-edit may touch
MAX_LINES_CHANGED = 100  # NOTE(review): declared but not enforced anywhere in this module — confirm intent
# Files the tool must never modify: the self-modification machinery itself.
PROTECTED_FILES = {
    "src/tools/self_edit.py",
    "src/self_coding/git_safety.py",
    "src/self_coding/codebase_indexer.py",
    "src/self_coding/modification_journal.py",
    "src/self_coding/reflection.py",
}
MAX_RETRIES = 3  # re-attempts after a failed edit before giving up
|
||||
|
||||
|
||||
@dataclass
class SelfEditResult:
    """Result of a self-edit operation."""

    # Whether the edit was applied, tested, and merged successfully.
    success: bool
    # Human-readable summary of what happened (or why it failed).
    message: str
    # Journal row id of the logged attempt, if one was recorded.
    attempt_id: Optional[int] = None
    # Paths of files that were changed.
    files_modified: list[str] = field(default_factory=list)
    # Commit hash of the merged change (None on failure).
    commit_hash: Optional[str] = None
    # Raw test-runner output captured during the attempt.
    test_results: str = ""
    # Diff of the change (may be truncated when journaled).
    diff: str = ""
|
||||
|
||||
|
||||
@dataclass
class EditPlan:
    """Plan for a self-edit operation."""

    # Brief description of how the task will be implemented.
    approach: str
    # Existing files the edit will change (bounded by MAX_FILES_PER_COMMIT).
    files_to_modify: list[str]
    # New files the edit will create, if any.
    files_to_create: list[str]
    # Test files to add or extend alongside the change.
    tests_to_add: list[str]
    # Rationale for choosing this approach.
    explanation: str
|
||||
|
||||
|
||||
class SelfEditTool:
|
||||
"""Self-modification orchestrator.
|
||||
|
||||
This class encapsulates the complete self-edit workflow:
|
||||
- Pre-flight checks
|
||||
- Context gathering (indexer + journal)
|
||||
- Branch creation
|
||||
- Edit planning (LLM)
|
||||
- Execution (Aider or direct)
|
||||
- Testing
|
||||
- Commit/rollback
|
||||
- Logging and reflection
|
||||
|
||||
Usage:
|
||||
tool = SelfEditTool(repo_path="/path/to/repo")
|
||||
result = await tool.execute("Add error handling to health endpoint")
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
repo_path: Optional[Path] = None,
|
||||
llm_adapter: Optional[object] = None,
|
||||
) -> None:
|
||||
"""Initialize SelfEditTool.
|
||||
|
||||
Args:
|
||||
repo_path: Path to repository. Defaults to current directory.
|
||||
llm_adapter: LLM adapter for planning and reflection
|
||||
"""
|
||||
self.repo_path = Path(repo_path) if repo_path else Path.cwd()
|
||||
self.llm_adapter = llm_adapter
|
||||
|
||||
# Initialize Phase 1 services
|
||||
self.git = GitSafety(repo_path=self.repo_path)
|
||||
self.indexer = CodebaseIndexer(repo_path=self.repo_path)
|
||||
self.journal = ModificationJournal()
|
||||
self.reflection = ReflectionService(llm_adapter=llm_adapter)
|
||||
|
||||
# Ensure codebase is indexed
|
||||
self._indexing_done = False
|
||||
|
||||
logger.info("SelfEditTool initialized for %s", self.repo_path)
|
||||
|
||||
async def _ensure_indexed(self) -> None:
|
||||
"""Ensure codebase is indexed."""
|
||||
if not self._indexing_done:
|
||||
await self.indexer.index_changed()
|
||||
self._indexing_done = True
|
||||
|
||||
async def execute(
|
||||
self,
|
||||
task_description: str,
|
||||
context: Optional[dict] = None,
|
||||
) -> SelfEditResult:
|
||||
"""Execute a self-edit task.
|
||||
|
||||
This is the main entry point for self-modification.
|
||||
|
||||
Args:
|
||||
task_description: What to do (e.g., "Add error handling")
|
||||
context: Optional additional context
|
||||
|
||||
Returns:
|
||||
SelfEditResult with success/failure details
|
||||
"""
|
||||
logger.info("Starting self-edit: %s", task_description[:50])
|
||||
|
||||
try:
|
||||
# Step 1: Pre-flight checks
|
||||
if not await self._preflight_checks():
|
||||
return SelfEditResult(
|
||||
success=False,
|
||||
message="Pre-flight checks failed. See logs for details.",
|
||||
)
|
||||
|
||||
# Step 2: Gather context
|
||||
await self._ensure_indexed()
|
||||
relevant_files = await self._get_relevant_files(task_description)
|
||||
similar_attempts = await self._get_similar_attempts(task_description)
|
||||
|
||||
# Step 3: Create feature branch
|
||||
branch_name = f"timmy/self-edit/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
|
||||
await self.git.create_branch(branch_name)
|
||||
logger.info("Created branch: %s", branch_name)
|
||||
|
||||
# Step 4: Take snapshot for rollback
|
||||
snapshot = await self.git.snapshot(run_tests=False)
|
||||
|
||||
# Step 5: Plan the edit
|
||||
plan = await self._plan_edit(
|
||||
task_description,
|
||||
relevant_files,
|
||||
similar_attempts,
|
||||
)
|
||||
|
||||
# Validate plan against safety constraints
|
||||
if not self._validate_plan(plan):
|
||||
return SelfEditResult(
|
||||
success=False,
|
||||
message=f"Plan violates safety constraints: {plan.files_to_modify}",
|
||||
)
|
||||
|
||||
# Step 6: Execute the edit
|
||||
execution_result = await self._execute_edit(plan, task_description)
|
||||
|
||||
if not execution_result["success"]:
|
||||
# Attempt retries
|
||||
for retry in range(MAX_RETRIES):
|
||||
logger.info("Retry %d/%d", retry + 1, MAX_RETRIES)
|
||||
|
||||
# Rollback to clean state
|
||||
await self.git.rollback(snapshot)
|
||||
|
||||
# Try again with adjusted approach
|
||||
execution_result = await self._execute_edit(
|
||||
plan,
|
||||
task_description,
|
||||
retry_count=retry + 1,
|
||||
)
|
||||
|
||||
if execution_result["success"]:
|
||||
break
|
||||
|
||||
if not execution_result["success"]:
|
||||
# Final rollback and log failure
|
||||
await self.git.rollback(snapshot)
|
||||
await self.git._run_git("checkout", "main") # Return to main
|
||||
|
||||
attempt_id = await self._log_failure(
|
||||
task_description,
|
||||
plan,
|
||||
execution_result["test_output"],
|
||||
execution_result.get("error", "Unknown error"),
|
||||
)
|
||||
|
||||
return SelfEditResult(
|
||||
success=False,
|
||||
message=f"Failed after {MAX_RETRIES} retries",
|
||||
attempt_id=attempt_id,
|
||||
test_results=execution_result.get("test_output", ""),
|
||||
)
|
||||
|
||||
# Step 7: Commit and merge
|
||||
commit_hash = await self.git.commit(
|
||||
message=f"Self-edit: {task_description[:50]}",
|
||||
files=plan.files_to_modify + plan.files_to_create + plan.tests_to_add,
|
||||
)
|
||||
|
||||
# Merge to main (tests already passed in execution)
|
||||
await self.git.merge_to_main(branch_name, require_tests=False)
|
||||
|
||||
# Step 8: Log success
|
||||
diff = await self.git.get_diff(snapshot.commit_hash, commit_hash)
|
||||
attempt_id = await self._log_success(
|
||||
task_description,
|
||||
plan,
|
||||
commit_hash,
|
||||
execution_result.get("test_output", ""),
|
||||
diff,
|
||||
)
|
||||
|
||||
return SelfEditResult(
|
||||
success=True,
|
||||
message=f"Successfully modified {len(plan.files_to_modify)} files",
|
||||
attempt_id=attempt_id,
|
||||
files_modified=plan.files_to_modify,
|
||||
commit_hash=commit_hash,
|
||||
test_results=execution_result.get("test_output", ""),
|
||||
diff=diff,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception("Self-edit failed with exception")
|
||||
return SelfEditResult(
|
||||
success=False,
|
||||
message=f"Exception: {str(e)}",
|
||||
)
|
||||
|
||||
    async def _preflight_checks(self) -> bool:
        """Run pre-flight safety checks before any modification.

        Checks, in order: clean working tree, currently on the main
        branch, and the self-modification feature flag enabled.

        Returns:
            True if all checks pass.
        """
        # Check if repo is clean — refuse to edit over uncommitted work.
        if not await self.git.is_clean():
            logger.error("Pre-flight failed: Working directory not clean")
            return False

        # Check if we're on main — edits always branch off main.
        current_branch = await self.git.get_current_branch()
        if current_branch != self.git.main_branch:
            logger.error("Pre-flight failed: Not on %s branch (on %s)",
                         self.git.main_branch, current_branch)
            return False

        # Check if self-modification is enabled.
        # NOTE(review): the getattr default is True, so a missing setting
        # fails OPEN — confirm that is intended for a kill-switch.
        if not getattr(settings, 'self_modify_enabled', True):
            logger.error("Pre-flight failed: Self-modification disabled in config")
            return False

        return True
|
||||
|
||||
async def _get_relevant_files(self, task_description: str) -> list[str]:
|
||||
"""Get files relevant to the task.
|
||||
|
||||
Args:
|
||||
task_description: Task to find relevant files for
|
||||
|
||||
Returns:
|
||||
List of file paths
|
||||
"""
|
||||
files = await self.indexer.get_relevant_files(task_description, limit=10)
|
||||
|
||||
# Filter to only files with test coverage
|
||||
files_with_tests = [
|
||||
f for f in files
|
||||
if await self.indexer.has_test_coverage(f)
|
||||
]
|
||||
|
||||
logger.info("Found %d relevant files (%d with tests)",
|
||||
len(files), len(files_with_tests))
|
||||
|
||||
return files_with_tests[:MAX_FILES_PER_COMMIT]
|
||||
|
||||
async def _get_similar_attempts(
|
||||
self,
|
||||
task_description: str,
|
||||
) -> list[ModificationAttempt]:
|
||||
"""Get similar past modification attempts.
|
||||
|
||||
Args:
|
||||
task_description: Task to find similar attempts for
|
||||
|
||||
Returns:
|
||||
List of similar attempts
|
||||
"""
|
||||
similar = await self.journal.find_similar(task_description, limit=5)
|
||||
logger.info("Found %d similar past attempts", len(similar))
|
||||
return similar
|
||||
|
||||
    async def _plan_edit(
        self,
        task_description: str,
        relevant_files: list[str],
        similar_attempts: list[ModificationAttempt],
    ) -> EditPlan:
        """Plan the edit with the LLM; fall back to a heuristic plan without one.

        Args:
            task_description: What to do.
            relevant_files: Files that might need modification.
            similar_attempts: Similar past attempts for context.

        Returns:
            EditPlan with approach and file list.
        """
        if not self.llm_adapter:
            # Fallback: simple plan — just target the top relevant files.
            return EditPlan(
                approach=f"Edit files to implement: {task_description}",
                files_to_modify=relevant_files[:MAX_FILES_PER_COMMIT],
                files_to_create=[],
                tests_to_add=[],
                explanation="No LLM available, using heuristic plan",
            )

        # Build prompt with context from the indexer and the journal.
        codebase_summary = await self.indexer.get_summary(max_tokens=2000)

        similar_context = ""
        if similar_attempts:
            similar_context = "\n\nSimilar past attempts:\n"
            for attempt in similar_attempts:
                similar_context += f"- {attempt.task_description} ({attempt.outcome.value})\n"
                if attempt.reflection:
                    similar_context += f" Lesson: {attempt.reflection[:100]}...\n"

        prompt = f"""You are planning a code modification for a Python project.

Task: {task_description}

Codebase Summary:
{codebase_summary}

Potentially relevant files (all have test coverage):
{chr(10).join(f"- {f}" for f in relevant_files)}
{similar_context}

Create a plan for implementing this task. You can modify at most {MAX_FILES_PER_COMMIT} files.

Respond in this format:
APPROACH: <brief description of the approach>
FILES_TO_MODIFY: <comma-separated list of file paths>
FILES_TO_CREATE: <comma-separated list of new file paths (if any)>
TESTS_TO_ADD: <comma-separated list of test files to add/modify>
EXPLANATION: <brief explanation of why this approach>
"""

        try:
            response = await self.llm_adapter.chat(message=prompt)
            content = response.content

            # Parse the line-oriented "FIELD: value" response format.
            approach = self._extract_field(content, "APPROACH")
            files_to_modify = self._parse_list(self._extract_field(content, "FILES_TO_MODIFY"))
            files_to_create = self._parse_list(self._extract_field(content, "FILES_TO_CREATE"))
            tests_to_add = self._parse_list(self._extract_field(content, "TESTS_TO_ADD"))
            explanation = self._extract_field(content, "EXPLANATION")

            return EditPlan(
                approach=approach or "No approach specified",
                # Re-clamp: the LLM may list more files than allowed.
                files_to_modify=files_to_modify[:MAX_FILES_PER_COMMIT],
                files_to_create=files_to_create,
                tests_to_add=tests_to_add,
                explanation=explanation or "No explanation provided",
            )

        except Exception as e:
            # Degrade gracefully to the heuristic plan when the LLM fails.
            logger.error("LLM planning failed: %s", e)
            return EditPlan(
                approach=f"Fallback: Modify relevant files for {task_description}",
                files_to_modify=relevant_files[:MAX_FILES_PER_COMMIT],
                files_to_create=[],
                tests_to_add=[],
                explanation=f"LLM failed, using fallback: {e}",
            )
|
||||
|
||||
def _extract_field(self, content: str, field_name: str) -> str:
|
||||
"""Extract a field from LLM response."""
|
||||
for line in content.split("\n"):
|
||||
if line.startswith(f"{field_name}:"):
|
||||
return line.split(":", 1)[1].strip()
|
||||
return ""
|
||||
|
||||
def _parse_list(self, text: str) -> list[str]:
|
||||
"""Parse comma-separated list."""
|
||||
if not text or text.lower() in ("none", "n/a", ""):
|
||||
return []
|
||||
return [item.strip() for item in text.split(",") if item.strip()]
|
||||
|
||||
def _validate_plan(self, plan: EditPlan) -> bool:
|
||||
"""Validate plan against safety constraints.
|
||||
|
||||
Args:
|
||||
plan: EditPlan to validate
|
||||
|
||||
Returns:
|
||||
True if plan is valid
|
||||
"""
|
||||
# Check file count
|
||||
if len(plan.files_to_modify) > MAX_FILES_PER_COMMIT:
|
||||
logger.error("Plan modifies too many files: %d > %d",
|
||||
len(plan.files_to_modify), MAX_FILES_PER_COMMIT)
|
||||
return False
|
||||
|
||||
# Check for protected files
|
||||
for file_path in plan.files_to_modify:
|
||||
if file_path in PROTECTED_FILES:
|
||||
logger.error("Plan tries to modify protected file: %s", file_path)
|
||||
return False
|
||||
|
||||
# Check all files have test coverage
|
||||
for file_path in plan.files_to_modify:
|
||||
# This is async, so we check in _get_relevant_files
|
||||
pass
|
||||
|
||||
return True
|
||||
|
||||
async def _execute_edit(
|
||||
self,
|
||||
plan: EditPlan,
|
||||
task_description: str,
|
||||
retry_count: int = 0,
|
||||
) -> dict:
|
||||
"""Execute the edit using Aider or direct editing.
|
||||
|
||||
Args:
|
||||
plan: EditPlan to execute
|
||||
task_description: Original task description
|
||||
retry_count: Current retry attempt
|
||||
|
||||
Returns:
|
||||
Dict with success, test_output, error
|
||||
"""
|
||||
all_files = plan.files_to_modify + plan.files_to_create
|
||||
|
||||
if not all_files:
|
||||
return {"success": False, "error": "No files to modify"}
|
||||
|
||||
# Try Aider first
|
||||
if await self._aider_available():
|
||||
return await self._execute_with_aider(plan, task_description, all_files)
|
||||
else:
|
||||
# Fallback to direct editing
|
||||
return await self._execute_direct_edit(plan, task_description)
|
||||
|
||||
async def _aider_available(self) -> bool:
|
||||
"""Check if Aider is available."""
|
||||
try:
|
||||
result = await asyncio.create_subprocess_exec(
|
||||
"aider", "--version",
|
||||
stdout=asyncio.subprocess.DEVNULL,
|
||||
stderr=asyncio.subprocess.DEVNULL,
|
||||
)
|
||||
await result.wait()
|
||||
return result.returncode == 0
|
||||
except FileNotFoundError:
|
||||
return False
|
||||
|
||||
    async def _execute_with_aider(
        self,
        plan: EditPlan,
        task_description: str,
        files: list[str],
    ) -> dict:
        """Execute the edit by driving the Aider CLI as a subprocess.

        Aider runs with auto-testing enabled and git disabled (this class
        manages git itself via GitSafety).

        Args:
            plan: EditPlan.
            task_description: Task description.
            files: Files to edit.

        Returns:
            Dict with success, test_output (and "error" on failure).
        """
        cmd = [
            "aider",
            "--model", "ollama_chat/qwen2.5-coder:14b-instruct",
            "--auto-test",
            "--test-cmd", "python -m pytest tests/ -xvs",
            "--yes",
            "--no-git",
            "--message", f"{task_description}\n\nApproach: {plan.approach}",
        ] + files

        logger.info("Running Aider: %s", " ".join(cmd))

        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,  # merge stderr into stdout
                cwd=self.repo_path,
            )

            stdout, _ = await asyncio.wait_for(
                proc.communicate(),
                timeout=300.0,  # hard cap on a single Aider run
            )

            output = stdout.decode() if stdout else ""

            # Success requires both a zero exit code and evidence in the
            # captured output that tests passed.
            success = proc.returncode == 0 and "passed" in output.lower()

            return {
                "success": success,
                "test_output": output,
            }

        except asyncio.TimeoutError:
            logger.error("Aider timed out after 300s")
            return {
                "success": False,
                "error": "Timeout",
                "test_output": "Aider timed out after 300s",
            }
        except Exception as e:
            logger.error("Aider execution failed: %s", e)
            return {
                "success": False,
                "error": str(e),
                "test_output": "",
            }
|
||||
|
||||
async def _execute_direct_edit(
|
||||
self,
|
||||
plan: EditPlan,
|
||||
task_description: str,
|
||||
) -> dict:
|
||||
"""Execute edit via direct file modification (fallback).
|
||||
|
||||
Args:
|
||||
plan: EditPlan
|
||||
task_description: Task description
|
||||
|
||||
Returns:
|
||||
Dict with success, test_output
|
||||
"""
|
||||
if not self.llm_adapter:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "No LLM adapter for direct editing",
|
||||
}
|
||||
|
||||
# Edit each file
|
||||
for file_path in plan.files_to_modify:
|
||||
full_path = self.repo_path / file_path
|
||||
|
||||
if not full_path.exists():
|
||||
logger.error("File does not exist: %s", file_path)
|
||||
continue
|
||||
|
||||
try:
|
||||
content = full_path.read_text()
|
||||
|
||||
# Build edit prompt
|
||||
edit_prompt = f"""Edit this Python file to implement the task.
|
||||
|
||||
Task: {task_description}
|
||||
Approach: {plan.approach}
|
||||
|
||||
Current file content:
|
||||
```python
|
||||
{content}
|
||||
```
|
||||
|
||||
Provide the complete new file content. Only return the code, no explanation.
|
||||
"""
|
||||
|
||||
response = await self.llm_adapter.chat(message=edit_prompt)
|
||||
new_content = response.content
|
||||
|
||||
# Strip code fences if present
|
||||
new_content = self._strip_code_fences(new_content)
|
||||
|
||||
# Validate with AST
|
||||
try:
|
||||
ast.parse(new_content)
|
||||
except SyntaxError as e:
|
||||
logger.error("Generated code has syntax error: %s", e)
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Syntax error in generated code: {e}",
|
||||
}
|
||||
|
||||
# Write file
|
||||
full_path.write_text(new_content)
|
||||
logger.info("Modified: %s", file_path)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to edit %s: %s", file_path, e)
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"Failed to edit {file_path}: {e}",
|
||||
}
|
||||
|
||||
# Run tests
|
||||
return await self._run_tests()
|
||||
|
||||
def _strip_code_fences(self, content: str) -> str:
|
||||
"""Strip markdown code fences from content."""
|
||||
lines = content.split("\n")
|
||||
|
||||
# Remove opening fence
|
||||
if lines and lines[0].startswith("```"):
|
||||
lines = lines[1:]
|
||||
|
||||
# Remove closing fence
|
||||
if lines and lines[-1].startswith("```"):
|
||||
lines = lines[:-1]
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _run_tests(self) -> dict:
|
||||
"""Run tests and return results.
|
||||
|
||||
Returns:
|
||||
Dict with success, test_output
|
||||
"""
|
||||
cmd = ["python", "-m", "pytest", "tests/", "-x", "--tb=short"]
|
||||
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.STDOUT,
|
||||
cwd=self.repo_path,
|
||||
)
|
||||
|
||||
stdout, _ = await asyncio.wait_for(
|
||||
proc.communicate(),
|
||||
timeout=120.0,
|
||||
)
|
||||
|
||||
output = stdout.decode() if stdout else ""
|
||||
|
||||
return {
|
||||
"success": proc.returncode == 0,
|
||||
"test_output": output,
|
||||
}
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
return {
|
||||
"success": False,
|
||||
"error": "Tests timed out",
|
||||
"test_output": "Timeout after 120s",
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"test_output": "",
|
||||
}
|
||||
|
||||
    async def _log_success(
        self,
        task_description: str,
        plan: EditPlan,
        commit_hash: str,
        test_results: str,
        diff: str,
    ) -> int:
        """Record a successful attempt in the journal, with a reflection.

        Args:
            task_description: The task that was executed.
            plan: The plan that was carried out.
            commit_hash: Commit produced by the edit.
                NOTE(review): not stored on the ModificationAttempt below —
                confirm whether the journal should capture it.
            test_results: Captured test output.
            diff: Diff of the change (truncated to 5000 chars for storage).

        Returns:
            Attempt ID assigned by the journal.
        """
        attempt = ModificationAttempt(
            task_description=task_description,
            approach=plan.approach,
            files_modified=plan.files_to_modify + plan.files_to_create,
            diff=diff[:5000],  # Truncate for storage
            test_results=test_results,
            outcome=Outcome.SUCCESS,
        )

        attempt_id = await self.journal.log_attempt(attempt)

        # Generate and store a reflection so future attempts can learn from it.
        reflection_text = await self.reflection.reflect_on_attempt(attempt)
        await self.journal.update_reflection(attempt_id, reflection_text)

        return attempt_id
|
||||
|
||||
    async def _log_failure(
        self,
        task_description: str,
        plan: EditPlan,
        test_results: str,
        error: str,
    ) -> int:
        """Record a failed attempt in the journal, with a reflection.

        Args:
            task_description: The task that was attempted.
            plan: The plan that failed.
            test_results: Captured test output from the final attempt.
            error: Failure description, stored as the failure analysis.

        Returns:
            Attempt ID assigned by the journal.
        """
        attempt = ModificationAttempt(
            task_description=task_description,
            approach=plan.approach,
            files_modified=plan.files_to_modify,
            test_results=test_results,
            outcome=Outcome.FAILURE,
            failure_analysis=error,
            # Failures are only logged after all retries are exhausted.
            retry_count=MAX_RETRIES,
        )

        attempt_id = await self.journal.log_attempt(attempt)

        # Generate reflection even for failures — lessons come from both.
        reflection_text = await self.reflection.reflect_on_attempt(attempt)
        await self.journal.update_reflection(attempt_id, reflection_text)

        return attempt_id
|
||||
|
||||
|
||||
# MCP Tool Schema
# JSON Schema describing the self_edit tool's arguments for MCP clients.
self_edit_schema = {
    "type": "object",
    "properties": {
        "task_description": {
            "type": "string",
            "description": "Description of the code modification to make",
        },
        "context": {
            "type": "object",
            "description": "Optional additional context for the modification",
        },
    },
    "required": ["task_description"],
}
|
||||
|
||||
|
||||
# Global tool instance (singleton pattern)
_self_edit_tool: Optional[SelfEditTool] = None


async def self_edit_tool(task_description: str, context: Optional[dict] = None) -> dict:
    """MCP tool entry point for self-edit.

    Lazily creates the shared SelfEditTool singleton on first call, then
    delegates to its execute() workflow.

    Args:
        task_description: What to modify.
        context: Optional context.

    Returns:
        Dict with result fields (success, message, attempt_id,
        files_modified, commit_hash, test_results).
    """
    global _self_edit_tool

    # Lazy singleton: construct on first invocation.
    if _self_edit_tool is None:
        _self_edit_tool = SelfEditTool()

    result = await _self_edit_tool.execute(task_description, context)

    # Flatten the dataclass into the plain dict shape MCP expects.
    return {
        "success": result.success,
        "message": result.message,
        "attempt_id": result.attempt_id,
        "files_modified": result.files_modified,
        "commit_hash": result.commit_hash,
        "test_results": result.test_results,
    }
|
||||
|
||||
|
||||
def register_self_edit_tool(registry: Any, llm_adapter: Optional[object] = None) -> None:
    """Register the self-edit tool with the MCP registry.

    Replaces the module-level singleton with a fresh instance wired to the
    given LLM adapter, then registers the async handler.

    Args:
        registry: MCP ToolRegistry.
        llm_adapter: Optional LLM adapter.
    """
    global _self_edit_tool
    _self_edit_tool = SelfEditTool(llm_adapter=llm_adapter)

    registry.register(
        name="self_edit",
        schema=self_edit_schema,
        handler=self_edit_tool,
        category="self_coding",
        requires_confirmation=True,  # Safety: require user approval
        tags=["self-modification", "code-generation"],
        source_module="tools.self_edit",
    )

    logger.info("Self-edit tool registered with MCP")
|
||||
143
tests/test_self_coding_dashboard.py
Normal file
143
tests/test_self_coding_dashboard.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""Tests for Self-Coding Dashboard Routes.
|
||||
|
||||
Tests API endpoints and HTMX views.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Create a TestClient bound to the real dashboard app."""
    # Imported lazily so test collection doesn't boot the app.
    from dashboard.app import app
    return TestClient(app)
|
||||
|
||||
|
||||
class TestSelfCodingPageRoutes:
    """Page route tests: the full page and its HTMX partials."""

    def test_main_page_loads(self, client):
        """Main self-coding page should load."""
        response = client.get("/self-coding")
        assert response.status_code == 200
        assert "Self-Coding" in response.text

    def test_journal_partial(self, client):
        """Journal partial should return HTML."""
        response = client.get("/self-coding/journal")
        assert response.status_code == 200
        # Should contain journal list or empty message
        assert "journal" in response.text.lower() or "no entries" in response.text.lower()

    def test_stats_partial(self, client):
        """Stats partial should return HTML."""
        response = client.get("/self-coding/stats")
        assert response.status_code == 200
        # Should contain stats cards
        assert "Total Attempts" in response.text or "success rate" in response.text.lower()

    def test_execute_form_partial(self, client):
        """Execute form partial should return HTML."""
        response = client.get("/self-coding/execute-form")
        assert response.status_code == 200
        assert "Task Description" in response.text
        assert "textarea" in response.text
|
||||
|
||||
|
||||
class TestSelfCodingAPIRoutes:
    """JSON API route tests for journal, stats, and codebase endpoints."""

    def test_api_journal_list(self, client):
        """API should return journal entries."""
        response = client.get("/self-coding/api/journal")
        assert response.status_code == 200

        data = response.json()
        assert isinstance(data, list)

    def test_api_journal_list_with_limit(self, client):
        """API should respect limit parameter."""
        response = client.get("/self-coding/api/journal?limit=5")
        assert response.status_code == 200

        data = response.json()
        assert isinstance(data, list)
        assert len(data) <= 5

    def test_api_journal_detail_not_found(self, client):
        """API should return 404 for non-existent entry."""
        response = client.get("/self-coding/api/journal/99999")
        assert response.status_code == 404

    def test_api_stats(self, client):
        """API should return stats."""
        response = client.get("/self-coding/api/stats")
        assert response.status_code == 200

        data = response.json()
        # Shape check only — values depend on journal contents.
        assert "total_attempts" in data
        assert "success_rate" in data
        assert "recent_failures" in data

    def test_api_codebase_summary(self, client):
        """API should return codebase summary."""
        response = client.get("/self-coding/api/codebase/summary")
        assert response.status_code == 200

        data = response.json()
        assert "summary" in data

    def test_api_codebase_reindex(self, client):
        """API should trigger reindex."""
        response = client.post("/self-coding/api/codebase/reindex")
        assert response.status_code == 200

        data = response.json()
        # Reindex reports counts for each outcome bucket.
        assert "indexed" in data
        assert "failed" in data
        assert "skipped" in data
|
||||
|
||||
|
||||
class TestSelfCodingExecuteEndpoint:
    """Execute endpoint tests (JSON API and HTMX form variants)."""

    def test_execute_api_endpoint(self, client):
        """Execute API endpoint should accept task."""
        # Note: This will actually try to execute, which may fail
        # In production, this should be mocked or require auth
        response = client.post(
            "/self-coding/api/execute",
            json={"task_description": "Test task that will fail preflight"}
        )

        # Should return response (success or failure)
        assert response.status_code == 200

        data = response.json()
        assert "success" in data
        assert "message" in data

    def test_execute_htmx_endpoint(self, client):
        """Execute HTMX endpoint should accept form data."""
        response = client.post(
            "/self-coding/execute",
            data={"task_description": "Test task that will fail preflight"}
        )

        # Should return HTML response
        assert response.status_code == 200
        assert "text/html" in response.headers["content-type"]
|
||||
|
||||
|
||||
class TestSelfCodingNavigation:
    """Navigation integration tests: the page is linked from the header."""

    def test_nav_link_in_header(self, client):
        """Self-coding link should be in header."""
        response = client.get("/")
        assert response.status_code == 200
        assert "/self-coding" in response.text
        assert "SELF-CODING" in response.text
|
||||
398
tests/test_self_edit_tool.py
Normal file
398
tests/test_self_edit_tool.py
Normal file
@@ -0,0 +1,398 @@
|
||||
"""Tests for Self-Edit MCP Tool.
|
||||
|
||||
Tests the complete self-edit workflow with mocked dependencies.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.self_edit import (
|
||||
MAX_FILES_PER_COMMIT,
|
||||
MAX_RETRIES,
|
||||
PROTECTED_FILES,
|
||||
EditPlan,
|
||||
SelfEditResult,
|
||||
SelfEditTool,
|
||||
register_self_edit_tool,
|
||||
self_edit_tool,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def temp_repo():
    """Create a temporary git repository with a minimal src/tests layout.

    Yields the repo root; the directory is removed when the test ends.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        repo_path = Path(tmpdir)

        # Initialize git (identity config is required for commits).
        import subprocess
        subprocess.run(["git", "init"], cwd=repo_path, check=True, capture_output=True)
        subprocess.run(
            ["git", "config", "user.email", "test@test.com"],
            cwd=repo_path, check=True, capture_output=True,
        )
        subprocess.run(
            ["git", "config", "user.name", "Test"],
            cwd=repo_path, check=True, capture_output=True,
        )

        # Create src structure
        src_path = repo_path / "src" / "myproject"
        src_path.mkdir(parents=True)

        (src_path / "__init__.py").write_text("")
        (src_path / "app.py").write_text('''
"""Main application."""

def hello():
    return "Hello"
''')

        # Create tests
        tests_path = repo_path / "tests"
        tests_path.mkdir()
        (tests_path / "test_app.py").write_text('''
"""Tests for app."""
from myproject.app import hello

def test_hello():
    assert hello() == "Hello"
''')

        # Initial commit on a branch named "main" (what the tool expects).
        subprocess.run(["git", "add", "."], cwd=repo_path, check=True, capture_output=True)
        subprocess.run(
            ["git", "commit", "-m", "Initial"],
            cwd=repo_path, check=True, capture_output=True,
        )
        subprocess.run(
            ["git", "branch", "-M", "main"],
            cwd=repo_path, check=True, capture_output=True,
        )

        yield repo_path
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_settings():
    """Patch global settings so every test runs with self-modification enabled."""
    with patch('tools.self_edit.settings') as patched:
        patched.self_modify_enabled = True
        yield patched
|
||||
|
||||
|
||||
@pytest.fixture
def mock_llm():
    """Return an async LLM stub whose chat() always yields a canned plan."""
    plan_text = """APPROACH: Add error handling
FILES_TO_MODIFY: src/myproject/app.py
FILES_TO_CREATE:
TESTS_TO_ADD: tests/test_app.py
EXPLANATION: Wrap function in try/except"""
    adapter = AsyncMock()
    adapter.chat.return_value = MagicMock(content=plan_text)
    return adapter
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditToolBasics:
    """Smoke tests for construction and pre-flight checks."""

    async def test_initialization(self, temp_repo):
        """Constructing the tool wires up all collaborating services."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        assert edit_tool.repo_path == temp_repo
        # Every collaborator must be instantiated eagerly.
        for service in (edit_tool.git, edit_tool.indexer,
                        edit_tool.journal, edit_tool.reflection):
            assert service is not None

    async def test_preflight_checks_clean_repo(self, temp_repo):
        """A clean checkout on main passes pre-flight."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        assert await edit_tool._preflight_checks() is True

    async def test_preflight_checks_dirty_repo(self, temp_repo):
        """An uncommitted file makes pre-flight fail."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        # Leave an untracked file so the working tree is dirty.
        (temp_repo / "dirty.txt").write_text("dirty")

        assert await edit_tool._preflight_checks() is False

    async def test_preflight_checks_wrong_branch(self, temp_repo):
        """Pre-flight fails while a non-main branch is checked out."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        # Switch to a throwaway feature branch.
        import subprocess
        subprocess.run(
            ["git", "checkout", "-b", "feature"],
            cwd=temp_repo, check=True, capture_output=True,
        )

        assert await edit_tool._preflight_checks() is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditToolPlanning:
    """Tests for edit-plan generation."""

    async def test_plan_edit_with_llm(self, temp_repo, mock_llm):
        """With an LLM attached, the plan mirrors the model's answer."""
        edit_tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await edit_tool._ensure_indexed()

        plan = await edit_tool._plan_edit(
            task_description="Add error handling",
            relevant_files=["src/myproject/app.py"],
            similar_attempts=[],
        )

        assert isinstance(plan, EditPlan)
        assert plan.approach == "Add error handling"
        assert "src/myproject/app.py" in plan.files_to_modify

    async def test_plan_edit_without_llm(self, temp_repo):
        """Without an LLM the tool still produces a usable fallback plan."""
        edit_tool = SelfEditTool(repo_path=temp_repo, llm_adapter=None)
        await edit_tool._ensure_indexed()

        plan = await edit_tool._plan_edit(
            task_description="Add feature",
            relevant_files=["src/myproject/app.py"],
            similar_attempts=[],
        )

        assert isinstance(plan, EditPlan)
        assert len(plan.files_to_modify) > 0

    async def test_plan_respects_max_files(self, temp_repo, mock_llm):
        """The plan never exceeds MAX_FILES_PER_COMMIT even if the LLM does."""
        edit_tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await edit_tool._ensure_indexed()

        # Have the LLM name more files than the per-commit budget allows.
        many_files = [f"file{i}.py" for i in range(10)]
        mock_llm.chat.return_value = MagicMock(
            content="FILES_TO_MODIFY: " + ",".join(many_files)
        )

        plan = await edit_tool._plan_edit(
            task_description="Test",
            relevant_files=many_files,
            similar_attempts=[],
        )

        assert len(plan.files_to_modify) <= MAX_FILES_PER_COMMIT
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditToolValidation:
    """Tests for the safety-constraint plan validator."""

    @staticmethod
    def _make_plan(files):
        """Build a minimal EditPlan that modifies *files*."""
        return EditPlan(
            approach="Test",
            files_to_modify=files,
            files_to_create=[],
            tests_to_add=[],
            explanation="Test",
        )

    async def test_validate_plan_too_many_files(self, temp_repo):
        """Plans above the per-commit file budget are rejected."""
        edit_tool = SelfEditTool(repo_path=temp_repo)
        oversized = [f"file{i}.py" for i in range(MAX_FILES_PER_COMMIT + 1)]

        assert edit_tool._validate_plan(self._make_plan(oversized)) is False

    async def test_validate_plan_protected_file(self, temp_repo):
        """Plans that touch a protected file are rejected."""
        edit_tool = SelfEditTool(repo_path=temp_repo)
        touches_protected = self._make_plan(["src/tools/self_edit.py"])

        assert edit_tool._validate_plan(touches_protected) is False

    async def test_validate_plan_valid(self, temp_repo):
        """A small plan over ordinary files is accepted."""
        edit_tool = SelfEditTool(repo_path=temp_repo)
        ordinary = self._make_plan(["src/myproject/app.py"])

        assert edit_tool._validate_plan(ordinary) is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditToolExecution:
    """Tests for small text-processing helpers used during execution."""

    async def test_strip_code_fences(self, temp_repo):
        """Markdown fences are removed while the code inside survives."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        cleaned = edit_tool._strip_code_fences("```python\ndef test(): pass\n```")

        assert "```" not in cleaned
        assert "def test(): pass" in cleaned

    async def test_parse_list(self, temp_repo):
        """Comma-separated strings parse; placeholder values mean empty."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        assert edit_tool._parse_list("a, b, c") == ["a", "b", "c"]
        # "none", "", and "N/A" are all treated as no entries at all.
        for empty_input in ("none", "", "N/A"):
            assert edit_tool._parse_list(empty_input) == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditToolIntegration:
    """End-to-end flows with the expensive collaborators mocked out."""

    async def test_successful_edit_flow(self, temp_repo, mock_llm):
        """A passing execution reports success and records an attempt id."""
        edit_tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)

        # Force the direct-edit path (Aider unavailable) and make it succeed.
        with patch.object(edit_tool, '_aider_available', return_value=False), \
             patch.object(edit_tool, '_execute_direct_edit') as mock_exec:
            mock_exec.return_value = {
                "success": True,
                "test_output": "1 passed",
            }

            result = await edit_tool.execute("Add error handling")

        assert result.success is True
        assert result.attempt_id is not None

    async def test_failed_edit_with_rollback(self, temp_repo, mock_llm):
        """A failing execution reports failure but still logs the attempt."""
        edit_tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)

        # Every execution attempt fails, exercising retry/rollback handling.
        with patch.object(edit_tool, '_execute_edit') as mock_exec:
            mock_exec.return_value = {
                "success": False,
                "error": "Tests failed",
                "test_output": "1 failed",
            }

            result = await edit_tool.execute("Add broken feature")

        assert result.success is False
        assert result.attempt_id is not None
        assert "failed" in result.message.lower() or "retry" in result.message.lower()

    async def test_preflight_failure(self, temp_repo):
        """A dirty repo aborts the run before any edit is attempted."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        # Untracked file => dirty working tree => pre-flight rejection.
        (temp_repo / "dirty.txt").write_text("dirty")

        result = await edit_tool.execute("Some task")

        assert result.success is False
        assert "pre-flight" in result.message.lower()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditMCPRegistration:
    """Tests for registration with the MCP tool registry."""

    async def test_register_self_edit_tool(self):
        """Registration happens exactly once, with confirmation required."""
        registry = MagicMock()
        llm = AsyncMock()

        register_self_edit_tool(registry, llm)

        registry.register.assert_called_once()
        kwargs = registry.register.call_args.kwargs

        assert kwargs["name"] == "self_edit"
        assert kwargs["requires_confirmation"] is True
        assert "self_coding" in kwargs["category"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditGlobalTool:
    """Global tool instance tests."""

    async def test_self_edit_tool_singleton(self, temp_repo):
        """Should use singleton pattern.

        Resets the module-level cached instance, then calls the convenience
        wrapper once with both __init__ and execute patched; exactly one
        construction and one execution must be observed.
        """
        from tools import self_edit as self_edit_module

        # Reset singleton so the next call is forced to construct a new tool.
        self_edit_module._self_edit_tool = None

        # First call should initialize.
        # __init__ is stubbed out so no real services (git, indexer, ...) are built.
        with patch.object(SelfEditTool, '__init__', return_value=None) as mock_init:
            # NOTE(review): redundant — return_value=None is already set above.
            mock_init.return_value = None

            with patch.object(SelfEditTool, 'execute') as mock_execute:
                mock_execute.return_value = SelfEditResult(
                    success=True,
                    message="Test",
                )

                await self_edit_tool("Test task")

        # One construction + one execution proves the wrapper cached the instance.
        mock_init.assert_called_once()
        mock_execute.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSelfEditErrorHandling:
    """Tests for graceful degradation on unexpected errors."""

    async def test_exception_handling(self, temp_repo):
        """An exception inside the flow becomes a failed result, not a crash."""
        edit_tool = SelfEditTool(repo_path=temp_repo)

        # Blow up during pre-flight; execute() must swallow it into a result.
        with patch.object(edit_tool, '_preflight_checks', side_effect=Exception("Unexpected")):
            result = await edit_tool.execute("Test task")

        assert result.success is False
        assert "exception" in result.message.lower()

    async def test_llm_failure_fallback(self, temp_repo, mock_llm):
        """Planning falls back to a heuristic plan when the LLM errors out."""
        edit_tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await edit_tool._ensure_indexed()

        # Every chat call raises, so the LLM path cannot produce a plan.
        mock_llm.chat.side_effect = Exception("LLM timeout")

        plan = await edit_tool._plan_edit(
            task_description="Test",
            relevant_files=["src/app.py"],
            similar_attempts=[],
        )

        # The heuristic fallback still names at least one file to touch.
        assert isinstance(plan, EditPlan)
        assert len(plan.files_to_modify) > 0
|
||||
Reference in New Issue
Block a user