1
0

feat: task queue system with startup drain and backlogging (#76)

* feat: add task queue system for Timmy - all work goes through the queue

- Add queue position tracking to task_queue models with task_type field
- Add TaskProcessor class that consumes tasks from queue one at a time
- Modify chat route to queue all messages for async processing
- Chat responses get 'high' priority to jump ahead of thought tasks
- Add queue status API endpoints for position polling
- Update UI to show queue position (x/y) and current task banner
- Replace thinking loop with task-based approach - thoughts are queued tasks
- Push responses to user via WebSocket instead of immediate HTTP response
- Add database migrations for existing tables

* feat: Timmy drains task queue on startup, backlogs unhandleable tasks

On spin-up, Timmy now iterates through all pending/approved tasks
immediately instead of waiting for the polling loop. Tasks without a
registered handler or with permanent errors are moved to a new
BACKLOGGED status with a reason, keeping the queue clear for work
Timmy can actually do.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alexander Whitestone
2026-02-27 01:52:42 -05:00
committed by GitHub
parent 849b5b1a8d
commit 5b6d33e05a
12 changed files with 1286 additions and 120 deletions

View File

@@ -32,6 +32,7 @@ from swarm.task_queue.models import (
get_counts_by_status,
get_pending_count,
get_task,
list_backlogged_tasks,
list_tasks,
update_task,
update_task_status,
@@ -45,6 +46,7 @@ templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templa
# ── Helper to broadcast task events via WebSocket ────────────────────────
def _broadcast_task_event(event_type: str, task: QueueTask):
"""Best-effort broadcast a task event to connected WebSocket clients."""
try:
@@ -74,25 +76,30 @@ def _broadcast_task_event(event_type: str, task: QueueTask):
# ── Dashboard page ───────────────────────────────────────────────────────
@router.get("/tasks", response_class=HTMLResponse)
async def task_queue_page(request: Request, assign: Optional[str] = None):
"""Task queue dashboard with three columns."""
pending = list_tasks(status=TaskStatus.PENDING_APPROVAL) + \
list_tasks(status=TaskStatus.APPROVED)
active = list_tasks(status=TaskStatus.RUNNING) + \
list_tasks(status=TaskStatus.PAUSED)
completed = list_tasks(status=TaskStatus.COMPLETED, limit=20) + \
list_tasks(status=TaskStatus.VETOED, limit=10) + \
list_tasks(status=TaskStatus.FAILED, limit=10)
pending = list_tasks(status=TaskStatus.PENDING_APPROVAL) + list_tasks(
status=TaskStatus.APPROVED
)
active = list_tasks(status=TaskStatus.RUNNING) + list_tasks(
status=TaskStatus.PAUSED
)
backlogged = list_backlogged_tasks(limit=20)
completed = (
list_tasks(status=TaskStatus.COMPLETED, limit=20)
+ list_tasks(status=TaskStatus.VETOED, limit=10)
+ list_tasks(status=TaskStatus.FAILED, limit=10)
+ backlogged
)
# Get agents for the create modal
agents = []
try:
from swarm.coordinator import coordinator
agents = [
{"id": a.id, "name": a.name}
for a in coordinator.list_swarm_agents()
]
agents = [{"id": a.id, "name": a.name} for a in coordinator.list_swarm_agents()]
except Exception:
pass
# Always include core agents
@@ -120,11 +127,13 @@ async def task_queue_page(request: Request, assign: Optional[str] = None):
# ── HTMX partials ───────────────────────────────────────────────────────
@router.get("/tasks/pending", response_class=HTMLResponse)
async def tasks_pending_partial(request: Request):
"""HTMX partial: pending approval tasks."""
pending = list_tasks(status=TaskStatus.PENDING_APPROVAL) + \
list_tasks(status=TaskStatus.APPROVED)
pending = list_tasks(status=TaskStatus.PENDING_APPROVAL) + list_tasks(
status=TaskStatus.APPROVED
)
return templates.TemplateResponse(
request,
"partials/task_cards.html",
@@ -135,8 +144,9 @@ async def tasks_pending_partial(request: Request):
@router.get("/tasks/active", response_class=HTMLResponse)
async def tasks_active_partial(request: Request):
"""HTMX partial: active tasks."""
active = list_tasks(status=TaskStatus.RUNNING) + \
list_tasks(status=TaskStatus.PAUSED)
active = list_tasks(status=TaskStatus.RUNNING) + list_tasks(
status=TaskStatus.PAUSED
)
return templates.TemplateResponse(
request,
"partials/task_cards.html",
@@ -147,9 +157,12 @@ async def tasks_active_partial(request: Request):
@router.get("/tasks/completed", response_class=HTMLResponse)
async def tasks_completed_partial(request: Request):
"""HTMX partial: completed tasks."""
completed = list_tasks(status=TaskStatus.COMPLETED, limit=20) + \
list_tasks(status=TaskStatus.VETOED, limit=10) + \
list_tasks(status=TaskStatus.FAILED, limit=10)
completed = (
list_tasks(status=TaskStatus.COMPLETED, limit=20)
+ list_tasks(status=TaskStatus.VETOED, limit=10)
+ list_tasks(status=TaskStatus.FAILED, limit=10)
+ list_backlogged_tasks(limit=20)
)
return templates.TemplateResponse(
request,
"partials/task_cards.html",
@@ -159,6 +172,7 @@ async def tasks_completed_partial(request: Request):
# ── JSON API ─────────────────────────────────────────────────────────────
@router.get("/api/tasks", response_class=JSONResponse)
async def api_list_tasks(
status: Optional[str] = None,
@@ -242,10 +256,24 @@ async def api_task_counts():
"completed": counts.get("completed", 0),
"failed": counts.get("failed", 0),
"vetoed": counts.get("vetoed", 0),
"backlogged": counts.get("backlogged", 0),
"total": sum(counts.values()),
}
# ── Backlog API (must be before {task_id} catch-all) ─────────────────────
@router.get("/api/tasks/backlog", response_class=JSONResponse)
async def api_list_backlogged(assigned_to: Optional[str] = None, limit: int = 50):
    """Return backlogged tasks as JSON.

    ``assigned_to`` and ``limit`` are forwarded unchanged to
    ``list_backlogged_tasks``. The response holds the serialized tasks under
    ``"tasks"`` and the number of tasks returned under ``"count"``.
    """
    backlogged = list_backlogged_tasks(assigned_to=assigned_to, limit=limit)
    serialized = [_task_to_dict(item) for item in backlogged]
    return {"tasks": serialized, "count": len(backlogged)}
@router.get("/api/tasks/{task_id}", response_class=JSONResponse)
async def api_get_task(task_id: str):
"""Get a single task by ID."""
@@ -257,6 +285,7 @@ async def api_get_task(task_id: str):
# ── Workflow actions ─────────────────────────────────────────────────────
@router.patch("/api/tasks/{task_id}/approve", response_class=JSONResponse)
async def api_approve_task(task_id: str):
"""Approve a pending task."""
@@ -436,10 +465,101 @@ async def htmx_retry_task(request: Request, task_id: str):
)
@router.patch("/api/tasks/{task_id}/unbacklog", response_class=JSONResponse)
async def api_unbacklog_task(task_id: str):
    """Return a backlogged task to the approved state (JSON API).

    Raises:
        HTTPException: 404 when ``get_task`` finds nothing for ``task_id``;
            400 when the task's status is not ``TaskStatus.BACKLOGGED``.
    """
    existing = get_task(task_id)
    if not existing:
        raise HTTPException(404, "Task not found")
    if existing.status != TaskStatus.BACKLOGGED:
        raise HTTPException(400, "Can only unbacklog backlogged tasks")

    # result and backlog_reason are passed as None together with the new status.
    requeued = update_task_status(
        task_id,
        TaskStatus.APPROVED,
        result=None,
        backlog_reason=None,
    )
    _broadcast_task_event("task_unbacklogged", requeued)

    payload = _task_to_dict(requeued)
    return {"success": True, "task": payload}
@router.post("/tasks/{task_id}/unbacklog", response_class=HTMLResponse)
async def htmx_unbacklog_task(request: Request, task_id: str):
    """Move a backlogged task back to approved and render its card (HTMX).

    Args:
        request: Incoming request, passed through to the template response.
        task_id: ID of the task to move out of the backlog.

    Returns:
        The ``partials/task_card.html`` partial rendered for the updated task.

    Raises:
        HTTPException: 404 when no task exists for ``task_id``; 400 when the
            task is not currently in ``TaskStatus.BACKLOGGED``.
    """
    task = get_task(task_id)
    if not task:
        raise HTTPException(404, "Task not found")
    # Mirror the guard in api_unbacklog_task: without it this endpoint would
    # push a task in any state (running, completed, vetoed, ...) back to
    # APPROVED and re-queue it.
    if task.status != TaskStatus.BACKLOGGED:
        raise HTTPException(400, "Can only unbacklog backlogged tasks")

    updated = update_task_status(
        task_id, TaskStatus.APPROVED, result=None, backlog_reason=None
    )
    _broadcast_task_event("task_unbacklogged", updated)
    return templates.TemplateResponse(
        request, "partials/task_card.html", {"task": updated}
    )
# ── Queue Status API ─────────────────────────────────────────────────────
@router.get("/api/queue/status", response_class=JSONResponse)
async def api_queue_status(assigned_to: str = "timmy"):
    """Report queue state for one agent, intended for position polling.

    The response contains the agent name, whether a current task exists,
    the serialized current and next tasks (``None`` when absent), and the
    value returned by ``get_queue_position_ahead`` under ``"tasks_ahead"``.
    """
    from swarm.task_queue.models import (
        get_current_task_for_agent,
        get_next_pending_task,
        get_queue_position_ahead,
    )

    running = get_current_task_for_agent(assigned_to)
    upcoming = get_next_pending_task(assigned_to)
    ahead_count = get_queue_position_ahead(assigned_to)

    current_payload = _task_to_dict(running) if running else None
    next_payload = _task_to_dict(upcoming) if upcoming else None

    return {
        "agent": assigned_to,
        "is_working": running is not None,
        "current_task": current_payload,
        "next_task": next_payload,
        "tasks_ahead": ahead_count,
    }
@router.get("/api/queue/position/{task_id}", response_class=JSONResponse)
async def api_queue_position(task_id: str):
    """Return the queue status mapping for a single task.

    Raises:
        HTTPException: 404 when the mapping from
            ``get_queue_status_for_task`` contains an ``"error"`` key; the
            error value is used as the detail.
    """
    from swarm.task_queue.models import get_queue_status_for_task

    position_info = get_queue_status_for_task(task_id)
    if "error" not in position_info:
        return position_info
    raise HTTPException(404, position_info["error"])
@router.get("/api/queue/agent/{assigned_to}", response_class=JSONResponse)
async def api_agent_queue(assigned_to: str, limit: int = 20):
    """List an agent's tasks that are still waiting or in progress.

    Tasks in a finished state (completed, failed, vetoed) or parked in the
    backlog are left out, so the result reflects only work still in the
    agent's queue.

    Args:
        assigned_to: Agent whose tasks are listed.
        limit: Maximum number of tasks fetched from ``list_tasks``. The
            filter runs after the fetch, so fewer than ``limit`` tasks may
            be returned.

    Returns:
        A dict with the agent name, the serialized tasks under ``"tasks"``
        and their number under ``"count"``.
    """
    from swarm.task_queue.models import list_tasks, TaskStatus

    # Statuses that mean the task is no longer in the live queue. BACKLOGGED
    # belongs here too: backlogged tasks were set aside and the dashboard
    # already groups them with finished work.
    not_queued = (
        TaskStatus.COMPLETED,
        TaskStatus.FAILED,
        TaskStatus.VETOED,
        TaskStatus.BACKLOGGED,
    )

    tasks = list_tasks(
        assigned_to=assigned_to,
        status=None,  # All statuses; narrowed below
        limit=limit,
    )
    pending = [t for t in tasks if t.status not in not_queued]

    return {
        "assigned_to": assigned_to,
        "tasks": [_task_to_dict(t) for t in pending],
        "count": len(pending),
    }
# ── Helpers ──────────────────────────────────────────────────────────────
def _task_to_dict(task: QueueTask) -> dict:
return {
d = {
"id": task.id,
"title": task.title,
"description": task.description,
@@ -457,11 +577,15 @@ def _task_to_dict(task: QueueTask) -> dict:
"completed_at": task.completed_at,
"updated_at": task.updated_at,
}
if task.backlog_reason:
d["backlog_reason"] = task.backlog_reason
return d
def _notify_task_created(task: QueueTask):
try:
from infrastructure.notifications.push import notifier
notifier.notify(
title="New Task",
message=f"{task.created_by} created: {task.title}",