feat: add Loop QA self-testing framework

Structured self-test framework that probes 6 capabilities (tool use, multistep planning, memory read, memory write, self-coding, lightning econ) in round-robin. Reuses existing infra: event_log for persistence, create_task() for upgrade proposals, capture_error() for crash handling, and an in-memory circuit breaker for failure tracking.

- src/timmy/loop_qa.py: Capability enum, 6 async probes, orchestrator
- src/dashboard/routes/loop_qa.py: JSON + HTMX health endpoints
- HTMX partial polls every 30s on the health panel
- Background scheduler in app.py lifespan
- 25 tests covering probes, orchestrator, health snapshot, routes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 22:33:16 -04:00
parent c7f92f6d7b
commit d42c574d26
8 changed files with 973 additions and 1 deletions
--- a/src/dashboard/app.py
+++ b/src/dashboard/app.py
@@ -32,6 +32,7 @@ from dashboard.routes.discord import router as discord_router
 from dashboard.routes.experiments import router as experiments_router
 from dashboard.routes.grok import router as grok_router
 from dashboard.routes.health import router as health_router
+from dashboard.routes.loop_qa import router as loop_qa_router
 from dashboard.routes.marketplace import router as marketplace_router
 from dashboard.routes.memory import router as memory_router
 from dashboard.routes.mobile import router as mobile_router
@@ -161,6 +162,35 @@ async def _thinking_scheduler() -> None:
        await asyncio.sleep(settings.thinking_interval_seconds)


+async def _loop_qa_scheduler() -> None:
+    """Background task: call loop_qa_orchestrator.run_next_test() once per cycle.
+
+    Runs in its own loop, sleeping thinking_interval_seconds *
+    loop_qa_interval_ticks between cycles; exceptions are logged, not raised.
+    """
+    from timmy.loop_qa import loop_qa_orchestrator
+
+    await asyncio.sleep(10)  # Stagger after thinking scheduler
+
+    while True:
+        try:
+            if settings.loop_qa_enabled:  # checked every cycle, not only at startup
+                result = await loop_qa_orchestrator.run_next_test()
+                if result:  # a falsy result means there is nothing to report
+                    status = "PASS" if result["success"] else "FAIL"
+                    logger.info(
+                        "Loop QA [%s]: %s — %s",
+                        result["capability"],
+                        status,
+                        result.get("details", "")[:80],  # cap details at 80 chars in the log
+                    )
+        except Exception as exc:
+            logger.error("Loop QA scheduler error: %s", exc)  # log and keep looping
+
+        interval = settings.thinking_interval_seconds * settings.loop_qa_interval_ticks
+        await asyncio.sleep(interval)
+
+
 async def _start_chat_integrations_background() -> None:
    """Background task: start chat integrations without blocking startup."""
    from integrations.chat_bridge.registry import platform_registry
@@ -268,6 +298,7 @@ async def lifespan(app: FastAPI):
    # Create all background tasks without waiting for them
    briefing_task = asyncio.create_task(_briefing_scheduler())
    thinking_task = asyncio.create_task(_thinking_scheduler())
+    loop_qa_task = asyncio.create_task(_loop_qa_scheduler())

    # Initialize Spark Intelligence engine
    from spark.engine import get_spark_engine
@@ -323,7 +354,7 @@ async def lifespan(app: FastAPI):
    await discord_bot.stop()
    await telegram_bot.stop()

-    for task in [briefing_task, thinking_task, chat_task]:
+    for task in [briefing_task, thinking_task, chat_task, loop_qa_task]:
        if task:
            task.cancel()
            try:
@@ -410,6 +441,7 @@ app.include_router(calm_router)
 app.include_router(swarm_router)
 app.include_router(tasks_router)
 app.include_router(work_orders_router)
+app.include_router(loop_qa_router)
 app.include_router(system_router)
 app.include_router(paperclip_router)
 app.include_router(experiments_router)