"""Loop QA — structured self-test framework for Timmy's capabilities.

Runs alongside (not inside) the thinking loop. Each cycle probes one
capability in round-robin, logs the result, tracks consecutive failures
in memory, and files an upgrade task via create_task() when degradation
is detected.

Integration points:
- log_event → result logging (currently a no-op stub defined in this
  module; the swarm event_log it used to wrap has been removed)
- create_task → upgrade proposals (currently a no-op stub, same reason)
- infrastructure.error_capture → crash handling (lazy-imported)

Usage::

    from timmy.loop_qa import loop_qa_orchestrator
    await loop_qa_orchestrator.run_next_test()
    snapshot = loop_qa_orchestrator.get_health_snapshot()
"""

import asyncio
import logging
import uuid
from datetime import UTC, datetime
from enum import StrEnum

from config import settings

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Data models
# ---------------------------------------------------------------------------


class Capability(StrEnum):
    """Capabilities exercised by self-test probes."""

    TOOL_USE = "tool_use"
    MULTISTEP_PLANNING = "multistep_planning"
    MEMORY_READ = "memory_read"
    MEMORY_WRITE = "memory_write"
    SELF_CODING = "self_coding"
    LIGHTNING_ECON = "lightning_econ"


# ---------------------------------------------------------------------------
# Lazy accessors (avoid import-time side effects)
# ---------------------------------------------------------------------------


def _get_shell_hand():
    """Lazy-import the shell hand singleton."""
    from infrastructure.hands.shell import shell_hand

    return shell_hand


def _get_vault():
    """Lazy-import the vault memory singleton."""
    from timmy.memory_system import get_memory_system

    return get_memory_system().vault


def _get_brain_memory():
    """Return None — brain module removed.

    Memory operations now go through timmy.memory_system.
    """
    return None


# ---------------------------------------------------------------------------
# Result helpers
# ---------------------------------------------------------------------------


def _probe_result(
    cap: Capability,
    success: bool,
    details: str,
    error_type: str | None = None,
) -> dict:
    """Build the result dict shared by every probe.

    Keys (in this order): ``success``, ``capability``, ``details``,
    ``error_type``.
    """
    return {
        "success": success,
        "capability": cap,
        "details": details,
        "error_type": error_type,
    }


def _probe_exception(cap: Capability, exc: BaseException) -> dict:
    """Build a failure result from an exception raised inside a probe.

    The message is truncated to 200 characters and the exception class name
    is reported as ``error_type``.
    """
    return _probe_result(cap, False, str(exc)[:200], type(exc).__name__)


def _memory_unavailable(cap: Capability) -> dict:
    """Build the failure result used when no brain memory backend exists."""
    return _probe_result(
        cap,
        False,
        "Brain memory backend unavailable",
        "backend_unavailable",
    )


# ---------------------------------------------------------------------------
# Six self-test probes — each returns a result dict
# ---------------------------------------------------------------------------


async def probe_tool_use() -> dict:
    """T1: call shell_hand.run('ls') and confirm non-empty result."""
    cap = Capability.TOOL_USE
    try:
        hand = _get_shell_hand()
        result = await hand.run("ls")
        if result.success and result.stdout.strip():
            return _probe_result(
                cap,
                True,
                f"ls returned {len(result.stdout.splitlines())} lines",
            )
        return _probe_result(
            cap,
            False,
            f"ls returned empty or failed: {result.stderr[:100]}",
            "empty_result",
        )
    except Exception as exc:
        return _probe_exception(cap, exc)


async def probe_multistep_planning() -> dict:
    """T2: write a temp vault note and verify it exists with content."""
    cap = Capability.MULTISTEP_PLANNING
    try:
        vault = _get_vault()
        marker = f"loop_qa_plan_test_{uuid.uuid4().hex[:8]}"
        content = (
            f"# Loop QA Planning Test\n\nMarker: {marker}\nDate: {datetime.now(UTC).isoformat()}"
        )

        def _write_and_verify():
            # Write and read back in the same worker thread so that no file
            # I/O runs on the event loop.
            note_path = vault.write_note("loop_qa_test", content, "notes")
            verified = note_path.exists() and marker in note_path.read_text()
            return note_path, verified

        path, verified = await asyncio.to_thread(_write_and_verify)
        if verified:
            return _probe_result(cap, True, f"Wrote and verified {path.name}")
        return _probe_result(
            cap,
            False,
            "File missing or content mismatch",
            "verification_failed",
        )
    except Exception as exc:
        return _probe_exception(cap, exc)


async def probe_memory_write() -> dict:
    """T3: call store_fact_sync on the brain memory and verify no exception.

    Reports an explicit ``backend_unavailable`` failure when
    ``_get_brain_memory()`` returns None instead of dereferencing None.
    """
    cap = Capability.MEMORY_WRITE
    try:
        mem = _get_brain_memory()
        if mem is None:
            return _memory_unavailable(cap)
        marker = f"loop_qa_marker_{uuid.uuid4().hex[:8]}"
        await asyncio.to_thread(mem.store_fact_sync, "self_test_marker", marker)
        return _probe_result(cap, True, f"Stored fact: {marker}")
    except Exception as exc:
        return _probe_exception(cap, exc)


async def probe_memory_read() -> dict:
    """T4: call get_facts_sync on the brain memory and verify results returned.

    Reports an explicit ``backend_unavailable`` failure when
    ``_get_brain_memory()`` returns None instead of dereferencing None.
    """
    cap = Capability.MEMORY_READ
    try:
        mem = _get_brain_memory()
        if mem is None:
            return _memory_unavailable(cap)
        facts = await asyncio.to_thread(mem.get_facts_sync, "self_test_marker")
        if facts:
            return _probe_result(
                cap,
                True,
                f"Retrieved {len(facts)} self_test_marker facts",
            )
        return _probe_result(
            cap,
            False,
            "No self_test_marker facts found",
            "empty_result",
        )
    except Exception as exc:
        return _probe_exception(cap, exc)


async def probe_self_coding() -> dict:
    """T5: write a self-test note to memory/self/ via vault."""
    cap = Capability.SELF_CODING
    try:
        vault = _get_vault()
        content = (
            "# Self-Test Improvement Note\n\n"
            f"**Generated:** {datetime.now(UTC).isoformat()}\n\n"
            "## What\nLoop QA self-coding probe — validates vault write capability.\n\n"
            "## Why\nEnsure the self-coding pathway is functional.\n\n"
            "## How\nWrite this note and verify it exists."
        )

        def _write_and_measure():
            # Write and stat in the same worker thread; the size is read once
            # so the check and the reported value cannot disagree.
            note_path = vault.write_note("self_test_note", content, "self")
            size = note_path.stat().st_size if note_path.exists() else 0
            return note_path, size

        path, size = await asyncio.to_thread(_write_and_measure)
        if size > 0:
            return _probe_result(cap, True, f"Wrote {path.name} ({size} bytes)")
        return _probe_result(
            cap,
            False,
            "File missing or empty after write",
            "verification_failed",
        )
    except Exception as exc:
        return _probe_exception(cap, exc)


async def probe_lightning_econ() -> dict:
    """T6: placeholder — Lightning module pending v2."""
    return _probe_result(
        Capability.LIGHTNING_ECON,
        True,
        "Lightning module pending v2 — placeholder pass",
    )


# ---------------------------------------------------------------------------
# Test sequence (round-robin order)
# ---------------------------------------------------------------------------

# Probes are referenced by name (not by function object) so that the
# orchestrator resolves them on the module at call time.
TEST_SEQUENCE: list[tuple[Capability, str]] = [
    (Capability.TOOL_USE, "probe_tool_use"),
    (Capability.MULTISTEP_PLANNING, "probe_multistep_planning"),
    (Capability.MEMORY_WRITE, "probe_memory_write"),
    (Capability.MEMORY_READ, "probe_memory_read"),
    (Capability.SELF_CODING, "probe_self_coding"),
    (Capability.LIGHTNING_ECON, "probe_lightning_econ"),
]


# ---------------------------------------------------------------------------
# Orchestrator
# ---------------------------------------------------------------------------


def log_event(event_type, **kwargs):
    """No-op — swarm event_log removed."""
    logger.debug("log_event(%s) — swarm module removed, skipping", event_type)


def capture_error(exc, **kwargs):
    """Proxy to infrastructure.error_capture — lazy import.

    Best-effort: any failure to import or call the real capture function is
    logged at debug level (with traceback) and swallowed; None is returned.
    """
    try:
        from infrastructure.error_capture import capture_error as _capture

        return _capture(exc, **kwargs)
    except Exception:
        logger.debug("Failed to capture error", exc_info=True)
        return None


def create_task(**kwargs):
    """No-op — swarm task queue removed."""
    logger.debug("create_task() — swarm module removed, skipping")
    return None


class LoopQAOrchestrator:
    """Round-robin self-test orchestrator.

    Runs one probe per invocation, cycling through T1–T6. Tracks
    consecutive failures in memory (circuit-breaker pattern) and files
    upgrade tasks via create_task() when degradation is detected.
    """

    def __init__(self) -> None:
        # Index into TEST_SEQUENCE of the next probe to run.
        self._test_index: int = 0
        # Consecutive failure count per capability; reset to 0 on success.
        self._failure_counts: dict[Capability, int] = {c: 0 for c in Capability}
        # ISO timestamp of the latest failure, or None after a success.
        self._last_failed: dict[Capability, str | None] = {c: None for c in Capability}
        # Capabilities with an upgrade proposal already filed for the
        # current failure streak.
        self._proposal_filed: set[Capability] = set()
        # Probes started in the current throttle window.
        self._hourly_count: int = 0
        # Whole hours since the Unix epoch for the current throttle window;
        # -1 means no window has been opened yet.
        self._hour_marker: int = -1

    async def run_next_test(self) -> dict | None:
        """Run the next probe in the round-robin sequence.

        Returns result dict, or None if disabled/throttled.
        """
        if not settings.loop_qa_enabled:
            return None

        # Hourly throttle. The window is keyed on absolute hours since the
        # epoch rather than hour-of-day, so a call in the same clock hour on
        # a later day still opens a fresh window.
        now = datetime.now(UTC)
        current_hour = int(now.timestamp()) // 3600
        if current_hour != self._hour_marker:
            self._hourly_count = 0
            self._hour_marker = current_hour

        if self._hourly_count >= settings.loop_qa_max_per_hour:
            logger.debug(
                "Loop QA throttled: %d/%d this hour",
                self._hourly_count,
                settings.loop_qa_max_per_hour,
            )
            return None

        # Pick next probe (resolve name at call time for testability)
        import timmy.loop_qa as _self_module

        cap, probe_name = TEST_SEQUENCE[self._test_index]
        probe_fn = getattr(_self_module, probe_name)
        self._test_index = (self._test_index + 1) % len(TEST_SEQUENCE)
        self._hourly_count += 1

        # Run probe
        try:
            result = await probe_fn()
        except Exception as exc:
            # Probe itself crashed — record failure and report
            capture_error(exc, source="loop_qa", context={"capability": cap.value})
            result = {
                "success": False,
                "capability": cap,
                "details": f"Probe crashed: {exc!s}"[:200],
                "error_type": type(exc).__name__,
            }

        # Log result
        event_type = "loop_qa_ok" if result["success"] else "loop_qa_fail"
        log_event(
            event_type,
            source="loop_qa",
            data={
                "capability": cap.value,
                "details": result.get("details", ""),
                "error_type": result.get("error_type"),
            },
        )

        # Update failure counter
        if result["success"]:
            self._failure_counts[cap] = 0
            self._last_failed[cap] = None
            # A success ends the streak, so a later streak may file again.
            self._proposal_filed.discard(cap)
        else:
            self._failure_counts[cap] += 1
            self._last_failed[cap] = now.isoformat()
            self._maybe_file_upgrade(cap)

        return result

    def _maybe_file_upgrade(self, cap: Capability) -> None:
        """File an upgrade task if threshold is reached and not already filed."""
        count = self._failure_counts[cap]
        if count < settings.loop_qa_upgrade_threshold:
            return
        if cap in self._proposal_filed:
            return

        try:
            title = f"Stabilize {cap.value.upper()}: self-test failing {count}x in a row"
            description = (
                f"Loop QA detected {count} consecutive failures "
                f"for capability '{cap.value}'.\n\n"
                f"Last failure: {self._last_failed[cap]}\n"
                f"Action: investigate root cause and restore capability."
            )
            # NOTE(review): create_task in this module is currently a no-op
            # stub, so the proposal is only recorded in _proposal_filed.
            create_task(
                title=title,
                description=description,
                priority="high",
                created_by="timmy_loop_qa",
                task_type="loop_qa_upgrade",
            )
            self._proposal_filed.add(cap)
            logger.info("Filed upgrade proposal for %s: %s", cap.value, title)
        except Exception as exc:
            logger.warning("Failed to file upgrade proposal: %s", exc)

    def get_health_snapshot(self) -> dict:
        """Build a health snapshot from in-memory failure counters.

        Returns a dict with ``generated_at`` (ISO timestamp, UTC),
        ``overall_status`` (the worst per-capability status) and
        ``capabilities`` (one entry per Capability member).
        """
        capabilities = []
        for cap in Capability:
            count = self._failure_counts.get(cap, 0)
            capabilities.append(
                {
                    "capability": cap,
                    "status": self.status_for_failures(count),
                    "last_failed_at": self._last_failed.get(cap),
                    "consecutive_failures": count,
                }
            )

        # Worst status wins: red > yellow > green.
        statuses = {entry["status"] for entry in capabilities}
        if "red" in statuses:
            overall = "red"
        elif "yellow" in statuses:
            overall = "yellow"
        else:
            overall = "green"

        return {
            "generated_at": datetime.now(UTC).isoformat(),
            "overall_status": overall,
            "capabilities": capabilities,
        }

    @staticmethod
    def status_for_failures(count: int) -> str:
        """Map consecutive failure count to green/yellow/red.

        Red at or above ``settings.loop_qa_upgrade_threshold``; otherwise
        yellow from two consecutive failures; otherwise green.
        """
        if count >= settings.loop_qa_upgrade_threshold:
            return "red"
        if count >= 2:
            return "yellow"
        return "green"


# ── Module singleton ─────────────────────────────────────────────────────────
loop_qa_orchestrator = LoopQAOrchestrator()