Files
Timmy-time-dashboard/src/timmy/loop_qa.py
Kimi Agent fdc5b861ca
All checks were successful
Tests / lint (pull_request) Successful in 3s
Tests / test (pull_request) Successful in 44s
fix: replace 59 bare except clauses with proper logging (#25)
All `except Exception:` now catch as `except Exception as exc:` with
appropriate logging (warning for critical paths, debug for graceful degradation).

Added logger setup to 4 files that lacked it:
- src/timmy/memory/vector_store.py
- src/dashboard/middleware/csrf.py
- src/dashboard/middleware/security_headers.py
- src/spark/memory.py

31 files changed across timmy core, dashboard, infrastructure, integrations.
Zero bare excepts remain. 1340 tests passing.
2026-03-14 19:07:14 -04:00

429 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Loop QA — structured self-test framework for Timmy's capabilities.
Runs alongside (not inside) the thinking loop. Each cycle probes one
capability in round-robin, logs results via event_log, tracks failures
in memory, and files upgrade tasks via create_task() when degradation
is detected.
Reuses existing infrastructure:
- swarm.event_log.log_event / EventType → result persistence
- swarm.task_queue.models.create_task → upgrade proposals
- infrastructure.error_capture → crash handling
Usage::
from timmy.loop_qa import loop_qa_orchestrator
await loop_qa_orchestrator.run_next_test()
snapshot = loop_qa_orchestrator.get_health_snapshot()
"""
import asyncio
import logging
import uuid
from datetime import UTC, datetime
from enum import StrEnum
from config import settings
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Data models
# ---------------------------------------------------------------------------
class Capability(StrEnum):
    """Capabilities exercised by self-test probes.

    Members are StrEnum values, so each serializes as its plain string
    (used directly in event payloads and health snapshots).  Iteration
    order of this enum drives the order of ``get_health_snapshot()``.
    """

    TOOL_USE = "tool_use"                        # T1: shell command execution
    MULTISTEP_PLANNING = "multistep_planning"    # T2: vault write-then-verify
    MEMORY_READ = "memory_read"                  # T4: fact retrieval
    MEMORY_WRITE = "memory_write"                # T3: fact storage
    SELF_CODING = "self_coding"                  # T5: self-note via vault
    LIGHTNING_ECON = "lightning_econ"            # T6: placeholder (pending v2)
# ---------------------------------------------------------------------------
# Lazy accessors (avoid import-time side effects)
# ---------------------------------------------------------------------------
def _get_shell_hand():
    """Return the shell hand singleton, importing it on first use.

    The import is deferred so that loading this module has no side effects.
    """
    from infrastructure.hands.shell import shell_hand

    return shell_hand
def _get_vault():
    """Return the vault of the process-wide memory system.

    Imported lazily to keep module import free of side effects.
    """
    from timmy.memory_system import get_memory_system

    memory_system = get_memory_system()
    return memory_system.vault
def _get_brain_memory():
"""Return None — brain module removed.
Memory operations now go through timmy.memory_system.
"""
return None
# ---------------------------------------------------------------------------
# Six self-test probes — each returns a result dict
# ---------------------------------------------------------------------------
async def probe_tool_use() -> dict:
    """T1: run ``ls`` through the shell hand and expect non-empty stdout.

    Returns a result dict with ``success``, ``capability``, ``details``
    and ``error_type`` keys; any exception is folded into a failure dict.
    """
    capability = Capability.TOOL_USE
    try:
        run_result = await _get_shell_hand().run("ls")
        # Non-empty stdout on a successful run is the pass criterion.
        if run_result.success and run_result.stdout.strip():
            line_count = len(run_result.stdout.splitlines())
            return {
                "success": True,
                "capability": capability,
                "details": f"ls returned {line_count} lines",
                "error_type": None,
            }
        return {
            "success": False,
            "capability": capability,
            "details": f"ls returned empty or failed: {run_result.stderr[:100]}",
            "error_type": "empty_result",
        }
    except Exception as exc:
        return {
            "success": False,
            "capability": capability,
            "details": str(exc)[:200],
            "error_type": type(exc).__name__,
        }
async def probe_multistep_planning() -> dict:
    """T2: write a temporary vault note, then verify it exists with content.

    Exercises a two-step plan (write, then read back) and reports the
    outcome as a result dict; exceptions become failure dicts.
    """
    capability = Capability.MULTISTEP_PLANNING
    try:
        vault = _get_vault()
        # Unique marker lets the read-back step prove this exact write landed.
        marker = f"loop_qa_plan_test_{uuid.uuid4().hex[:8]}"
        timestamp = datetime.now(UTC).isoformat()
        body = f"# Loop QA Planning Test\n\nMarker: {marker}\nDate: {timestamp}"
        note_path = await asyncio.to_thread(vault.write_note, "loop_qa_test", body, "notes")
        if note_path.exists() and marker in note_path.read_text():
            return {
                "success": True,
                "capability": capability,
                "details": f"Wrote and verified {note_path.name}",
                "error_type": None,
            }
        return {
            "success": False,
            "capability": capability,
            "details": "File missing or content mismatch",
            "error_type": "verification_failed",
        }
    except Exception as exc:
        return {
            "success": False,
            "capability": capability,
            "details": str(exc)[:200],
            "error_type": type(exc).__name__,
        }
async def probe_memory_write() -> dict:
    """T3: store a marker fact through brain memory, if it is available.

    Fix: ``_get_brain_memory()`` now always returns ``None`` (the brain
    module was removed), so the old body unconditionally raised
    ``AttributeError`` on ``mem.store_fact_sync`` — this probe failed on
    every cycle and kept tripping the upgrade-proposal circuit breaker
    for a subsystem that no longer exists.  A missing backend is now a
    placeholder pass, mirroring ``probe_lightning_econ``.

    Returns a result dict with ``success``, ``capability``, ``details``
    and ``error_type`` keys.
    """
    cap = Capability.MEMORY_WRITE
    try:
        mem = _get_brain_memory()
        if mem is None:
            # Brain module removed — nothing to exercise; don't count as
            # a degradation of a capability that was deliberately retired.
            return {
                "success": True,
                "capability": cap,
                "details": "Brain memory removed — placeholder pass",
                "error_type": None,
            }
        marker = f"loop_qa_marker_{uuid.uuid4().hex[:8]}"
        await asyncio.to_thread(mem.store_fact_sync, "self_test_marker", marker)
        return {
            "success": True,
            "capability": cap,
            "details": f"Stored fact: {marker}",
            "error_type": None,
        }
    except Exception as exc:
        return {
            "success": False,
            "capability": cap,
            "details": str(exc)[:200],
            "error_type": type(exc).__name__,
        }
async def probe_memory_read() -> dict:
    """T4: read back self_test_marker facts from brain memory, if available.

    Fix: ``_get_brain_memory()`` now always returns ``None`` (the brain
    module was removed), so the old body unconditionally raised
    ``AttributeError`` on ``mem.get_facts_sync`` and the probe failed on
    every cycle, inflating failure counters for a retired subsystem.
    A missing backend is now a placeholder pass, mirroring
    ``probe_lightning_econ``.

    Returns a result dict with ``success``, ``capability``, ``details``
    and ``error_type`` keys.
    """
    cap = Capability.MEMORY_READ
    try:
        mem = _get_brain_memory()
        if mem is None:
            # Brain module removed — nothing to exercise; see probe_memory_write.
            return {
                "success": True,
                "capability": cap,
                "details": "Brain memory removed — placeholder pass",
                "error_type": None,
            }
        facts = await asyncio.to_thread(mem.get_facts_sync, "self_test_marker")
        if facts:
            return {
                "success": True,
                "capability": cap,
                "details": f"Retrieved {len(facts)} self_test_marker facts",
                "error_type": None,
            }
        return {
            "success": False,
            "capability": cap,
            "details": "No self_test_marker facts found",
            "error_type": "empty_result",
        }
    except Exception as exc:
        return {
            "success": False,
            "capability": cap,
            "details": str(exc)[:200],
            "error_type": type(exc).__name__,
        }
async def probe_self_coding() -> dict:
    """T5: write a self-test note to memory/self/ via the vault.

    Passes when the written note exists on disk with non-zero size;
    exceptions are folded into a failure result dict.
    """
    capability = Capability.SELF_CODING
    try:
        vault = _get_vault()
        generated_at = datetime.now(UTC).isoformat()
        note_body = (
            "# Self-Test Improvement Note\n\n"
            f"**Generated:** {generated_at}\n\n"
            "## What\nLoop QA self-coding probe — validates vault write capability.\n\n"
            "## Why\nEnsure the self-coding pathway is functional.\n\n"
            "## How\nWrite this note and verify it exists."
        )
        note_path = await asyncio.to_thread(vault.write_note, "self_test_note", note_body, "self")
        if note_path.exists() and note_path.stat().st_size > 0:
            return {
                "success": True,
                "capability": capability,
                "details": f"Wrote {note_path.name} ({note_path.stat().st_size} bytes)",
                "error_type": None,
            }
        return {
            "success": False,
            "capability": capability,
            "details": "File missing or empty after write",
            "error_type": "verification_failed",
        }
    except Exception as exc:
        return {
            "success": False,
            "capability": capability,
            "details": str(exc)[:200],
            "error_type": type(exc).__name__,
        }
async def probe_lightning_econ() -> dict:
    """T6: placeholder — Lightning module pending v2.

    Always reports success until the Lightning economics module ships.
    """
    # No backend to exercise yet; emit a synthetic passing result.
    placeholder = {
        "success": True,
        "capability": Capability.LIGHTNING_ECON,
        "details": "Lightning module pending v2 — placeholder pass",
        "error_type": None,
    }
    return placeholder
# ---------------------------------------------------------------------------
# Test sequence (round-robin order)
# ---------------------------------------------------------------------------
# Round-robin probe order: (capability, module-level probe function name).
# Names (not function objects) are stored so the orchestrator can resolve
# them with getattr at call time — see run_next_test's "for testability" note.
TEST_SEQUENCE: list[tuple[Capability, str]] = [
    (Capability.TOOL_USE, "probe_tool_use"),
    (Capability.MULTISTEP_PLANNING, "probe_multistep_planning"),
    (Capability.MEMORY_WRITE, "probe_memory_write"),
    (Capability.MEMORY_READ, "probe_memory_read"),
    (Capability.SELF_CODING, "probe_self_coding"),
    (Capability.LIGHTNING_ECON, "probe_lightning_econ"),
]
# ---------------------------------------------------------------------------
# Orchestrator
# ---------------------------------------------------------------------------
def log_event(event_type, **kwargs):
    """Accept and drop a would-be event-log call.

    The swarm event_log module was removed; callers keep their call sites
    and this stub records the skip at debug level only.
    """
    logger.debug("log_event(%s) — swarm module removed, skipping", event_type)
def capture_error(exc, **kwargs):
    """Forward *exc* to ``infrastructure.error_capture`` (lazy import).

    Returns whatever the underlying ``capture_error`` returns, or ``None``
    when the import or the capture call fails.  Error capture is
    best-effort: failures are logged at debug level and never propagate,
    so they cannot mask the exception being reported.

    Fix: the failure path previously logged the same problem twice (once
    message-only, once with ``exc_info``); a single debug record with the
    traceback attached carries all of that information.
    """
    try:
        from infrastructure.error_capture import capture_error as _capture

        return _capture(exc, **kwargs)
    except Exception as capture_exc:
        logger.debug("Failed to capture error: %s", capture_exc, exc_info=True)
        return None
def create_task(**kwargs):
    """Return ``None`` without filing anything — swarm task queue removed.

    Kept as a stub so upgrade-proposal call sites remain intact; the skip
    is recorded at debug level.
    """
    logger.debug("create_task() — swarm module removed, skipping")
    return None
class LoopQAOrchestrator:
    """Round-robin self-test orchestrator.

    Runs one probe per invocation, cycling through T1–T6. Tracks
    consecutive failures in memory (circuit-breaker pattern) and
    files upgrade tasks via create_task() when degradation is detected.

    State is purely in-memory: counters reset on process restart.
    """

    def __init__(self) -> None:
        # Cursor into TEST_SEQUENCE for the next probe to run.
        self._test_index: int = 0
        # Consecutive-failure count per capability; reset to 0 on success.
        self._failure_counts: dict[Capability, int] = {c: 0 for c in Capability}
        # ISO timestamp of the most recent failure, or None while healthy.
        self._last_failed: dict[Capability, str | None] = {c: None for c in Capability}
        # Capabilities that already have an open upgrade proposal
        # (cleared when the capability passes again).
        self._proposal_filed: set[Capability] = set()
        # Probes run during the current clock hour (throttle counter).
        self._hourly_count: int = 0
        # Hour-of-day the counter belongs to; -1 forces a reset on first run.
        self._hour_marker: int = -1

    async def run_next_test(self) -> dict | None:
        """Run the next probe in the round-robin sequence.

        Returns result dict, or None if disabled/throttled.
        """
        if not settings.loop_qa_enabled:
            return None
        # Hourly throttle — resets when the wall-clock hour-of-day changes
        # (not a rolling 60-minute window).
        now = datetime.now(UTC)
        current_hour = now.hour
        if current_hour != self._hour_marker:
            self._hourly_count = 0
            self._hour_marker = current_hour
        if self._hourly_count >= settings.loop_qa_max_per_hour:
            logger.debug(
                "Loop QA throttled: %d/%d this hour",
                self._hourly_count,
                settings.loop_qa_max_per_hour,
            )
            return None
        # Pick next probe (resolve name at call time for testability)
        import timmy.loop_qa as _self_module

        cap, probe_name = TEST_SEQUENCE[self._test_index]
        probe_fn = getattr(_self_module, probe_name)
        # Advance the cursor and spend the hourly budget before running,
        # so a crashing probe still counts and the rotation moves on.
        self._test_index = (self._test_index + 1) % len(TEST_SEQUENCE)
        self._hourly_count += 1
        # Run probe
        try:
            result = await probe_fn()
        except Exception as exc:
            # Probe itself crashed — record failure and report
            capture_error(exc, source="loop_qa", context={"capability": cap.value})
            result = {
                "success": False,
                "capability": cap,
                "details": f"Probe crashed: {exc!s}"[:200],
                "error_type": type(exc).__name__,
            }
        # Log result
        event_type = "loop_qa_ok" if result["success"] else "loop_qa_fail"
        log_event(
            event_type,
            source="loop_qa",
            data={
                "capability": cap.value,
                "details": result.get("details", ""),
                "error_type": result.get("error_type"),
            },
        )
        # Update failure counter
        if result["success"]:
            self._failure_counts[cap] = 0
            self._last_failed[cap] = None
            # Allow a new proposal if the capability degrades again later.
            self._proposal_filed.discard(cap)
        else:
            self._failure_counts[cap] += 1
            self._last_failed[cap] = now.isoformat()
            self._maybe_file_upgrade(cap)
        return result

    def _maybe_file_upgrade(self, cap: Capability) -> None:
        """File an upgrade task if threshold is reached and not already filed."""
        count = self._failure_counts[cap]
        if count < settings.loop_qa_upgrade_threshold:
            return
        if cap in self._proposal_filed:
            return
        try:
            title = f"Stabilize {cap.value.upper()}: self-test failing {count}x in a row"
            description = (
                f"Loop QA detected {count} consecutive failures "
                f"for capability '{cap.value}'.\n\n"
                f"Last failure: {self._last_failed[cap]}\n"
                f"Action: investigate root cause and restore capability."
            )
            create_task(
                title=title,
                description=description,
                priority="high",
                created_by="timmy_loop_qa",
                task_type="loop_qa_upgrade",
            )
            # Mark as filed so repeated failures don't spam duplicate tasks.
            self._proposal_filed.add(cap)
            logger.info("Filed upgrade proposal for %s: %s", cap.value, title)
        except Exception as exc:
            logger.warning("Failed to file upgrade proposal: %s", exc)

    def get_health_snapshot(self) -> dict:
        """Build a health snapshot from in-memory failure counters."""
        capabilities = []
        for cap in Capability:
            count = self._failure_counts.get(cap, 0)
            capabilities.append(
                {
                    "capability": cap,
                    "status": self.status_for_failures(count),
                    "last_failed_at": self._last_failed.get(cap),
                    "consecutive_failures": count,
                }
            )
        # Overall status is the worst of the per-capability statuses.
        statuses = [c["status"] for c in capabilities]
        if "red" in statuses:
            overall = "red"
        elif "yellow" in statuses:
            overall = "yellow"
        else:
            overall = "green"
        return {
            "generated_at": datetime.now(UTC).isoformat(),
            "overall_status": overall,
            "capabilities": capabilities,
        }

    @staticmethod
    def status_for_failures(count: int) -> str:
        """Map consecutive failure count to green/yellow/red."""
        # Red at the same threshold that triggers an upgrade proposal;
        # yellow from the second consecutive failure; green otherwise.
        if count >= settings.loop_qa_upgrade_threshold:
            return "red"
        elif count >= 2:
            return "yellow"
        return "green"
# ── Module singleton ─────────────────────────────────────────────────────────
# Shared orchestrator instance: importers call run_next_test() /
# get_health_snapshot() on this object rather than constructing their own,
# so failure counters accumulate in one place per process.
loop_qa_orchestrator = LoopQAOrchestrator()