1
0

feat: code quality audit + autoresearch integration + infra hardening (#150)

This commit is contained in:
Alexander Whitestone
2026-03-08 12:50:44 -04:00
committed by GitHub
parent fd0ede0d51
commit ae3bb1cc21
186 changed files with 5129 additions and 3289 deletions

View File

@@ -71,3 +71,23 @@
# Requires: pip install ".[discord]"
# Optional: pip install pyzbar Pillow (for QR code invite detection from screenshots)
# DISCORD_TOKEN=
# ── Autoresearch — autonomous ML experiment loops ────────────────────────────
# Enable autonomous experiment loops (Karpathy autoresearch pattern).
# AUTORESEARCH_ENABLED=false
# AUTORESEARCH_WORKSPACE=data/experiments
# AUTORESEARCH_TIME_BUDGET=300
# AUTORESEARCH_MAX_ITERATIONS=100
# AUTORESEARCH_METRIC=val_bpb
# ── Docker Production ────────────────────────────────────────────────────────
# When deploying with docker-compose.prod.yml:
# - Containers run as non-root user "timmy" (defined in Dockerfile)
# - No source bind mounts — code is baked into the image
# - Set TIMMY_ENV=production to enforce security checks
# - All secrets below MUST be set before production deployment
#
# Taskosaur secrets (change from dev defaults):
# TASKOSAUR_JWT_SECRET=<generate with: python3 -c "import secrets; print(secrets.token_hex(32))">
# TASKOSAUR_JWT_REFRESH_SECRET=<generate with: python3 -c "import secrets; print(secrets.token_hex(32))">
# TASKOSAUR_ENCRYPTION_KEY=<generate with: python3 -c "import secrets; print(secrets.token_hex(32))">

View File

@@ -7,8 +7,30 @@ on:
branches: ["**"]
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install linters
run: pip install black==23.12.1 isort==5.13.2 bandit==1.7.5
- name: Check formatting (black)
run: black --check --line-length 100 src/ tests/
- name: Check import order (isort)
run: isort --check --profile black --line-length 100 src/ tests/
- name: Security scan (bandit)
run: bandit -r src/ -ll -s B101,B104,B307,B310,B324,B601,B608 -q
test:
runs-on: ubuntu-latest
needs: lint
# Required for publish-unit-test-result-action to post check runs and PR comments
permissions:
@@ -22,7 +44,15 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: "pip"
- name: Cache Poetry virtualenv
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.cache/pip
key: poetry-${{ hashFiles('poetry.lock') }}
restore-keys: poetry-
- name: Install dependencies
run: |
@@ -60,3 +90,11 @@ jobs:
name: coverage-report
path: reports/coverage.xml
retention-days: 14
docker-build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build Docker image
run: DOCKER_BUILDKIT=1 docker build -t timmy-time:ci .

View File

@@ -51,12 +51,12 @@ repos:
exclude: ^tests/
stages: [manual]
# Full test suite with 30-second wall-clock limit.
# Current baseline: ~18s. If tests get slow, this blocks the commit.
# Unit tests only with 30-second wall-clock limit.
# Runs only fast unit tests on commit; full suite runs in CI.
- repo: local
hooks:
- id: pytest-fast
name: pytest (30s limit)
name: pytest unit (30s limit)
entry: timeout 30 poetry run pytest
language: system
types: [python]
@@ -68,4 +68,8 @@ repos:
- -q
- --tb=short
- --timeout=10
- -m
- unit
- -p
- no:xdist
verbose: true

View File

@@ -56,7 +56,7 @@ make test-cov # With coverage (term-missing + XML)
- **Test mode:** `TIMMY_TEST_MODE=1` set automatically in conftest
- **FastAPI testing:** Use the `client` fixture
- **Async:** `asyncio_mode = "auto"` — async tests detected automatically
- **Coverage threshold:** 60% (`fail_under` in `pyproject.toml`)
- **Coverage threshold:** 73% (`fail_under` in `pyproject.toml`)
---

View File

@@ -11,7 +11,7 @@
# timmy-time:latest \
# python -m swarm.agent_runner --agent-id w1 --name Worker-1
# ── Stage 1: Builder — export deps via Poetry, install via pip ──────────────
# ── Stage 1: Builder — install deps via Poetry ──────────────────────────────
FROM python:3.12-slim AS builder
RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -20,18 +20,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
WORKDIR /build
# Install Poetry + export plugin (only needed for export, not in runtime)
RUN pip install --no-cache-dir poetry poetry-plugin-export
# Install Poetry (only needed to resolve deps, not in runtime)
RUN pip install --no-cache-dir poetry
# Copy dependency files only (layer caching)
COPY pyproject.toml poetry.lock ./
# Export pinned requirements and install with pip cache mount
RUN poetry export --extras swarm --extras telegram --extras discord --without-hashes \
-f requirements.txt -o requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install --no-cache-dir -r requirements.txt
# Install deps directly from lock file (no virtualenv, no export plugin needed)
RUN poetry config virtualenvs.create false && \
poetry install --only main --extras telegram --extras discord --no-interaction
# ── Stage 2: Runtime ───────────────────────────────────────────────────────
FROM python:3.12-slim AS base

View File

@@ -210,6 +210,11 @@ docker-up:
mkdir -p data
docker compose up -d dashboard
docker-prod:
mkdir -p data
DOCKER_BUILDKIT=1 docker build -t timmy-time:latest .
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d dashboard
docker-down:
docker compose down

56
docker-compose.prod.yml Normal file
View File

@@ -0,0 +1,56 @@
# ── Production Compose Overlay ─────────────────────────────────────────────────
#
# Usage:
# make docker-prod # build + start with prod settings
# docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
#
# Differences from dev:
# - Runs as non-root user (timmy) from Dockerfile
# - No bind mounts — uses image-baked source only
# - Named volumes only (no host path dependencies)
# - Read-only root filesystem with tmpfs for /tmp
# - Resource limits enforced
# - Secrets passed via environment variables (set in .env)
#
# Security note: Set all secrets in .env before deploying.
# Required: L402_HMAC_SECRET, L402_MACAROON_SECRET
# Recommended: TASKOSAUR_JWT_SECRET, TASKOSAUR_ENCRYPTION_KEY
services:
dashboard:
# Remove dev-only root user override — use Dockerfile's USER timmy
user: ""
read_only: true
tmpfs:
- /tmp:size=100M
volumes:
# Override: named volume only, no host bind mounts
- timmy-data:/app/data
# Remove ./src and ./static bind mounts (use baked-in image files)
environment:
DEBUG: "false"
TIMMY_ENV: "production"
deploy:
resources:
limits:
cpus: "2.0"
memory: 2G
celery-worker:
user: ""
read_only: true
tmpfs:
- /tmp:size=100M
volumes:
- timmy-data:/app/data
deploy:
resources:
limits:
cpus: "1.0"
memory: 1G
# Override timmy-data to use a simple named volume (no host bind)
volumes:
timmy-data:
driver: local

View File

@@ -97,6 +97,12 @@ markers = [
"skip_ci: Skip in CI environment (local development only)",
]
[tool.isort]
profile = "black"
line_length = 100
src_paths = ["src", "tests"]
known_first_party = ["brain", "config", "dashboard", "infrastructure", "integrations", "spark", "swarm", "timmy", "timmy_serve"]
[tool.coverage.run]
source = ["src"]
omit = [

View File

@@ -11,9 +11,9 @@ upgrade to distributed rqlite over Tailscale — same API, replicated.
"""
from brain.client import BrainClient
from brain.worker import DistributedWorker
from brain.embeddings import LocalEmbedder
from brain.memory import UnifiedMemory, get_memory
from brain.worker import DistributedWorker
__all__ = [
"BrainClient",

View File

@@ -28,6 +28,7 @@ class BrainClient:
def __init__(self, rqlite_url: Optional[str] = None, node_id: Optional[str] = None):
from config import settings
self.rqlite_url = rqlite_url or settings.rqlite_url or DEFAULT_RQLITE_URL
self.node_id = node_id or f"{socket.gethostname()}-{os.getpid()}"
self.source = self._detect_source()
@@ -38,6 +39,7 @@ class BrainClient:
# Could be 'timmy', 'zeroclaw', 'worker', etc.
# For now, infer from context or env
from config import settings
return settings.brain_source
# ──────────────────────────────────────────────────────────────────────────
@@ -49,7 +51,7 @@ class BrainClient:
content: str,
tags: Optional[List[str]] = None,
source: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Store a memory with embedding.
@@ -77,14 +79,11 @@ class BrainClient:
source or self.source,
json.dumps(tags or []),
json.dumps(metadata or {}),
datetime.utcnow().isoformat()
datetime.utcnow().isoformat(),
]
try:
resp = await self._client.post(
f"{self.rqlite_url}/db/execute",
json=[query, params]
)
resp = await self._client.post(f"{self.rqlite_url}/db/execute", json=[query, params])
resp.raise_for_status()
result = resp.json()
@@ -101,10 +100,7 @@ class BrainClient:
raise
async def recall(
self,
query: str,
limit: int = 5,
sources: Optional[List[str]] = None
self, query: str, limit: int = 5, sources: Optional[List[str]] = None
) -> List[str]:
"""Semantic search for memories.
@@ -134,22 +130,21 @@ class BrainClient:
params.append(limit)
try:
resp = await self._client.post(
f"{self.rqlite_url}/db/query",
json=[sql, params]
)
resp = await self._client.post(f"{self.rqlite_url}/db/query", json=[sql, params])
resp.raise_for_status()
result = resp.json()
results = []
if "results" in result and result["results"]:
for row in result["results"][0].get("rows", []):
results.append({
"content": row[0],
"source": row[1],
"metadata": json.loads(row[2]) if row[2] else {},
"distance": row[3]
})
results.append(
{
"content": row[0],
"source": row[1],
"metadata": json.loads(row[2]) if row[2] else {},
"distance": row[3],
}
)
return results
@@ -159,10 +154,7 @@ class BrainClient:
return []
async def get_recent(
self,
hours: int = 24,
limit: int = 20,
sources: Optional[List[str]] = None
self, hours: int = 24, limit: int = 20, sources: Optional[List[str]] = None
) -> List[Dict[str, Any]]:
"""Get recent memories by time.
@@ -190,24 +182,23 @@ class BrainClient:
params.append(limit)
try:
resp = await self._client.post(
f"{self.rqlite_url}/db/query",
json=[sql, params]
)
resp = await self._client.post(f"{self.rqlite_url}/db/query", json=[sql, params])
resp.raise_for_status()
result = resp.json()
memories = []
if "results" in result and result["results"]:
for row in result["results"][0].get("rows", []):
memories.append({
"id": row[0],
"content": row[1],
"source": row[2],
"tags": json.loads(row[3]) if row[3] else [],
"metadata": json.loads(row[4]) if row[4] else {},
"created_at": row[5]
})
memories.append(
{
"id": row[0],
"content": row[1],
"source": row[2],
"tags": json.loads(row[3]) if row[3] else [],
"metadata": json.loads(row[4]) if row[4] else {},
"created_at": row[5],
}
)
return memories
@@ -248,7 +239,7 @@ class BrainClient:
content: str,
task_type: str = "general",
priority: int = 0,
metadata: Optional[Dict[str, Any]] = None
metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Submit a task to the distributed queue.
@@ -270,14 +261,11 @@ class BrainClient:
task_type,
priority,
json.dumps(metadata or {}),
datetime.utcnow().isoformat()
datetime.utcnow().isoformat(),
]
try:
resp = await self._client.post(
f"{self.rqlite_url}/db/execute",
json=[query, params]
)
resp = await self._client.post(f"{self.rqlite_url}/db/execute", json=[query, params])
resp.raise_for_status()
result = resp.json()
@@ -293,9 +281,7 @@ class BrainClient:
raise
async def claim_task(
self,
capabilities: List[str],
node_id: Optional[str] = None
self, capabilities: List[str], node_id: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""Atomically claim next available task.
@@ -332,10 +318,7 @@ class BrainClient:
params = [claimer, datetime.utcnow().isoformat()] + capabilities
try:
resp = await self._client.post(
f"{self.rqlite_url}/db/execute",
json=[query, params]
)
resp = await self._client.post(f"{self.rqlite_url}/db/execute", json=[query, params])
resp.raise_for_status()
result = resp.json()
@@ -348,7 +331,7 @@ class BrainClient:
"content": row[1],
"type": row[2],
"priority": row[3],
"metadata": json.loads(row[4]) if row[4] else {}
"metadata": json.loads(row[4]) if row[4] else {},
}
return None
@@ -358,11 +341,7 @@ class BrainClient:
return None
async def complete_task(
self,
task_id: int,
success: bool,
result: Optional[str] = None,
error: Optional[str] = None
self, task_id: int, success: bool, result: Optional[str] = None, error: Optional[str] = None
) -> None:
"""Mark task as completed or failed.
@@ -385,10 +364,7 @@ class BrainClient:
params = [status, result, error, datetime.utcnow().isoformat(), task_id]
try:
await self._client.post(
f"{self.rqlite_url}/db/execute",
json=[query, params]
)
await self._client.post(f"{self.rqlite_url}/db/execute", json=[query, params])
logger.debug(f"Task {task_id} marked {status}")
except Exception as e:
@@ -412,24 +388,23 @@ class BrainClient:
"""
try:
resp = await self._client.post(
f"{self.rqlite_url}/db/query",
json=[sql, [limit]]
)
resp = await self._client.post(f"{self.rqlite_url}/db/query", json=[sql, [limit]])
resp.raise_for_status()
result = resp.json()
tasks = []
if "results" in result and result["results"]:
for row in result["results"][0].get("rows", []):
tasks.append({
"id": row[0],
"content": row[1],
"type": row[2],
"priority": row[3],
"metadata": json.loads(row[4]) if row[4] else {},
"created_at": row[5]
})
tasks.append(
{
"id": row[0],
"content": row[1],
"type": row[2],
"priority": row[3],
"metadata": json.loads(row[4]) if row[4] else {},
"created_at": row[5],
}
)
return tasks

View File

@@ -37,12 +37,15 @@ class LocalEmbedder:
try:
from sentence_transformers import SentenceTransformer
logger.info(f"Loading embedding model: {self.model_name}")
_model = SentenceTransformer(self.model_name)
self._model = _model
logger.info(f"Embedding model loaded ({self._dimensions} dims)")
except ImportError:
logger.error("sentence-transformers not installed. Run: pip install sentence-transformers")
logger.error(
"sentence-transformers not installed. Run: pip install sentence-transformers"
)
raise
def encode(self, text: Union[str, List[str]]):
@@ -67,6 +70,7 @@ class LocalEmbedder:
Float32 bytes
"""
import numpy as np
embedding = self.encode(text)
if len(embedding.shape) > 1:
embedding = embedding[0]
@@ -78,6 +82,7 @@ class LocalEmbedder:
Vectors should already be normalized from encode().
"""
import numpy as np
return float(np.dot(a, b))

View File

@@ -48,6 +48,7 @@ _SCHEMA_VERSION = 1
def _get_db_path() -> Path:
"""Get the brain database path from env or default."""
from config import settings
if settings.brain_db_path:
return Path(settings.brain_db_path)
return _DEFAULT_DB_PATH
@@ -75,6 +76,7 @@ class UnifiedMemory:
# Auto-detect: use rqlite if RQLITE_URL is set, otherwise local SQLite
if use_rqlite is None:
from config import settings as _settings
use_rqlite = bool(_settings.rqlite_url)
self._use_rqlite = use_rqlite
@@ -107,10 +109,12 @@ class UnifiedMemory:
"""Lazy-load the embedding model."""
if self._embedder is None:
from config import settings as _settings
if _settings.timmy_skip_embeddings:
return None
try:
from brain.embeddings import LocalEmbedder
self._embedder = LocalEmbedder()
except ImportError:
logger.warning("sentence-transformers not available — semantic search disabled")
@@ -125,6 +129,7 @@ class UnifiedMemory:
"""Lazy-load the rqlite BrainClient."""
if self._rqlite_client is None:
from brain.client import BrainClient
self._rqlite_client = BrainClient()
return self._rqlite_client
@@ -292,15 +297,17 @@ class UnifiedMemory:
results = []
for score, row in scored[:limit]:
results.append({
"id": row["id"],
"content": row["content"],
"source": row["source"],
"tags": json.loads(row["tags"]) if row["tags"] else [],
"metadata": json.loads(row["metadata"]) if row["metadata"] else {},
"score": score,
"created_at": row["created_at"],
})
results.append(
{
"id": row["id"],
"content": row["content"],
"source": row["source"],
"tags": json.loads(row["tags"]) if row["tags"] else [],
"metadata": json.loads(row["metadata"]) if row["metadata"] else {},
"score": score,
"created_at": row["created_at"],
}
)
return results
finally:

View File

@@ -89,6 +89,8 @@ def get_migration_sql(from_version: int, to_version: int) -> str:
for v in range(from_version + 1, to_version + 1):
if v in MIGRATIONS:
sql_parts.append(MIGRATIONS[v])
sql_parts.append(f"UPDATE schema_version SET version = {v}, applied_at = datetime('now');")
sql_parts.append(
f"UPDATE schema_version SET version = {v}, applied_at = datetime('now');"
)
return "\n".join(sql_parts)

View File

@@ -72,9 +72,7 @@ class DistributedWorker:
"""Check for NVIDIA or AMD GPU."""
try:
# Check for nvidia-smi
result = subprocess.run(
["nvidia-smi"], capture_output=True, timeout=5
)
result = subprocess.run(["nvidia-smi"], capture_output=True, timeout=5)
if result.returncode == 0:
return True
except (OSError, subprocess.SubprocessError):
@@ -89,7 +87,9 @@ class DistributedWorker:
try:
result = subprocess.run(
["system_profiler", "SPDisplaysDataType"],
capture_output=True, text=True, timeout=5
capture_output=True,
text=True,
timeout=5,
)
if "Metal" in result.stdout:
return True
@@ -102,8 +102,7 @@ class DistributedWorker:
"""Check if we have internet connectivity."""
try:
result = subprocess.run(
["curl", "-s", "--max-time", "3", "https://1.1.1.1"],
capture_output=True, timeout=5
["curl", "-s", "--max-time", "3", "https://1.1.1.1"], capture_output=True, timeout=5
)
return result.returncode == 0
except (OSError, subprocess.SubprocessError):
@@ -114,8 +113,7 @@ class DistributedWorker:
try:
if os.uname().sysname == "Darwin":
result = subprocess.run(
["sysctl", "-n", "hw.memsize"],
capture_output=True, text=True
["sysctl", "-n", "hw.memsize"], capture_output=True, text=True
)
bytes_mem = int(result.stdout.strip())
return bytes_mem / (1024**3)
@@ -132,9 +130,7 @@ class DistributedWorker:
def _has_command(self, cmd: str) -> bool:
"""Check if command exists."""
try:
result = subprocess.run(
["which", cmd], capture_output=True, timeout=5
)
result = subprocess.run(["which", cmd], capture_output=True, timeout=5)
return result.returncode == 0
except (OSError, subprocess.SubprocessError):
return False
@@ -171,7 +167,7 @@ class DistributedWorker:
proc = await asyncio.create_subprocess_shell(
f"zeroclaw exec --json '{command}'",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
@@ -180,7 +176,7 @@ class DistributedWorker:
content=f"Shell: {command}\nOutput: {stdout.decode()}",
tags=["shell", "result"],
source=self.node_id,
metadata={"command": command, "exit_code": proc.returncode}
metadata={"command": command, "exit_code": proc.returncode},
)
if proc.returncode != 0:
@@ -189,9 +185,7 @@ class DistributedWorker:
# Fallback to direct subprocess (less safe)
proc = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await proc.communicate()
@@ -214,7 +208,7 @@ class DistributedWorker:
content=result,
tags=["creative", "generated"],
source=self.node_id,
metadata={"prompt": prompt}
metadata={"prompt": prompt},
)
return result
@@ -243,13 +237,12 @@ class DistributedWorker:
# Call Ollama
try:
proc = await asyncio.create_subprocess_exec(
"curl", "-s", "http://localhost:11434/api/generate",
"-d", json.dumps({
"model": "llama3.1:8b-instruct",
"prompt": prompt,
"stream": False
}),
stdout=asyncio.subprocess.PIPE
"curl",
"-s",
"http://localhost:11434/api/generate",
"-d",
json.dumps({"model": "llama3.1:8b-instruct", "prompt": prompt, "stream": False}),
stdout=asyncio.subprocess.PIPE,
)
stdout, _ = await proc.communicate()
@@ -261,7 +254,7 @@ class DistributedWorker:
content=f"Task: {prompt}\nResult: {result}",
tags=["llm", "result"],
source=self.node_id,
metadata={"model": "llama3.1:8b-instruct"}
metadata={"model": "llama3.1:8b-instruct"},
)
return result
@@ -359,7 +352,7 @@ async def main():
await worker.run()
except KeyboardInterrupt:
worker.stop()
print("\nWorker stopped.")
logger.info("Worker stopped.")
if __name__ == "__main__":

View File

@@ -213,6 +213,15 @@ class Settings(BaseSettings):
# Timeout in seconds for OpenFang hand execution (some hands are slow).
openfang_timeout: int = 120
# ── Autoresearch — autonomous ML experiment loops ──────────────────
# Integrates Karpathy's autoresearch pattern: agents modify training
# code, run time-boxed experiments, evaluate metrics, and iterate.
autoresearch_enabled: bool = False
autoresearch_workspace: str = "data/experiments"
autoresearch_time_budget: int = 300 # seconds per experiment run
autoresearch_max_iterations: int = 100
autoresearch_metric: str = "val_bpb" # metric to optimise (lower = better)
# ── Local Hands (Shell + Git) ──────────────────────────────────────
# Enable local shell/git execution hands.
hands_shell_enabled: bool = True

View File

@@ -18,36 +18,38 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.trustedhost import TrustedHostMiddleware
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from config import settings
from dashboard.routes.agents import router as agents_router
from dashboard.routes.health import router as health_router
from dashboard.routes.marketplace import router as marketplace_router
from dashboard.routes.voice import router as voice_router
from dashboard.routes.mobile import router as mobile_router
from dashboard.routes.briefing import router as briefing_router
from dashboard.routes.telegram import router as telegram_router
from dashboard.routes.tools import router as tools_router
from dashboard.routes.spark import router as spark_router
from dashboard.routes.discord import router as discord_router
from dashboard.routes.memory import router as memory_router
from dashboard.routes.router import router as router_status_router
from dashboard.routes.grok import router as grok_router
from dashboard.routes.models import router as models_router
from dashboard.routes.models import api_router as models_api_router
from dashboard.routes.chat_api import router as chat_api_router
from dashboard.routes.thinking import router as thinking_router
from dashboard.routes.calm import router as calm_router
from dashboard.routes.swarm import router as swarm_router
from dashboard.routes.tasks import router as tasks_router
from dashboard.routes.work_orders import router as work_orders_router
from dashboard.routes.system import router as system_router
from dashboard.routes.paperclip import router as paperclip_router
from infrastructure.router.api import router as cascade_router
# Import dedicated middleware
from dashboard.middleware.csrf import CSRFMiddleware
from dashboard.middleware.request_logging import RequestLoggingMiddleware
from dashboard.middleware.security_headers import SecurityHeadersMiddleware
from dashboard.routes.agents import router as agents_router
from dashboard.routes.briefing import router as briefing_router
from dashboard.routes.calm import router as calm_router
from dashboard.routes.chat_api import router as chat_api_router
from dashboard.routes.discord import router as discord_router
from dashboard.routes.experiments import router as experiments_router
from dashboard.routes.grok import router as grok_router
from dashboard.routes.health import router as health_router
from dashboard.routes.marketplace import router as marketplace_router
from dashboard.routes.memory import router as memory_router
from dashboard.routes.mobile import router as mobile_router
from dashboard.routes.models import api_router as models_api_router
from dashboard.routes.models import router as models_router
from dashboard.routes.paperclip import router as paperclip_router
from dashboard.routes.router import router as router_status_router
from dashboard.routes.spark import router as spark_router
from dashboard.routes.swarm import router as swarm_router
from dashboard.routes.system import router as system_router
from dashboard.routes.tasks import router as tasks_router
from dashboard.routes.telegram import router as telegram_router
from dashboard.routes.thinking import router as thinking_router
from dashboard.routes.tools import router as tools_router
from dashboard.routes.voice import router as voice_router
from dashboard.routes.work_orders import router as work_orders_router
from infrastructure.router.api import router as cascade_router
def _configure_logging() -> None:
@@ -100,8 +102,8 @@ _BRIEFING_INTERVAL_HOURS = 6
async def _briefing_scheduler() -> None:
"""Background task: regenerate Timmy's briefing every 6 hours."""
from timmy.briefing import engine as briefing_engine
from infrastructure.notifications.push import notify_briefing_ready
from timmy.briefing import engine as briefing_engine
await asyncio.sleep(2)
@@ -121,9 +123,9 @@ async def _briefing_scheduler() -> None:
async def _start_chat_integrations_background() -> None:
"""Background task: start chat integrations without blocking startup."""
from integrations.telegram_bot.bot import telegram_bot
from integrations.chat_bridge.vendors.discord import discord_bot
from integrations.chat_bridge.registry import platform_registry
from integrations.chat_bridge.vendors.discord import discord_bot
from integrations.telegram_bot.bot import telegram_bot
await asyncio.sleep(0.5)
@@ -164,9 +166,9 @@ async def _discord_token_watcher() -> None:
if discord_bot.state.name == "CONNECTED":
return # Already running — stop watching
# 1. Check live environment variable (intentionally uses os.environ,
# not settings, because this polls for runtime hot-reload changes)
token = os.environ.get("DISCORD_TOKEN", "")
# 1. Check settings (pydantic-settings reads env on instantiation;
# hot-reload is handled by re-reading .env below)
token = settings.discord_token
# 2. Re-read .env file for hot-reload
if not token:
@@ -203,6 +205,7 @@ async def lifespan(app: FastAPI):
# Initialize Spark Intelligence engine
from spark.engine import spark_engine
if spark_engine.enabled:
logger.info("Spark Intelligence active — event capture enabled")
@@ -210,12 +213,17 @@ async def lifespan(app: FastAPI):
if settings.memory_prune_days > 0:
try:
from timmy.memory.vector_store import prune_memories
pruned = prune_memories(
older_than_days=settings.memory_prune_days,
keep_facts=settings.memory_prune_keep_facts,
)
if pruned:
logger.info("Memory auto-prune: removed %d entries older than %d days", pruned, settings.memory_prune_days)
logger.info(
"Memory auto-prune: removed %d entries older than %d days",
pruned,
settings.memory_prune_days,
)
except Exception as exc:
logger.debug("Memory auto-prune skipped: %s", exc)
@@ -229,7 +237,8 @@ async def lifespan(app: FastAPI):
if total_mb > settings.memory_vault_max_mb:
logger.warning(
"Memory vault (%.1f MB) exceeds limit (%d MB) — consider archiving old notes",
total_mb, settings.memory_vault_max_mb,
total_mb,
settings.memory_vault_max_mb,
)
except Exception as exc:
logger.debug("Vault size check skipped: %s", exc)
@@ -284,10 +293,7 @@ def _get_cors_origins() -> list[str]:
app.add_middleware(RequestLoggingMiddleware, skip_paths=["/health"])
# 2. Security Headers
app.add_middleware(
SecurityHeadersMiddleware,
production=not settings.debug
)
app.add_middleware(SecurityHeadersMiddleware, production=not settings.debug)
# 3. CSRF Protection
app.add_middleware(CSRFMiddleware)
@@ -314,7 +320,6 @@ if static_dir.exists():
# Shared templates instance
from dashboard.templating import templates # noqa: E402
# Include routers
app.include_router(health_router)
app.include_router(agents_router)
@@ -339,6 +344,7 @@ app.include_router(tasks_router)
app.include_router(work_orders_router)
app.include_router(system_router)
app.include_router(paperclip_router)
app.include_router(experiments_router)
app.include_router(cascade_router)

View File

@@ -1,8 +1,8 @@
"""Dashboard middleware package."""
from .csrf import CSRFMiddleware, csrf_exempt, generate_csrf_token, validate_csrf_token
from .security_headers import SecurityHeadersMiddleware
from .request_logging import RequestLoggingMiddleware
from .security_headers import SecurityHeadersMiddleware
__all__ = [
"CSRFMiddleware",

View File

@@ -4,16 +4,15 @@ Provides CSRF token generation, validation, and middleware integration
to protect state-changing endpoints from cross-site request attacks.
"""
import secrets
import hmac
import hashlib
from typing import Callable, Optional
import hmac
import secrets
from functools import wraps
from typing import Callable, Optional
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response, JSONResponse
from starlette.responses import JSONResponse, Response
# Module-level set to track exempt routes
_exempt_routes: set[str] = set()
@@ -28,6 +27,7 @@ def csrf_exempt(endpoint: Callable) -> Callable:
def webhook_endpoint():
...
"""
@wraps(endpoint)
async def async_wrapper(*args, **kwargs):
return await endpoint(*args, **kwargs)
@@ -40,7 +40,7 @@ def csrf_exempt(endpoint: Callable) -> Callable:
endpoint._csrf_exempt = True # type: ignore
# Also mark the wrapper
if hasattr(endpoint, '__code__') and endpoint.__code__.co_flags & 0x80:
if hasattr(endpoint, "__code__") and endpoint.__code__.co_flags & 0x80:
async_wrapper._csrf_exempt = True # type: ignore
return async_wrapper
else:
@@ -50,7 +50,7 @@ def csrf_exempt(endpoint: Callable) -> Callable:
def is_csrf_exempt(endpoint: Callable) -> bool:
"""Check if an endpoint is marked as CSRF exempt."""
return getattr(endpoint, '_csrf_exempt', False)
return getattr(endpoint, "_csrf_exempt", False)
def generate_csrf_token() -> str:
@@ -109,7 +109,7 @@ class CSRFMiddleware(BaseHTTPMiddleware):
secret: Optional[str] = None,
cookie_name: str = "csrf_token",
header_name: str = "X-CSRF-Token",
form_field: str = "csrf_token"
form_field: str = "csrf_token",
):
super().__init__(app)
self.secret = secret
@@ -125,6 +125,7 @@ class CSRFMiddleware(BaseHTTPMiddleware):
"""
# Bypass CSRF if explicitly disabled (e.g. in tests)
from config import settings
if settings.timmy_disable_csrf:
return await call_next(request)
@@ -144,7 +145,7 @@ class CSRFMiddleware(BaseHTTPMiddleware):
httponly=False, # Must be readable by JavaScript
secure=settings.csrf_cookie_secure,
samesite="Lax",
max_age=86400 # 24 hours
max_age=86400, # 24 hours
)
return response
@@ -164,8 +165,8 @@ class CSRFMiddleware(BaseHTTPMiddleware):
content={
"error": "CSRF validation failed",
"code": "CSRF_INVALID",
"message": "Missing or invalid CSRF token. Include the token from the csrf_token cookie in the X-CSRF-Token header or as a form field."
}
"message": "Missing or invalid CSRF token. Include the token from the csrf_token cookie in the X-CSRF-Token header or as a form field.",
},
)
return await call_next(request)
@@ -185,6 +186,7 @@ class CSRFMiddleware(BaseHTTPMiddleware):
# 1. Normalize path to prevent /webhook/../ bypasses
# Use posixpath for consistent behavior on all platforms
import posixpath
normalized_path = posixpath.normpath(path)
# Ensure it starts with / for comparison
@@ -200,13 +202,13 @@ class CSRFMiddleware(BaseHTTPMiddleware):
# Patterns ending with / are prefix-matched
# Patterns NOT ending with / are exact-matched
exempt_patterns = [
"/webhook/", # Prefix match (e.g., /webhook/stripe)
"/webhook", # Exact match
"/api/v1/", # Prefix match
"/lightning/webhook/", # Prefix match
"/webhook/", # Prefix match (e.g., /webhook/stripe)
"/webhook", # Exact match
"/api/v1/", # Prefix match
"/lightning/webhook/", # Prefix match
"/lightning/webhook", # Exact match
"/_internal/", # Prefix match
"/_internal", # Exact match
"/_internal/", # Prefix match
"/_internal", # Exact match
]
for pattern in exempt_patterns:
@@ -245,7 +247,10 @@ class CSRFMiddleware(BaseHTTPMiddleware):
# If no header token, try form data (for non-JSON POSTs)
# Check Content-Type to avoid hanging on non-form requests
content_type = request.headers.get("Content-Type", "")
if "application/x-www-form-urlencoded" in content_type or "multipart/form-data" in content_type:
if (
"application/x-www-form-urlencoded" in content_type
or "multipart/form-data" in content_type
):
try:
form_data = await request.form()
form_token = form_data.get(self.form_field)

View File

@@ -4,16 +4,15 @@ Logs HTTP requests with timing, status codes, and client information
for monitoring and debugging purposes.
"""
import logging
import time
import uuid
import logging
from typing import Optional, List
from typing import List, Optional
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
logger = logging.getLogger("timmy.requests")
@@ -39,12 +38,7 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
log_level: Logging level for successful requests.
"""
def __init__(
self,
app,
skip_paths: Optional[List[str]] = None,
log_level: int = logging.INFO
):
def __init__(self, app, skip_paths: Optional[List[str]] = None, log_level: int = logging.INFO):
super().__init__(app)
self.skip_paths = set(skip_paths or [])
self.log_level = log_level
@@ -89,7 +83,7 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
duration_ms=duration_ms,
client_ip=client_ip,
user_agent=user_agent,
correlation_id=correlation_id
correlation_id=correlation_id,
)
# Add correlation ID to response headers
@@ -110,6 +104,7 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
# Auto-escalate: create bug report task from unhandled exception
try:
from infrastructure.error_capture import capture_error
capture_error(
exc,
source="http",
@@ -163,7 +158,7 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
duration_ms: float,
client_ip: str,
user_agent: str,
correlation_id: str
correlation_id: str,
) -> None:
"""Format and log the request details.

View File

@@ -4,6 +4,8 @@ Adds common security headers to all HTTP responses to improve
application security posture against various attacks.
"""
from typing import Optional
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
@@ -37,7 +39,7 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
app,
production: bool = False,
csp_report_only: bool = False,
custom_csp: str = None
custom_csp: Optional[str] = None,
):
super().__init__(app)
self.production = production
@@ -105,14 +107,18 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
)
# Content Security Policy
csp_header = "Content-Security-Policy-Report-Only" if self.csp_report_only else "Content-Security-Policy"
csp_header = (
"Content-Security-Policy-Report-Only"
if self.csp_report_only
else "Content-Security-Policy"
)
response.headers[csp_header] = self.csp_directive
# HTTPS enforcement (production only)
if self.production:
response.headers["Strict-Transport-Security"] = (
"max-age=31536000; includeSubDomains; preload"
)
response.headers[
"Strict-Transport-Security"
] = "max-age=31536000; includeSubDomains; preload"
async def dispatch(self, request: Request, call_next) -> Response:
"""Add security headers to the response.
@@ -126,15 +132,14 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
"""
try:
response = await call_next(request)
self._add_security_headers(response)
return response
except Exception:
# Create a response for the error with security headers
from starlette.responses import PlainTextResponse
response = PlainTextResponse(
content="Internal Server Error",
status_code=500
import logging
logging.getLogger(__name__).debug(
"Upstream error in security headers middleware", exc_info=True
)
self._add_security_headers(response)
# Return the error response with headers (don't re-raise)
return response
from starlette.responses import PlainTextResponse
response = PlainTextResponse("Internal Server Error", status_code=500)
self._add_security_headers(response)
return response

View File

@@ -1,24 +1,27 @@
from datetime import datetime, date
from datetime import date, datetime
from enum import Enum as PyEnum
from sqlalchemy import (
Column, Integer, String, DateTime, Boolean, Enum as SQLEnum,
Date, ForeignKey, Index, JSON
)
from sqlalchemy import JSON, Boolean, Column, Date, DateTime
from sqlalchemy import Enum as SQLEnum
from sqlalchemy import ForeignKey, Index, Integer, String
from sqlalchemy.orm import relationship
from .database import Base # Assuming a shared Base in models/database.py
class TaskState(str, PyEnum):
LATER = "LATER"
NEXT = "NEXT"
NOW = "NOW"
DONE = "DONE"
DEFERRED = "DEFERRED" # Task pushed to tomorrow
DEFERRED = "DEFERRED" # Task pushed to tomorrow
class TaskCertainty(str, PyEnum):
FUZZY = "FUZZY" # An intention without a time
SOFT = "SOFT" # A flexible task with a time
HARD = "HARD" # A fixed meeting/appointment
FUZZY = "FUZZY" # An intention without a time
SOFT = "SOFT" # A flexible task with a time
HARD = "HARD" # A fixed meeting/appointment
class Task(Base):
__tablename__ = "tasks"
@@ -29,7 +32,7 @@ class Task(Base):
state = Column(SQLEnum(TaskState), default=TaskState.LATER, nullable=False, index=True)
certainty = Column(SQLEnum(TaskCertainty), default=TaskCertainty.SOFT, nullable=False)
is_mit = Column(Boolean, default=False, nullable=False) # 1-3 per day
is_mit = Column(Boolean, default=False, nullable=False) # 1-3 per day
sort_order = Column(Integer, default=0, nullable=False)
@@ -42,7 +45,8 @@ class Task(Base):
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
__table_args__ = (Index('ix_task_state_order', 'state', 'sort_order'),)
__table_args__ = (Index("ix_task_state_order", "state", "sort_order"),)
class JournalEntry(Base):
__tablename__ = "journal_entries"

View File

@@ -1,17 +1,16 @@
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.orm import Session, sessionmaker
SQLALCHEMY_DATABASE_URL = "sqlite:///./data/timmy_calm.db"
engine = create_engine(
SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
engine = create_engine(SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
def create_tables():
"""Create all tables defined by models that have imported Base."""
Base.metadata.create_all(bind=engine)

View File

@@ -5,9 +5,9 @@ from datetime import datetime
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from timmy.session import chat as agent_chat
from dashboard.store import message_log
from dashboard.templating import templates
from timmy.session import chat as agent_chat
logger = logging.getLogger(__name__)
@@ -38,9 +38,7 @@ async def list_agents():
@router.get("/default/panel", response_class=HTMLResponse)
async def agent_panel(request: Request):
"""Chat panel — for HTMX main-panel swaps."""
return templates.TemplateResponse(
request, "partials/agent_panel_chat.html", {"agent": None}
)
return templates.TemplateResponse(request, "partials/agent_panel_chat.html", {"agent": None})
@router.get("/default/history", response_class=HTMLResponse)
@@ -77,7 +75,9 @@ async def chat_agent(request: Request, message: str = Form(...)):
message_log.append(role="user", content=message, timestamp=timestamp, source="browser")
if response_text is not None:
message_log.append(role="agent", content=response_text, timestamp=timestamp, source="browser")
message_log.append(
role="agent", content=response_text, timestamp=timestamp, source="browser"
)
elif error_text:
message_log.append(role="error", content=error_text, timestamp=timestamp, source="browser")

View File

@@ -12,9 +12,10 @@ from datetime import datetime, timezone
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse, JSONResponse
from timmy.briefing import Briefing, engine as briefing_engine
from timmy import approvals as approval_store
from dashboard.templating import templates
from timmy import approvals as approval_store
from timmy.briefing import Briefing
from timmy.briefing import engine as briefing_engine
logger = logging.getLogger(__name__)

View File

@@ -1,4 +1,3 @@
import logging
from datetime import date, datetime
from typing import List, Optional
@@ -8,7 +7,7 @@ from fastapi.responses import HTMLResponse
from sqlalchemy.orm import Session
from dashboard.models.calm import JournalEntry, Task, TaskCertainty, TaskState
from dashboard.models.database import SessionLocal, engine, get_db, create_tables
from dashboard.models.database import SessionLocal, create_tables, engine, get_db
from dashboard.templating import templates
# Ensure CALM tables exist (safe to call multiple times)
@@ -23,11 +22,19 @@ router = APIRouter(tags=["calm"])
def get_now_task(db: Session) -> Optional[Task]:
return db.query(Task).filter(Task.state == TaskState.NOW).first()
def get_next_task(db: Session) -> Optional[Task]:
return db.query(Task).filter(Task.state == TaskState.NEXT).first()
def get_later_tasks(db: Session) -> List[Task]:
return db.query(Task).filter(Task.state == TaskState.LATER).order_by(Task.is_mit.desc(), Task.sort_order).all()
return (
db.query(Task)
.filter(Task.state == TaskState.LATER)
.order_by(Task.is_mit.desc(), Task.sort_order)
.all()
)
def promote_tasks(db: Session):
# Ensure only one NOW task exists. If multiple, demote extras to NEXT.
@@ -38,7 +45,7 @@ def promote_tasks(db: Session):
for task_to_demote in now_tasks[1:]:
task_to_demote.state = TaskState.NEXT
db.add(task_to_demote)
db.flush() # Make changes visible
db.flush() # Make changes visible
# If no NOW task, promote NEXT to NOW
current_now = db.query(Task).filter(Task.state == TaskState.NOW).first()
@@ -47,12 +54,17 @@ def promote_tasks(db: Session):
if next_task:
next_task.state = TaskState.NOW
db.add(next_task)
db.flush() # Make changes visible
db.flush() # Make changes visible
# If no NEXT task, promote highest priority LATER to NEXT
current_next = db.query(Task).filter(Task.state == TaskState.NEXT).first()
if not current_next:
later_tasks = db.query(Task).filter(Task.state == TaskState.LATER).order_by(Task.is_mit.desc(), Task.sort_order).all()
later_tasks = (
db.query(Task)
.filter(Task.state == TaskState.LATER)
.order_by(Task.is_mit.desc(), Task.sort_order)
.all()
)
if later_tasks:
later_tasks[0].state = TaskState.NEXT
db.add(later_tasks[0])
@@ -60,14 +72,17 @@ def promote_tasks(db: Session):
db.commit()
# Endpoints
@router.get("/calm", response_class=HTMLResponse)
async def get_calm_view(request: Request, db: Session = Depends(get_db)):
now_task = get_now_task(db)
next_task = get_next_task(db)
later_tasks_count = len(get_later_tasks(db))
return templates.TemplateResponse(request, "calm/calm_view.html", {"now_task": now_task,
return templates.TemplateResponse(
request,
"calm/calm_view.html",
{
"now_task": now_task,
"next_task": next_task,
"later_tasks_count": later_tasks_count,
},
@@ -101,7 +116,7 @@ async def post_morning_ritual(
task = Task(
title=mit_title,
is_mit=True,
state=TaskState.LATER, # Initially LATER, will be promoted
state=TaskState.LATER, # Initially LATER, will be promoted
certainty=TaskCertainty.SOFT,
)
db.add(task)
@@ -113,7 +128,7 @@ async def post_morning_ritual(
db.add(journal_entry)
# Create other tasks
for task_title in other_tasks.split('\n'):
for task_title in other_tasks.split("\n"):
task_title = task_title.strip()
if task_title:
task = Task(
@@ -128,20 +143,29 @@ async def post_morning_ritual(
# Set initial NOW/NEXT states
# Set initial NOW/NEXT states after all tasks are created
if not get_now_task(db) and not get_next_task(db):
later_tasks = db.query(Task).filter(Task.state == TaskState.LATER).order_by(Task.is_mit.desc(), Task.sort_order).all()
later_tasks = (
db.query(Task)
.filter(Task.state == TaskState.LATER)
.order_by(Task.is_mit.desc(), Task.sort_order)
.all()
)
if later_tasks:
# Set the highest priority LATER task to NOW
later_tasks[0].state = TaskState.NOW
db.add(later_tasks[0])
db.flush() # Flush to make the change visible for the next query
db.flush() # Flush to make the change visible for the next query
# Set the next highest priority LATER task to NEXT
if len(later_tasks) > 1:
later_tasks[1].state = TaskState.NEXT
db.add(later_tasks[1])
db.commit() # Commit changes after initial NOW/NEXT setup
db.commit() # Commit changes after initial NOW/NEXT setup
return templates.TemplateResponse(request, "calm/calm_view.html", {"now_task": get_now_task(db),
return templates.TemplateResponse(
request,
"calm/calm_view.html",
{
"now_task": get_now_task(db),
"next_task": get_next_task(db),
"later_tasks_count": len(get_later_tasks(db)),
},
@@ -154,7 +178,8 @@ async def get_evening_ritual_form(request: Request, db: Session = Depends(get_db
if not journal_entry:
raise HTTPException(status_code=404, detail="No journal entry for today")
return templates.TemplateResponse(
"calm/evening_ritual_form.html", {"request": request, "journal_entry": journal_entry})
"calm/evening_ritual_form.html", {"request": request, "journal_entry": journal_entry}
)
@router.post("/calm/ritual/evening", response_class=HTMLResponse)
@@ -175,9 +200,13 @@ async def post_evening_ritual(
db.add(journal_entry)
# Archive any remaining active tasks
active_tasks = db.query(Task).filter(Task.state.in_([TaskState.NOW, TaskState.NEXT, TaskState.LATER])).all()
active_tasks = (
db.query(Task)
.filter(Task.state.in_([TaskState.NOW, TaskState.NEXT, TaskState.LATER]))
.all()
)
for task in active_tasks:
task.state = TaskState.DEFERRED # Or DONE, depending on desired archiving logic
task.state = TaskState.DEFERRED # Or DONE, depending on desired archiving logic
task.deferred_at = datetime.utcnow()
db.add(task)
@@ -221,7 +250,7 @@ async def start_task(
):
current_now_task = get_now_task(db)
if current_now_task and current_now_task.id != task_id:
current_now_task.state = TaskState.NEXT # Demote current NOW to NEXT
current_now_task.state = TaskState.NEXT # Demote current NOW to NEXT
db.add(current_now_task)
task = db.query(Task).filter(Task.id == task_id).first()
@@ -322,7 +351,7 @@ async def reorder_tasks(
):
# Reorder LATER tasks
if later_task_ids:
ids_in_order = [int(x.strip()) for x in later_task_ids.split(',') if x.strip()]
ids_in_order = [int(x.strip()) for x in later_task_ids.split(",") if x.strip()]
for index, task_id in enumerate(ids_in_order):
task = db.query(Task).filter(Task.id == task_id).first()
if task and task.state == TaskState.LATER:
@@ -332,16 +361,18 @@ async def reorder_tasks(
# Handle NEXT task if it's part of the reorder (e.g., moved from LATER to NEXT explicitly)
if next_task_id:
task = db.query(Task).filter(Task.id == next_task_id).first()
if task and task.state == TaskState.LATER: # Only if it was a LATER task being promoted manually
if (
task and task.state == TaskState.LATER
): # Only if it was a LATER task being promoted manually
# Demote current NEXT to LATER
current_next = get_next_task(db)
if current_next:
current_next.state = TaskState.LATER
current_next.sort_order = len(get_later_tasks(db)) # Add to end of later
current_next.sort_order = len(get_later_tasks(db)) # Add to end of later
db.add(current_next)
task.state = TaskState.NEXT
task.sort_order = 0 # NEXT tasks don't really need sort_order, but for consistency
task.sort_order = 0 # NEXT tasks don't really need sort_order, but for consistency
db.add(task)
db.commit()

View File

@@ -27,12 +27,13 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api", tags=["chat-api"])
_UPLOAD_DIR = os.path.join("data", "chat-uploads")
_UPLOAD_DIR = str(Path(settings.repo_root) / "data" / "chat-uploads")
_MAX_UPLOAD_SIZE = 50 * 1024 * 1024 # 50 MB
# ── POST /api/chat ────────────────────────────────────────────────────────────
@router.post("/chat")
async def api_chat(request: Request):
"""Accept a JSON chat payload and return the agent's reply.
@@ -65,7 +66,8 @@ async def api_chat(request: Request):
# Handle multimodal content arrays — extract text parts
if isinstance(content, list):
text_parts = [
p.get("text", "") for p in content
p.get("text", "")
for p in content
if isinstance(p, dict) and p.get("type") == "text"
]
last_user_msg = " ".join(text_parts).strip()
@@ -109,6 +111,7 @@ async def api_chat(request: Request):
# ── POST /api/upload ──────────────────────────────────────────────────────────
@router.post("/upload")
async def api_upload(file: UploadFile = File(...)):
"""Accept a file upload and return its URL.
@@ -147,6 +150,7 @@ async def api_upload(file: UploadFile = File(...)):
# ── GET /api/chat/history ────────────────────────────────────────────────────
@router.get("/chat/history")
async def api_chat_history():
"""Return the in-memory chat history as JSON."""
@@ -165,6 +169,7 @@ async def api_chat_history():
# ── DELETE /api/chat/history ──────────────────────────────────────────────────
@router.delete("/chat/history")
async def api_clear_history():
"""Clear the in-memory chat history."""

View File

@@ -7,9 +7,10 @@ Endpoints:
GET /discord/oauth-url — get the bot's OAuth2 authorization URL
"""
from typing import Optional
from fastapi import APIRouter, File, Form, UploadFile
from pydantic import BaseModel
from typing import Optional
router = APIRouter(prefix="/discord", tags=["discord"])

View File

@@ -0,0 +1,77 @@
"""Experiment dashboard routes — autoresearch experiment monitoring.
Provides endpoints for viewing, starting, and monitoring autonomous
ML experiment loops powered by Karpathy's autoresearch pattern.
"""
import logging
from pathlib import Path
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from config import settings
from dashboard.templating import templates
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/experiments", tags=["experiments"])
def _workspace() -> Path:
return Path(settings.repo_root) / settings.autoresearch_workspace
@router.get("", response_class=HTMLResponse)
async def experiments_page(request: Request):
"""Experiment dashboard — lists past runs and allows starting new ones."""
from timmy.autoresearch import get_experiment_history
history = []
try:
history = get_experiment_history(_workspace())
except Exception:
logger.debug("Failed to load experiment history", exc_info=True)
return templates.TemplateResponse(
request,
"experiments.html",
{
"page_title": "Experiments — Autoresearch",
"enabled": settings.autoresearch_enabled,
"history": history[:50],
"metric_name": settings.autoresearch_metric,
"time_budget": settings.autoresearch_time_budget,
"max_iterations": settings.autoresearch_max_iterations,
},
)
@router.post("/start", response_class=JSONResponse)
async def start_experiment(request: Request):
"""Kick off an experiment loop in the background."""
if not settings.autoresearch_enabled:
raise HTTPException(
status_code=403,
detail="Autoresearch is disabled. Set AUTORESEARCH_ENABLED=true.",
)
from timmy.autoresearch import prepare_experiment
workspace = _workspace()
status = prepare_experiment(workspace)
return {"status": "started", "workspace": str(workspace), "prepare": status}
@router.get("/{run_id}", response_class=JSONResponse)
async def experiment_detail(run_id: str):
"""Get details for a specific experiment run."""
from timmy.autoresearch import get_experiment_history
history = get_experiment_history(_workspace())
for entry in history:
if entry.get("run_id") == run_id:
return entry
raise HTTPException(status_code=404, detail=f"Run {run_id} not found")

View File

@@ -43,6 +43,7 @@ async def grok_status(request: Request):
stats = None
try:
from timmy.backends import get_grok_backend
backend = get_grok_backend()
stats = {
"total_requests": backend.stats.total_requests,
@@ -52,12 +53,16 @@ async def grok_status(request: Request):
"errors": backend.stats.errors,
}
except Exception:
pass
logger.debug("Failed to load Grok stats", exc_info=True)
return templates.TemplateResponse(request, "grok_status.html", {
"status": status,
"stats": stats,
})
return templates.TemplateResponse(
request,
"grok_status.html",
{
"status": status,
"stats": stats,
},
)
@router.post("/toggle")
@@ -90,7 +95,7 @@ async def toggle_grok_mode(request: Request):
success=True,
)
except Exception:
pass
logger.debug("Failed to log Grok toggle to Spark", exc_info=True)
return HTMLResponse(
_render_toggle_card(_grok_mode_active),
@@ -104,10 +109,13 @@ def _run_grok_query(message: str) -> dict:
Returns:
{"response": str | None, "error": str | None}
"""
from timmy.backends import grok_available, get_grok_backend
from timmy.backends import get_grok_backend, grok_available
if not grok_available():
return {"response": None, "error": "Grok is not available. Set GROK_ENABLED=true and XAI_API_KEY."}
return {
"response": None,
"error": "Grok is not available. Set GROK_ENABLED=true and XAI_API_KEY.",
}
backend = get_grok_backend()
@@ -115,12 +123,13 @@ def _run_grok_query(message: str) -> dict:
if not settings.grok_free:
try:
from lightning.factory import get_backend as get_ln_backend
ln = get_ln_backend()
sats = min(settings.grok_max_sats_per_query, 100)
ln.create_invoice(sats, f"Grok: {message[:50]}")
invoice_note = f" | {sats} sats"
except Exception:
pass
logger.debug("Lightning invoice creation failed", exc_info=True)
try:
result = backend.run(message)
@@ -132,9 +141,10 @@ def _run_grok_query(message: str) -> dict:
@router.post("/chat", response_class=HTMLResponse)
async def grok_chat(request: Request, message: str = Form(...)):
"""Send a message directly to Grok and return HTMX chat partial."""
from dashboard.store import message_log
from datetime import datetime
from dashboard.store import message_log
timestamp = datetime.now().strftime("%H:%M:%S")
result = _run_grok_query(message)
@@ -142,9 +152,13 @@ async def grok_chat(request: Request, message: str = Form(...)):
message_log.append(role="user", content=user_msg, timestamp=timestamp, source="browser")
if result["response"]:
message_log.append(role="agent", content=result["response"], timestamp=timestamp, source="browser")
message_log.append(
role="agent", content=result["response"], timestamp=timestamp, source="browser"
)
else:
message_log.append(role="error", content=result["error"], timestamp=timestamp, source="browser")
message_log.append(
role="error", content=result["error"], timestamp=timestamp, source="browser"
)
return templates.TemplateResponse(
request,
@@ -185,6 +199,7 @@ async def grok_stats():
def _render_toggle_card(active: bool) -> str:
"""Render the Grok Mode toggle card HTML."""
import html
color = "#00ff88" if active else "#666"
state = "ACTIVE" if active else "STANDBY"
glow = "0 0 20px rgba(0, 255, 136, 0.4)" if active else "none"

View File

@@ -22,6 +22,7 @@ router = APIRouter(tags=["health"])
class DependencyStatus(BaseModel):
"""Status of a single dependency."""
name: str
status: str # "healthy", "degraded", "unavailable"
sovereignty_score: int # 0-10
@@ -30,6 +31,7 @@ class DependencyStatus(BaseModel):
class SovereigntyReport(BaseModel):
"""Full sovereignty audit report."""
overall_score: float
dependencies: list[DependencyStatus]
timestamp: str
@@ -38,6 +40,7 @@ class SovereigntyReport(BaseModel):
class HealthStatus(BaseModel):
"""System health status."""
status: str
timestamp: str
version: str
@@ -52,6 +55,7 @@ def _check_ollama_sync() -> DependencyStatus:
"""Synchronous Ollama check — run via asyncio.to_thread()."""
try:
import urllib.request
url = settings.ollama_url.replace("localhost", "127.0.0.1")
req = urllib.request.Request(
f"{url}/api/tags",
@@ -67,7 +71,7 @@ def _check_ollama_sync() -> DependencyStatus:
details={"url": settings.ollama_url, "model": settings.ollama_model},
)
except Exception:
pass
logger.debug("Ollama health check failed", exc_info=True)
return DependencyStatus(
name="Ollama AI",
@@ -197,6 +201,7 @@ async def health_status_panel(request: Request):
status_text = "UP" if ollama_ok else "DOWN"
status_color = "#10b981" if ollama_ok else "#ef4444"
import html
model = html.escape(settings.ollama_model) # Include model for test compatibility
html_content = f"""

View File

@@ -19,8 +19,7 @@ AGENT_CATALOG = [
"name": "Orchestrator",
"role": "Local AI",
"description": (
"Primary AI agent. Coordinates tasks, manages memory. "
"Uses distributed brain."
"Primary AI agent. Coordinates tasks, manages memory. " "Uses distributed brain."
),
"capabilities": "chat,reasoning,coordination,memory",
"rate_sats": 0,
@@ -82,7 +81,7 @@ async def marketplace_ui(request: Request):
"page_title": "Agent Marketplace",
"active_count": active,
"planned_count": 0,
}
},
)

View File

@@ -5,17 +5,17 @@ from typing import Optional
from fastapi import APIRouter, Form, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from dashboard.templating import templates
from timmy.memory.vector_store import (
store_memory,
search_memories,
delete_memory,
get_memory_stats,
recall_personal_facts,
recall_personal_facts_with_ids,
search_memories,
store_memory,
store_personal_fact,
update_personal_fact,
delete_memory,
)
from dashboard.templating import templates
router = APIRouter(prefix="/memory", tags=["memory"])

View File

@@ -13,6 +13,7 @@ from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from config import settings
from dashboard.templating import templates
from infrastructure.models.registry import (
CustomModel,
ModelFormat,
@@ -20,7 +21,6 @@ from infrastructure.models.registry import (
ModelRole,
model_registry,
)
from dashboard.templating import templates
logger = logging.getLogger(__name__)
@@ -33,6 +33,7 @@ api_router = APIRouter(prefix="/api/v1/models", tags=["models-api"])
class RegisterModelRequest(BaseModel):
"""Request body for model registration."""
name: str
format: str # gguf, safetensors, hf, ollama
path: str
@@ -45,12 +46,14 @@ class RegisterModelRequest(BaseModel):
class AssignModelRequest(BaseModel):
"""Request body for assigning a model to an agent."""
agent_id: str
model_name: str
class SetActiveRequest(BaseModel):
"""Request body for enabling/disabling a model."""
active: bool
@@ -92,15 +95,14 @@ async def register_model(request: RegisterModelRequest) -> dict[str, Any]:
raise HTTPException(
status_code=400,
detail=f"Invalid format: {request.format}. "
f"Choose from: {[f.value for f in ModelFormat]}",
f"Choose from: {[f.value for f in ModelFormat]}",
)
try:
role = ModelRole(request.role)
except ValueError:
raise HTTPException(
status_code=400,
detail=f"Invalid role: {request.role}. "
f"Choose from: {[r.value for r in ModelRole]}",
detail=f"Invalid role: {request.role}. " f"Choose from: {[r.value for r in ModelRole]}",
)
# Validate path exists for non-Ollama formats
@@ -163,9 +165,7 @@ async def unregister_model(model_name: str) -> dict[str, str]:
@api_router.patch("/{model_name}/active")
async def set_model_active(
model_name: str, request: SetActiveRequest
) -> dict[str, str]:
async def set_model_active(model_name: str, request: SetActiveRequest) -> dict[str, str]:
"""Enable or disable a model."""
if not model_registry.set_active(model_name, request.active):
raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
@@ -182,8 +182,7 @@ async def list_assignments() -> dict[str, Any]:
assignments = model_registry.get_agent_assignments()
return {
"assignments": [
{"agent_id": aid, "model_name": mname}
for aid, mname in assignments.items()
{"agent_id": aid, "model_name": mname} for aid, mname in assignments.items()
],
"total": len(assignments),
}

View File

@@ -3,8 +3,8 @@
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from timmy.cascade_adapter import get_cascade_adapter
from dashboard.templating import templates
from timmy.cascade_adapter import get_cascade_adapter
router = APIRouter(prefix="/router", tags=["router"])

View File

@@ -13,8 +13,8 @@ import logging
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from spark.engine import spark_engine
from dashboard.templating import templates
from spark.engine import spark_engine
logger = logging.getLogger(__name__)
@@ -86,23 +86,26 @@ async def spark_ui(request: Request):
async def spark_status_json():
"""Return Spark Intelligence status as JSON."""
from fastapi.responses import JSONResponse
status = spark_engine.status()
advisories = spark_engine.get_advisories()
return JSONResponse({
"status": status,
"advisories": [
{
"category": a.category,
"priority": a.priority,
"title": a.title,
"detail": a.detail,
"suggested_action": a.suggested_action,
"subject": a.subject,
"evidence_count": a.evidence_count,
}
for a in advisories
],
})
return JSONResponse(
{
"status": status,
"advisories": [
{
"category": a.category,
"priority": a.priority,
"title": a.title,
"detail": a.detail,
"suggested_action": a.suggested_action,
"subject": a.subject,
"evidence_count": a.evidence_count,
}
for a in advisories
],
}
)
@router.get("/timeline", response_class=HTMLResponse)

View File

@@ -7,9 +7,9 @@ from typing import Optional
from fastapi import APIRouter, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from spark.engine import spark_engine
from dashboard.templating import templates
from infrastructure.ws_manager.handler import ws_manager
from spark.engine import spark_engine
logger = logging.getLogger(__name__)
@@ -78,14 +78,16 @@ async def swarm_ws(websocket: WebSocket):
await ws_manager.connect(websocket)
try:
# Send initial state so frontend can clear loading placeholders
await websocket.send_json({
"type": "initial_state",
"data": {
"agents": {"total": 0, "active": 0, "list": []},
"tasks": {"active": 0},
"auctions": {"list": []},
},
})
await websocket.send_json(
{
"type": "initial_state",
"data": {
"agents": {"total": 0, "active": 0, "list": []},
"tasks": {"active": 0},
"auctions": {"list": []},
},
}
)
while True:
await websocket.receive_text()
except WebSocketDisconnect:

View File

@@ -38,11 +38,27 @@ async def lightning_ledger(request: Request):
completed = "completed"
pending = "pending"
Tx = namedtuple("Tx", ["tx_type", "status", "amount_sats", "payment_hash", "memo", "created_at"])
Tx = namedtuple(
"Tx", ["tx_type", "status", "amount_sats", "payment_hash", "memo", "created_at"]
)
transactions = [
Tx(TxType.outgoing, TxStatus.completed, 50, "hash1", "Model inference", "2026-03-04 10:00:00"),
Tx(TxType.incoming, TxStatus.completed, 1000, "hash2", "Manual deposit", "2026-03-03 15:00:00"),
Tx(
TxType.outgoing,
TxStatus.completed,
50,
"hash1",
"Model inference",
"2026-03-04 10:00:00",
),
Tx(
TxType.incoming,
TxStatus.completed,
1000,
"hash2",
"Manual deposit",
"2026-03-03 15:00:00",
),
]
return templates.TemplateResponse(
@@ -84,9 +100,16 @@ async def mission_control(request: Request):
@router.get("/bugs", response_class=HTMLResponse)
async def bugs_page(request: Request):
return templates.TemplateResponse(request, "bugs.html", {
"bugs": [], "total": 0, "stats": {}, "filter_status": None,
})
return templates.TemplateResponse(
request,
"bugs.html",
{
"bugs": [],
"total": 0,
"stats": {},
"filter_status": None,
},
)
@router.get("/self-coding", response_class=HTMLResponse)
@@ -109,14 +132,17 @@ async def api_notifications():
"""Return recent system events for the notification dropdown."""
try:
from spark.engine import spark_engine
events = spark_engine.get_timeline(limit=20)
return JSONResponse([
{
"event_type": e.event_type,
"title": getattr(e, "description", e.event_type),
"timestamp": str(getattr(e, "timestamp", "")),
}
for e in events
])
return JSONResponse(
[
{
"event_type": e.event_type,
"title": getattr(e, "description", e.event_type),
"timestamp": str(getattr(e, "timestamp", "")),
}
for e in events
]
)
except Exception:
return JSONResponse([])

View File

@@ -7,9 +7,10 @@ from datetime import datetime
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, HTTPException, Request, Form
from fastapi import APIRouter, Form, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from config import settings
from dashboard.templating import templates
logger = logging.getLogger(__name__)
@@ -20,11 +21,17 @@ router = APIRouter(tags=["tasks"])
# Database helpers
# ---------------------------------------------------------------------------
DB_PATH = Path("data/tasks.db")
DB_PATH = Path(settings.repo_root) / "data" / "tasks.db"
VALID_STATUSES = {
"pending_approval", "approved", "running", "paused",
"completed", "vetoed", "failed", "backlogged",
"pending_approval",
"approved",
"running",
"paused",
"completed",
"vetoed",
"failed",
"backlogged",
}
VALID_PRIORITIES = {"low", "normal", "high", "urgent"}
@@ -33,7 +40,8 @@ def _get_db() -> sqlite3.Connection:
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
conn.execute("""
conn.execute(
"""
CREATE TABLE IF NOT EXISTS tasks (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
@@ -46,7 +54,8 @@ def _get_db() -> sqlite3.Connection:
created_at TEXT DEFAULT (datetime('now')),
completed_at TEXT
)
""")
"""
)
conn.commit()
return conn
@@ -91,37 +100,52 @@ class _TaskView:
# Page routes
# ---------------------------------------------------------------------------
@router.get("/tasks", response_class=HTMLResponse)
async def tasks_page(request: Request):
"""Render the main task queue page with 3-column layout."""
db = _get_db()
try:
pending = [_TaskView(_row_to_dict(r)) for r in db.execute(
"SELECT * FROM tasks WHERE status IN ('pending_approval') ORDER BY created_at DESC"
).fetchall()]
active = [_TaskView(_row_to_dict(r)) for r in db.execute(
"SELECT * FROM tasks WHERE status IN ('approved','running','paused') ORDER BY created_at DESC"
).fetchall()]
completed = [_TaskView(_row_to_dict(r)) for r in db.execute(
"SELECT * FROM tasks WHERE status IN ('completed','vetoed','failed') ORDER BY completed_at DESC LIMIT 50"
).fetchall()]
pending = [
_TaskView(_row_to_dict(r))
for r in db.execute(
"SELECT * FROM tasks WHERE status IN ('pending_approval') ORDER BY created_at DESC"
).fetchall()
]
active = [
_TaskView(_row_to_dict(r))
for r in db.execute(
"SELECT * FROM tasks WHERE status IN ('approved','running','paused') ORDER BY created_at DESC"
).fetchall()
]
completed = [
_TaskView(_row_to_dict(r))
for r in db.execute(
"SELECT * FROM tasks WHERE status IN ('completed','vetoed','failed') ORDER BY completed_at DESC LIMIT 50"
).fetchall()
]
finally:
db.close()
return templates.TemplateResponse(request, "tasks.html", {
"pending_count": len(pending),
"pending": pending,
"active": active,
"completed": completed,
"agents": [], # no agent roster wired yet
"pre_assign": "",
})
return templates.TemplateResponse(
request,
"tasks.html",
{
"pending_count": len(pending),
"pending": pending,
"active": active,
"completed": completed,
"agents": [], # no agent roster wired yet
"pre_assign": "",
},
)
# ---------------------------------------------------------------------------
# HTMX partials (polled by the template)
# ---------------------------------------------------------------------------
@router.get("/tasks/pending", response_class=HTMLResponse)
async def tasks_pending(request: Request):
db = _get_db()
@@ -134,9 +158,11 @@ async def tasks_pending(request: Request):
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
parts = []
for task in tasks:
parts.append(templates.TemplateResponse(
request, "partials/task_card.html", {"task": task}
).body.decode())
parts.append(
templates.TemplateResponse(
request, "partials/task_card.html", {"task": task}
).body.decode()
)
if not parts:
return HTMLResponse('<div class="empty-column">No pending tasks</div>')
return HTMLResponse("".join(parts))
@@ -154,9 +180,11 @@ async def tasks_active(request: Request):
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
parts = []
for task in tasks:
parts.append(templates.TemplateResponse(
request, "partials/task_card.html", {"task": task}
).body.decode())
parts.append(
templates.TemplateResponse(
request, "partials/task_card.html", {"task": task}
).body.decode()
)
if not parts:
return HTMLResponse('<div class="empty-column">No active tasks</div>')
return HTMLResponse("".join(parts))
@@ -174,9 +202,11 @@ async def tasks_completed(request: Request):
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
parts = []
for task in tasks:
parts.append(templates.TemplateResponse(
request, "partials/task_card.html", {"task": task}
).body.decode())
parts.append(
templates.TemplateResponse(
request, "partials/task_card.html", {"task": task}
).body.decode()
)
if not parts:
return HTMLResponse('<div class="empty-column">No completed tasks yet</div>')
return HTMLResponse("".join(parts))
@@ -186,6 +216,7 @@ async def tasks_completed(request: Request):
# Form-based create (used by the modal in tasks.html)
# ---------------------------------------------------------------------------
@router.post("/tasks/create", response_class=HTMLResponse)
async def create_task_form(
request: Request,
@@ -218,6 +249,7 @@ async def create_task_form(
# Task action endpoints (approve, veto, modify, pause, cancel, retry)
# ---------------------------------------------------------------------------
@router.post("/tasks/{task_id}/approve", response_class=HTMLResponse)
async def approve_task(request: Request, task_id: str):
return await _set_status(request, task_id, "approved")
@@ -268,7 +300,9 @@ async def modify_task(
async def _set_status(request: Request, task_id: str, new_status: str):
"""Helper to update status and return refreshed task card."""
completed_at = datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
completed_at = (
datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
)
db = _get_db()
try:
db.execute(
@@ -289,6 +323,7 @@ async def _set_status(request: Request, task_id: str, new_status: str):
# JSON API (for programmatic access / Timmy's tool calls)
# ---------------------------------------------------------------------------
@router.post("/api/tasks", response_class=JSONResponse, status_code=201)
async def api_create_task(request: Request):
"""Create a task via JSON API."""
@@ -345,7 +380,9 @@ async def api_update_status(task_id: str, request: Request):
if not new_status or new_status not in VALID_STATUSES:
raise HTTPException(422, f"Invalid status. Must be one of: {VALID_STATUSES}")
completed_at = datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
completed_at = (
datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
)
db = _get_db()
try:
db.execute(
@@ -379,6 +416,7 @@ async def api_delete_task(task_id: str):
# Queue status (polled by the chat panel every 10 seconds)
# ---------------------------------------------------------------------------
@router.get("/api/queue/status", response_class=JSONResponse)
async def queue_status(assigned_to: str = "default"):
"""Return queue status for the chat panel's agent status indicator."""
@@ -396,14 +434,18 @@ async def queue_status(assigned_to: str = "default"):
db.close()
if running:
return JSONResponse({
"is_working": True,
"current_task": {"id": running["id"], "title": running["title"]},
"tasks_ahead": 0,
})
return JSONResponse(
{
"is_working": True,
"current_task": {"id": running["id"], "title": running["title"]},
"tasks_ahead": 0,
}
)
return JSONResponse({
"is_working": False,
"current_task": None,
"tasks_ahead": ahead["cnt"] if ahead else 0,
})
return JSONResponse(
{
"is_working": False,
"current_task": None,
"tasks_ahead": ahead["cnt"] if ahead else 0,
}
)

View File

@@ -10,8 +10,8 @@ import logging
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse, JSONResponse
from timmy.thinking import thinking_engine
from dashboard.templating import templates
from timmy.thinking import thinking_engine
logger = logging.getLogger(__name__)

View File

@@ -8,8 +8,8 @@ from collections import namedtuple
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse, JSONResponse
from timmy.tools import get_all_available_tools
from dashboard.templating import templates
from timmy.tools import get_all_available_tools
router = APIRouter(tags=["tools"])
@@ -29,9 +29,7 @@ def _build_agent_tools():
for name, fn in available.items()
]
return [
_AgentView(name="Timmy", status="idle", tools=tool_views, stats=_Stats(total_calls=0))
]
return [_AgentView(name="Timmy", status="idle", tools=tool_views, stats=_Stats(total_calls=0))]
@router.get("/tools", response_class=HTMLResponse)

View File

@@ -10,9 +10,9 @@ import logging
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from dashboard.templating import templates
from integrations.voice.nlu import detect_intent, extract_command
from timmy.agent import create_timmy
from dashboard.templating import templates
logger = logging.getLogger(__name__)
@@ -38,6 +38,7 @@ async def tts_status():
"""Check TTS engine availability."""
try:
from timmy_serve.voice_tts import voice_tts
return {
"available": voice_tts.available,
"voices": voice_tts.get_voices() if voice_tts.available else [],
@@ -51,6 +52,7 @@ async def tts_speak(text: str = Form(...)):
"""Speak text aloud via TTS."""
try:
from timmy_serve.voice_tts import voice_tts
if not voice_tts.available:
return {"spoken": False, "reason": "TTS engine not available"}
voice_tts.speak(text)
@@ -86,6 +88,7 @@ async def voice_command(text: str = Form(...)):
# ── Enhanced voice pipeline ──────────────────────────────────────────────
@router.post("/enhanced/process")
async def process_voice_input(
text: str = Form(...),
@@ -133,6 +136,7 @@ async def process_voice_input(
if speak_response and response_text:
try:
from timmy_serve.voice_tts import voice_tts
if voice_tts.available:
voice_tts.speak(response_text)
except Exception:

View File

@@ -6,7 +6,7 @@ import uuid
from datetime import datetime
from pathlib import Path
from fastapi import APIRouter, HTTPException, Request, Form
from fastapi import APIRouter, Form, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from dashboard.templating import templates
@@ -26,7 +26,8 @@ def _get_db() -> sqlite3.Connection:
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
conn.execute("""
conn.execute(
"""
CREATE TABLE IF NOT EXISTS work_orders (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
@@ -41,7 +42,8 @@ def _get_db() -> sqlite3.Connection:
created_at TEXT DEFAULT (datetime('now')),
completed_at TEXT
)
""")
"""
)
conn.commit()
return conn
@@ -71,7 +73,9 @@ class _WOView:
self.submitter = row.get("submitter", "dashboard")
self.status = _EnumLike(row.get("status", "submitted"))
raw_files = row.get("related_files", "")
self.related_files = [f.strip() for f in raw_files.split(",") if f.strip()] if raw_files else []
self.related_files = (
[f.strip() for f in raw_files.split(",") if f.strip()] if raw_files else []
)
self.result = row.get("result", "")
self.rejection_reason = row.get("rejection_reason", "")
self.created_at = row.get("created_at", "")
@@ -98,6 +102,7 @@ def _query_wos(db, statuses):
# Page route
# ---------------------------------------------------------------------------
@router.get("/work-orders/queue", response_class=HTMLResponse)
async def work_orders_page(request: Request):
db = _get_db()
@@ -109,21 +114,26 @@ async def work_orders_page(request: Request):
finally:
db.close()
return templates.TemplateResponse(request, "work_orders.html", {
"pending_count": len(pending),
"pending": pending,
"active": active,
"completed": completed,
"rejected": rejected,
"priorities": PRIORITIES,
"categories": CATEGORIES,
})
return templates.TemplateResponse(
request,
"work_orders.html",
{
"pending_count": len(pending),
"pending": pending,
"active": active,
"completed": completed,
"rejected": rejected,
"priorities": PRIORITIES,
"categories": CATEGORIES,
},
)
# ---------------------------------------------------------------------------
# Form submit
# ---------------------------------------------------------------------------
@router.post("/work-orders/submit", response_class=HTMLResponse)
async def submit_work_order(
request: Request,
@@ -159,6 +169,7 @@ async def submit_work_order(
# HTMX partials
# ---------------------------------------------------------------------------
@router.get("/work-orders/queue/pending", response_class=HTMLResponse)
async def pending_partial(request: Request):
db = _get_db()
@@ -174,7 +185,9 @@ async def pending_partial(request: Request):
parts = []
for wo in wos:
parts.append(
templates.TemplateResponse(request, "partials/work_order_card.html", {"wo": wo}).body.decode()
templates.TemplateResponse(
request, "partials/work_order_card.html", {"wo": wo}
).body.decode()
)
return HTMLResponse("".join(parts))
@@ -194,7 +207,9 @@ async def active_partial(request: Request):
parts = []
for wo in wos:
parts.append(
templates.TemplateResponse(request, "partials/work_order_card.html", {"wo": wo}).body.decode()
templates.TemplateResponse(
request, "partials/work_order_card.html", {"wo": wo}
).body.decode()
)
return HTMLResponse("".join(parts))
@@ -203,8 +218,11 @@ async def active_partial(request: Request):
# Action endpoints
# ---------------------------------------------------------------------------
async def _update_status(request: Request, wo_id: str, new_status: str, **extra):
completed_at = datetime.utcnow().isoformat() if new_status in ("completed", "rejected") else None
completed_at = (
datetime.utcnow().isoformat() if new_status in ("completed", "rejected") else None
)
db = _get_db()
try:
sets = ["status=?", "completed_at=COALESCE(?, completed_at)"]

View File

@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
@dataclass
class Message:
role: str # "user" | "agent" | "error"
role: str # "user" | "agent" | "error"
content: str
timestamp: str
source: str = "browser" # "browser" | "api" | "telegram" | "discord" | "system"
@@ -16,7 +16,9 @@ class MessageLog:
self._entries: list[Message] = []
def append(self, role: str, content: str, timestamp: str, source: str = "browser") -> None:
self._entries.append(Message(role=role, content=content, timestamp=timestamp, source=source))
self._entries.append(
Message(role=role, content=content, timestamp=timestamp, source=source)
)
def all(self) -> list[Message]:
return list(self._entries)

View File

@@ -0,0 +1,90 @@
{% extends "base.html" %}
{% block title %}{{ page_title }}{% endblock %}
{% block extra_styles %}
<style>
.experiments-container { max-width: 1000px; margin: 0 auto; }
.exp-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px; }
.exp-title { font-size: 1.3rem; font-weight: 700; color: var(--text-bright); }
.exp-subtitle { font-size: 0.8rem; color: var(--text-dim); margin-top: 2px; }
.exp-config { display: flex; gap: 16px; font-size: 0.8rem; color: var(--text-dim); }
.exp-config span { background: var(--glass-bg); border: 1px solid var(--border); padding: 4px 10px; border-radius: 6px; }
.exp-table { width: 100%; border-collapse: collapse; font-size: 0.85rem; }
.exp-table th { text-align: left; padding: 8px 12px; color: var(--text-dim); border-bottom: 1px solid var(--border); font-weight: 600; }
.exp-table td { padding: 8px 12px; border-bottom: 1px solid var(--border); color: var(--text); }
.exp-table tr:hover { background: var(--glass-bg); }
.metric-good { color: var(--success); }
.metric-bad { color: var(--danger); }
.btn-start { background: var(--accent); color: #fff; border: none; padding: 8px 18px; border-radius: 6px; cursor: pointer; font-size: 0.85rem; }
.btn-start:hover { opacity: 0.9; }
.btn-start:disabled { opacity: 0.4; cursor: not-allowed; }
.disabled-note { font-size: 0.8rem; color: var(--text-dim); margin-top: 8px; }
.empty-state { text-align: center; padding: 40px; color: var(--text-dim); }
</style>
{% endblock %}
{% block content %}
<div class="experiments-container">
<div class="exp-header">
<div>
<div class="exp-title">Autoresearch Experiments</div>
<div class="exp-subtitle">Autonomous ML experiment loops — modify code, train, evaluate, iterate</div>
</div>
<div>
{% if enabled %}
<button class="btn-start"
hx-post="/experiments/start"
hx-target="#experiment-status"
hx-swap="innerHTML">
Start Experiment
</button>
{% else %}
<button class="btn-start" disabled>Disabled</button>
<div class="disabled-note">Set AUTORESEARCH_ENABLED=true to enable</div>
{% endif %}
</div>
</div>
<div class="exp-config">
<span>Metric: {{ metric_name }}</span>
<span>Budget: {{ time_budget }}s</span>
<span>Max iters: {{ max_iterations }}</span>
</div>
<div id="experiment-status" style="margin: 12px 0;"></div>
{% if history %}
<table class="exp-table">
<thead>
<tr>
<th>#</th>
<th>{{ metric_name }}</th>
<th>Duration</th>
<th>Status</th>
</tr>
</thead>
<tbody>
{% for run in history %}
<tr>
<td>{{ loop.index }}</td>
<td>
{% if run.metric is not none %}
{{ "%.4f"|format(run.metric) }}
{% else %}
{% endif %}
</td>
<td>{{ run.get("duration_s", "—") }}s</td>
<td>{% if run.get("success") %}OK{% else %}{{ run.get("error", "failed") }}{% endif %}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<div class="empty-state">
No experiments yet. Start one to begin autonomous training.
</div>
{% endif %}
</div>
{% endblock %}

View File

@@ -119,9 +119,7 @@ def capture_error(
return None
# Format the stack trace
tb_str = "".join(
traceback.format_exception(type(exc), exc, exc.__traceback__)
)
tb_str = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
# Extract file/line from traceback
tb_obj = exc.__traceback__

View File

@@ -33,6 +33,7 @@ class EventBroadcaster:
if self._ws_manager is None:
try:
from infrastructure.ws_manager.handler import ws_manager
self._ws_manager = ws_manager
except Exception as exc:
logger.debug("WebSocket manager not available: %s", exc)
@@ -62,7 +63,7 @@ class EventBroadcaster:
"agent_id": event.agent_id,
"timestamp": event.timestamp,
"data": event.data,
}
},
}
try:

View File

@@ -16,6 +16,7 @@ logger = logging.getLogger(__name__)
@dataclass
class Event:
"""A typed event in the system."""
type: str # e.g., "agent.task.assigned", "tool.execution.completed"
source: str # Agent or component that emitted the event
data: dict = field(default_factory=dict)
@@ -61,12 +62,14 @@ class EventBus:
- "agent.*" — any agent event
- "*" — all events
"""
def decorator(handler: EventHandler) -> EventHandler:
if event_pattern not in self._subscribers:
self._subscribers[event_pattern] = []
self._subscribers[event_pattern].append(handler)
logger.debug("Subscribed handler to '%s'", event_pattern)
return handler
return decorator
def unsubscribe(self, event_pattern: str, handler: EventHandler) -> bool:
@@ -90,7 +93,7 @@ class EventBus:
# Store in history
self._history.append(event)
if len(self._history) > self._max_history:
self._history = self._history[-self._max_history:]
self._history = self._history[-self._max_history :]
# Find matching handlers
handlers: list[EventHandler] = []
@@ -102,8 +105,7 @@ class EventBus:
# Invoke handlers concurrently
if handlers:
await asyncio.gather(
*[self._invoke_handler(h, event) for h in handlers],
return_exceptions=True
*[self._invoke_handler(h, event) for h in handlers], return_exceptions=True
)
logger.debug("Published event '%s' to %d handlers", event.type, len(handlers))
@@ -156,11 +158,13 @@ event_bus = EventBus()
# Convenience functions
async def emit(event_type: str, source: str, data: dict) -> int:
"""Quick emit an event."""
return await event_bus.publish(Event(
type=event_type,
source=source,
data=data,
))
return await event_bus.publish(
Event(
type=event_type,
source=source,
data=data,
)
)
def on(event_pattern: str) -> Callable[[EventHandler], EventHandler]:

View File

@@ -11,7 +11,7 @@ Usage:
result = await git_hand.run("status")
"""
from infrastructure.hands.shell import shell_hand
from infrastructure.hands.git import git_hand
from infrastructure.hands.shell import shell_hand
__all__ = ["shell_hand", "git_hand"]

View File

@@ -25,16 +25,18 @@ from config import settings
logger = logging.getLogger(__name__)
# Operations that require explicit confirmation before execution
DESTRUCTIVE_OPS = frozenset({
"push --force",
"push -f",
"reset --hard",
"clean -fd",
"clean -f",
"branch -D",
"checkout -- .",
"restore .",
})
DESTRUCTIVE_OPS = frozenset(
{
"push --force",
"push -f",
"reset --hard",
"clean -fd",
"clean -f",
"branch -D",
"checkout -- .",
"restore .",
}
)
@dataclass
@@ -190,7 +192,9 @@ class GitHand:
flag = "-b" if create else ""
return await self.run(f"checkout {flag} {branch}".strip())
async def push(self, remote: str = "origin", branch: str = "", force: bool = False) -> GitResult:
async def push(
self, remote: str = "origin", branch: str = "", force: bool = False
) -> GitResult:
"""Push to remote. Force-push requires explicit opt-in."""
args = f"push -u {remote} {branch}".strip()
if force:

View File

@@ -26,15 +26,17 @@ from config import settings
logger = logging.getLogger(__name__)
# Commands that are always blocked regardless of allow-list
_BLOCKED_COMMANDS = frozenset({
"rm -rf /",
"rm -rf /*",
"mkfs",
"dd if=/dev/zero",
":(){ :|:& };:", # fork bomb
"> /dev/sda",
"chmod -R 777 /",
})
_BLOCKED_COMMANDS = frozenset(
{
"rm -rf /",
"rm -rf /*",
"mkfs",
"dd if=/dev/zero",
":(){ :|:& };:", # fork bomb
"> /dev/sda",
"chmod -R 777 /",
}
)
# Default allow-list: safe build/dev commands
DEFAULT_ALLOWED_PREFIXES = (
@@ -199,9 +201,7 @@ class ShellHand:
proc.kill()
await proc.wait()
latency = (time.time() - start) * 1000
logger.warning(
"Shell command timed out after %ds: %s", effective_timeout, command
)
logger.warning("Shell command timed out after %ds: %s", effective_timeout, command)
return ShellResult(
command=command,
success=False,

View File

@@ -11,15 +11,17 @@ the tool registry.
import logging
from typing import Any
from infrastructure.hands.shell import shell_hand
from infrastructure.hands.git import git_hand
from infrastructure.hands.shell import shell_hand
try:
from mcp.schemas.base import create_tool_schema
except ImportError:
def create_tool_schema(**kwargs):
return kwargs
logger = logging.getLogger(__name__)
# ── Tool schemas ─────────────────────────────────────────────────────────────
@@ -83,6 +85,7 @@ PERSONA_LOCAL_HAND_MAP: dict[str, list[str]] = {
# ── Handlers ─────────────────────────────────────────────────────────────────
async def _handle_shell(**kwargs: Any) -> str:
"""Handler for the shell MCP tool."""
command = kwargs.get("command", "")

View File

@@ -1,12 +1,5 @@
"""Infrastructure models package."""
from infrastructure.models.registry import (
CustomModel,
ModelFormat,
ModelRegistry,
ModelRole,
model_registry,
)
from infrastructure.models.multimodal import (
ModelCapability,
ModelInfo,
@@ -17,6 +10,13 @@ from infrastructure.models.multimodal import (
model_supports_vision,
pull_model_with_fallback,
)
from infrastructure.models.registry import (
CustomModel,
ModelFormat,
ModelRegistry,
ModelRole,
model_registry,
)
__all__ = [
# Registry

View File

@@ -21,39 +21,130 @@ logger = logging.getLogger(__name__)
class ModelCapability(Enum):
"""Capabilities a model can have."""
TEXT = auto() # Standard text completion
VISION = auto() # Image understanding
AUDIO = auto() # Audio/speech processing
TOOLS = auto() # Function calling / tool use
JSON = auto() # Structured output / JSON mode
STREAMING = auto() # Streaming responses
TEXT = auto() # Standard text completion
VISION = auto() # Image understanding
AUDIO = auto() # Audio/speech processing
TOOLS = auto() # Function calling / tool use
JSON = auto() # Structured output / JSON mode
STREAMING = auto() # Streaming responses
# Known model capabilities (local Ollama models)
# These are used when we can't query the model directly
KNOWN_MODEL_CAPABILITIES: dict[str, set[ModelCapability]] = {
# Llama 3.x series
"llama3.1": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"llama3.1:8b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"llama3.1:8b-instruct": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"llama3.1:70b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"llama3.1:405b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"llama3.2": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING, ModelCapability.VISION},
"llama3.1": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"llama3.1:8b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"llama3.1:8b-instruct": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"llama3.1:70b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"llama3.1:405b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"llama3.2": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
ModelCapability.VISION,
},
"llama3.2:1b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"llama3.2:3b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING, ModelCapability.VISION},
"llama3.2-vision": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING, ModelCapability.VISION},
"llama3.2-vision:11b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING, ModelCapability.VISION},
"llama3.2:3b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
ModelCapability.VISION,
},
"llama3.2-vision": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
ModelCapability.VISION,
},
"llama3.2-vision:11b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
ModelCapability.VISION,
},
# Qwen series
"qwen2.5": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"qwen2.5:7b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"qwen2.5:14b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"qwen2.5:32b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"qwen2.5:72b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"qwen2.5-vl": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING, ModelCapability.VISION},
"qwen2.5-vl:3b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING, ModelCapability.VISION},
"qwen2.5-vl:7b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING, ModelCapability.VISION},
"qwen2.5": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"qwen2.5:7b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"qwen2.5:14b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"qwen2.5:32b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"qwen2.5:72b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"qwen2.5-vl": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
ModelCapability.VISION,
},
"qwen2.5-vl:3b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
ModelCapability.VISION,
},
"qwen2.5-vl:7b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
ModelCapability.VISION,
},
# DeepSeek series
"deepseek-r1": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"deepseek-r1:1.5b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
@@ -61,21 +152,48 @@ KNOWN_MODEL_CAPABILITIES: dict[str, set[ModelCapability]] = {
"deepseek-r1:14b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"deepseek-r1:32b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"deepseek-r1:70b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"deepseek-v3": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"deepseek-v3": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
# Gemma series
"gemma2": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"gemma2:2b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"gemma2:9b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"gemma2:27b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
# Mistral series
"mistral": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"mistral:7b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"mistral-nemo": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"mistral-small": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"mistral-large": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"mistral": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"mistral:7b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"mistral-nemo": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"mistral-small": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"mistral-large": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
# Vision-specific models
"llava": {ModelCapability.TEXT, ModelCapability.VISION, ModelCapability.STREAMING},
"llava:7b": {ModelCapability.TEXT, ModelCapability.VISION, ModelCapability.STREAMING},
@@ -86,21 +204,48 @@ KNOWN_MODEL_CAPABILITIES: dict[str, set[ModelCapability]] = {
"bakllava": {ModelCapability.TEXT, ModelCapability.VISION, ModelCapability.STREAMING},
"moondream": {ModelCapability.TEXT, ModelCapability.VISION, ModelCapability.STREAMING},
"moondream:1.8b": {ModelCapability.TEXT, ModelCapability.VISION, ModelCapability.STREAMING},
# Phi series
"phi3": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"phi3:3.8b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"phi3:14b": {ModelCapability.TEXT, ModelCapability.JSON, ModelCapability.STREAMING},
"phi4": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"phi4": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
# Command R
"command-r": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"command-r:35b": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"command-r-plus": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"command-r": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"command-r:35b": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"command-r-plus": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
# Granite (IBM)
"granite3-dense": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"granite3-moe": {ModelCapability.TEXT, ModelCapability.TOOLS, ModelCapability.JSON, ModelCapability.STREAMING},
"granite3-dense": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
"granite3-moe": {
ModelCapability.TEXT,
ModelCapability.TOOLS,
ModelCapability.JSON,
ModelCapability.STREAMING,
},
}
@@ -108,15 +253,15 @@ KNOWN_MODEL_CAPABILITIES: dict[str, set[ModelCapability]] = {
# These are tried in order when the primary model doesn't support a capability
DEFAULT_FALLBACK_CHAINS: dict[ModelCapability, list[str]] = {
ModelCapability.VISION: [
"llama3.2:3b", # Fast vision model
"llava:7b", # Classic vision model
"qwen2.5-vl:3b", # Qwen vision
"moondream:1.8b", # Tiny vision model (last resort)
"llama3.2:3b", # Fast vision model
"llava:7b", # Classic vision model
"qwen2.5-vl:3b", # Qwen vision
"moondream:1.8b", # Tiny vision model (last resort)
],
ModelCapability.TOOLS: [
"llama3.1:8b-instruct", # Best tool use
"llama3.2:3b", # Smaller but capable
"qwen2.5:7b", # Reliable fallback
"llama3.2:3b", # Smaller but capable
"qwen2.5:7b", # Reliable fallback
],
ModelCapability.AUDIO: [
# Audio models are less common in Ollama
@@ -128,6 +273,7 @@ DEFAULT_FALLBACK_CHAINS: dict[ModelCapability, list[str]] = {
@dataclass
class ModelInfo:
"""Information about a model's capabilities and availability."""
name: str
capabilities: set[ModelCapability] = field(default_factory=set)
is_available: bool = False
@@ -159,8 +305,8 @@ class MultiModalManager:
def _refresh_available_models(self) -> None:
"""Query Ollama for available models."""
try:
import urllib.request
import json
import urllib.request
url = self.ollama_url.replace("localhost", "127.0.0.1")
req = urllib.request.Request(
@@ -217,15 +363,10 @@ class MultiModalManager:
def get_models_with_capability(self, capability: ModelCapability) -> list[ModelInfo]:
"""Get all available models that support a capability."""
return [
info for info in self._available_models.values()
if capability in info.capabilities
]
return [info for info in self._available_models.values() if capability in info.capabilities]
def get_best_model_for(
self,
capability: ModelCapability,
preferred_model: Optional[str] = None
self, capability: ModelCapability, preferred_model: Optional[str] = None
) -> Optional[str]:
"""Get the best available model for a specific capability.
@@ -243,7 +384,8 @@ class MultiModalManager:
return preferred_model
logger.debug(
"Preferred model %s doesn't support %s, checking fallbacks",
preferred_model, capability.name
preferred_model,
capability.name,
)
# Check fallback chain for this capability
@@ -257,7 +399,7 @@ class MultiModalManager:
capable_models = self.get_models_with_capability(capability)
if capable_models:
# Sort by size (prefer smaller/faster models as fallback)
capable_models.sort(key=lambda m: m.size_mb or float('inf'))
capable_models.sort(key=lambda m: m.size_mb or float("inf"))
return capable_models[0].name
return None
@@ -292,8 +434,7 @@ class MultiModalManager:
fallback = self.get_best_model_for(capability, primary_model)
if fallback:
logger.info(
"Primary model %s unavailable, using fallback %s",
primary_model, fallback
"Primary model %s unavailable, using fallback %s", primary_model, fallback
)
return fallback, True
@@ -302,7 +443,8 @@ class MultiModalManager:
if default_model in self._available_models:
logger.warning(
"Falling back to default model %s (primary: %s unavailable)",
default_model, primary_model
default_model,
primary_model,
)
return default_model, True
@@ -316,8 +458,8 @@ class MultiModalManager:
True if successful or model already exists
"""
try:
import urllib.request
import json
import urllib.request
logger.info("Pulling model: %s", model_name)
@@ -343,11 +485,7 @@ class MultiModalManager:
logger.error("Error pulling model %s: %s", model_name, exc)
return False
def configure_fallback_chain(
self,
capability: ModelCapability,
models: list[str]
) -> None:
def configure_fallback_chain(self, capability: ModelCapability, models: list[str]) -> None:
"""Configure a custom fallback chain for a capability."""
self._fallback_chains[capability] = models
logger.info("Configured fallback chain for %s: %s", capability.name, models)
@@ -417,8 +555,7 @@ def get_multimodal_manager() -> MultiModalManager:
def get_model_for_capability(
capability: ModelCapability,
preferred_model: Optional[str] = None
capability: ModelCapability, preferred_model: Optional[str] = None
) -> Optional[str]:
"""Convenience function to get best model for a capability."""
return get_multimodal_manager().get_best_model_for(capability, preferred_model)
@@ -430,9 +567,7 @@ def pull_model_with_fallback(
auto_pull: bool = True,
) -> tuple[str, bool]:
"""Convenience function to pull model with fallback."""
return get_multimodal_manager().pull_model_with_fallback(
primary_model, capability, auto_pull
)
return get_multimodal_manager().pull_model_with_fallback(primary_model, capability, auto_pull)
def model_supports_vision(model_name: str) -> bool:

View File

@@ -26,26 +26,29 @@ DB_PATH = Path("data/swarm.db")
class ModelFormat(str, Enum):
"""Supported model weight formats."""
GGUF = "gguf" # Ollama-compatible quantised weights
SAFETENSORS = "safetensors" # HuggingFace safetensors
HF_CHECKPOINT = "hf" # Full HuggingFace checkpoint directory
OLLAMA = "ollama" # Already loaded in Ollama by name
GGUF = "gguf" # Ollama-compatible quantised weights
SAFETENSORS = "safetensors" # HuggingFace safetensors
HF_CHECKPOINT = "hf" # Full HuggingFace checkpoint directory
OLLAMA = "ollama" # Already loaded in Ollama by name
class ModelRole(str, Enum):
"""Role a model can play in the system (OpenClaw-RL style)."""
GENERAL = "general" # Default agent inference
REWARD = "reward" # Process Reward Model (PRM) scoring
TEACHER = "teacher" # On-policy distillation teacher
JUDGE = "judge" # Output quality evaluation
GENERAL = "general" # Default agent inference
REWARD = "reward" # Process Reward Model (PRM) scoring
TEACHER = "teacher" # On-policy distillation teacher
JUDGE = "judge" # Output quality evaluation
@dataclass
class CustomModel:
"""A registered custom model."""
name: str
format: ModelFormat
path: str # Absolute path or Ollama model name
path: str # Absolute path or Ollama model name
role: ModelRole = ModelRole.GENERAL
context_window: int = 4096
description: str = ""
@@ -141,10 +144,16 @@ class ModelRegistry:
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
model.name, model.format.value, model.path,
model.role.value, model.context_window, model.description,
model.registered_at, int(model.active),
model.default_temperature, model.max_tokens,
model.name,
model.format.value,
model.path,
model.role.value,
model.context_window,
model.description,
model.registered_at,
int(model.active),
model.default_temperature,
model.max_tokens,
),
)
conn.commit()
@@ -160,9 +169,7 @@ class ModelRegistry:
return False
conn = _get_conn()
conn.execute("DELETE FROM custom_models WHERE name = ?", (name,))
conn.execute(
"DELETE FROM agent_model_assignments WHERE model_name = ?", (name,)
)
conn.execute("DELETE FROM agent_model_assignments WHERE model_name = ?", (name,))
conn.commit()
conn.close()
del self._models[name]

View File

@@ -9,8 +9,8 @@ No cloud push services — everything stays local.
"""
import logging
import subprocess
import platform
import subprocess
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
@@ -25,9 +25,7 @@ class Notification:
title: str
message: str
category: str # swarm | task | agent | system | payment
timestamp: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat()
)
timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
read: bool = False
@@ -74,9 +72,11 @@ class PushNotifier:
def _native_notify(self, title: str, message: str) -> None:
"""Send a native macOS notification via osascript."""
try:
safe_message = message.replace("\\", "\\\\").replace('"', '\\"')
safe_title = title.replace("\\", "\\\\").replace('"', '\\"')
script = (
f'display notification "{message}" '
f'with title "Agent Dashboard" subtitle "{title}"'
f'display notification "{safe_message}" '
f'with title "Agent Dashboard" subtitle "{safe_title}"'
)
subprocess.Popen(
["osascript", "-e", script],
@@ -114,7 +114,7 @@ class PushNotifier:
def clear(self) -> None:
self._notifications.clear()
def add_listener(self, callback) -> None:
def add_listener(self, callback: "Callable[[Notification], None]") -> None:
"""Register a callback for real-time notification delivery."""
self._listeners.append(callback)
@@ -139,10 +139,7 @@ async def notify_briefing_ready(briefing) -> None:
logger.info("Briefing ready but no pending approvals — skipping native notification")
return
message = (
f"Your morning briefing is ready. "
f"{n_approvals} item(s) await your approval."
)
message = f"Your morning briefing is ready. " f"{n_approvals} item(s) await your approval."
notifier.notify(
title="Morning Briefing Ready",
message=message,

View File

@@ -156,33 +156,23 @@ class OpenFangClient:
async def browse(self, url: str, instruction: str = "") -> HandResult:
"""Web automation via OpenFang's Browser hand."""
return await self.execute_hand(
"browser", {"url": url, "instruction": instruction}
)
return await self.execute_hand("browser", {"url": url, "instruction": instruction})
async def collect(self, target: str, depth: str = "shallow") -> HandResult:
"""OSINT collection via OpenFang's Collector hand."""
return await self.execute_hand(
"collector", {"target": target, "depth": depth}
)
return await self.execute_hand("collector", {"target": target, "depth": depth})
async def predict(self, question: str, horizon: str = "1w") -> HandResult:
"""Superforecasting via OpenFang's Predictor hand."""
return await self.execute_hand(
"predictor", {"question": question, "horizon": horizon}
)
return await self.execute_hand("predictor", {"question": question, "horizon": horizon})
async def find_leads(self, icp: str, max_results: int = 10) -> HandResult:
"""Prospect discovery via OpenFang's Lead hand."""
return await self.execute_hand(
"lead", {"icp": icp, "max_results": max_results}
)
return await self.execute_hand("lead", {"icp": icp, "max_results": max_results})
async def research(self, topic: str, depth: str = "standard") -> HandResult:
"""Deep research via OpenFang's Researcher hand."""
return await self.execute_hand(
"researcher", {"topic": topic, "depth": depth}
)
return await self.execute_hand("researcher", {"topic": topic, "depth": depth})
# ── Inventory ────────────────────────────────────────────────────────────

View File

@@ -22,9 +22,11 @@ from infrastructure.openfang.client import OPENFANG_HANDS, openfang_client
try:
from mcp.schemas.base import create_tool_schema
except ImportError:
def create_tool_schema(**kwargs):
return kwargs
logger = logging.getLogger(__name__)
# ── Tool schemas ─────────────────────────────────────────────────────────────

View File

@@ -1,7 +1,7 @@
"""Cascade LLM Router — Automatic failover between providers."""
from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
from .api import router
from .cascade import CascadeRouter, Provider, ProviderStatus, get_router
__all__ = [
"CascadeRouter",

View File

@@ -15,6 +15,7 @@ router = APIRouter(prefix="/api/v1/router", tags=["router"])
class CompletionRequest(BaseModel):
"""Request body for completions."""
messages: list[dict[str, str]]
model: str | None = None
temperature: float = 0.7
@@ -23,6 +24,7 @@ class CompletionRequest(BaseModel):
class CompletionResponse(BaseModel):
"""Response from completion endpoint."""
content: str
provider: str
model: str
@@ -31,6 +33,7 @@ class CompletionResponse(BaseModel):
class ProviderControl(BaseModel):
"""Control a provider's status."""
action: str # "enable", "disable", "reset_circuit"
@@ -120,11 +123,13 @@ async def control_provider(
elif control.action == "disable":
provider.enabled = False
from .cascade import ProviderStatus
provider.status = ProviderStatus.DISABLED
return {"message": f"Provider {provider_name} disabled"}
elif control.action == "reset_circuit":
from .cascade import CircuitState, ProviderStatus
provider.circuit_state = CircuitState.CLOSED
provider.circuit_opened_at = None
provider.half_open_calls = 0
@@ -148,21 +153,28 @@ async def run_health_check(
is_healthy = cascade._check_provider_available(provider)
from .cascade import ProviderStatus
if is_healthy:
if provider.status == ProviderStatus.UNHEALTHY:
# Reset circuit if it was open but now healthy
provider.circuit_state = provider.circuit_state.__class__.CLOSED
provider.circuit_opened_at = None
provider.status = ProviderStatus.HEALTHY if provider.metrics.error_rate < 0.1 else ProviderStatus.DEGRADED
provider.status = (
ProviderStatus.HEALTHY
if provider.metrics.error_rate < 0.1
else ProviderStatus.DEGRADED
)
else:
provider.status = ProviderStatus.UNHEALTHY
results.append({
"name": provider.name,
"type": provider.type,
"healthy": is_healthy,
"status": provider.status.value,
})
results.append(
{
"name": provider.name,
"type": provider.type,
"healthy": is_healthy,
"status": provider.status.value,
}
)
return {
"checked_at": asyncio.get_event_loop().time(),

View File

@@ -33,6 +33,7 @@ logger = logging.getLogger(__name__)
class ProviderStatus(Enum):
"""Health status of a provider."""
HEALTHY = "healthy"
DEGRADED = "degraded" # Working but slow or occasional errors
UNHEALTHY = "unhealthy" # Circuit breaker open
@@ -41,22 +42,25 @@ class ProviderStatus(Enum):
class CircuitState(Enum):
"""Circuit breaker state."""
CLOSED = "closed" # Normal operation
OPEN = "open" # Failing, rejecting requests
CLOSED = "closed" # Normal operation
OPEN = "open" # Failing, rejecting requests
HALF_OPEN = "half_open" # Testing if recovered
class ContentType(Enum):
"""Type of content in the request."""
TEXT = "text"
VISION = "vision" # Contains images
AUDIO = "audio" # Contains audio
VISION = "vision" # Contains images
AUDIO = "audio" # Contains audio
MULTIMODAL = "multimodal" # Multiple content types
@dataclass
class ProviderMetrics:
"""Metrics for a single provider."""
total_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
@@ -81,6 +85,7 @@ class ProviderMetrics:
@dataclass
class ModelCapability:
"""Capabilities a model supports."""
name: str
supports_vision: bool = False
supports_audio: bool = False
@@ -93,6 +98,7 @@ class ModelCapability:
@dataclass
class Provider:
"""LLM provider configuration and state."""
name: str
type: str # ollama, openai, anthropic, airllm
enabled: bool
@@ -139,6 +145,7 @@ class Provider:
@dataclass
class RouterConfig:
"""Cascade router configuration."""
timeout_seconds: int = 30
max_retries_per_provider: int = 2
retry_delay_seconds: int = 1
@@ -194,6 +201,7 @@ class CascadeRouter:
self._mm_manager: Optional[Any] = None
try:
from infrastructure.models.multimodal import get_multimodal_manager
self._mm_manager = get_multimodal_manager()
except Exception as exc:
logger.debug("Multi-modal manager not available: %s", exc)
@@ -228,9 +236,15 @@ class CascadeRouter:
timeout_seconds=cascade.get("timeout_seconds", 30),
max_retries_per_provider=cascade.get("max_retries_per_provider", 2),
retry_delay_seconds=cascade.get("retry_delay_seconds", 1),
circuit_breaker_failure_threshold=cascade.get("circuit_breaker", {}).get("failure_threshold", 5),
circuit_breaker_recovery_timeout=cascade.get("circuit_breaker", {}).get("recovery_timeout", 60),
circuit_breaker_half_open_max_calls=cascade.get("circuit_breaker", {}).get("half_open_max_calls", 2),
circuit_breaker_failure_threshold=cascade.get("circuit_breaker", {}).get(
"failure_threshold", 5
),
circuit_breaker_recovery_timeout=cascade.get("circuit_breaker", {}).get(
"recovery_timeout", 60
),
circuit_breaker_half_open_max_calls=cascade.get("circuit_breaker", {}).get(
"half_open_max_calls", 2
),
auto_pull_models=multimodal.get("auto_pull", True),
fallback_chains=fallback_chains,
)
@@ -265,11 +279,15 @@ class CascadeRouter:
logger.error("Failed to load config: %s", exc)
def _expand_env_vars(self, content: str) -> str:
"""Expand ${VAR} syntax in YAML content."""
"""Expand ${VAR} syntax in YAML content.
Uses os.environ directly (not settings) because this is a generic
YAML config loader that must expand arbitrary variable references.
"""
import os
import re
def replace_var(match):
def replace_var(match: "re.Match[str]") -> str:
var_name = match.group(1)
return os.environ.get(var_name, match.group(0))
@@ -293,6 +311,7 @@ class CascadeRouter:
# Check if airllm is installed
try:
import airllm
return True
except ImportError:
return False
@@ -320,7 +339,7 @@ class CascadeRouter:
# Check for image URLs in content
if isinstance(content, str):
image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp')
image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
if any(ext in content.lower() for ext in image_extensions):
has_image = True
if content.startswith("data:image/"):
@@ -348,10 +367,7 @@ class CascadeRouter:
return ContentType.TEXT
def _get_fallback_model(
self,
provider: Provider,
original_model: str,
content_type: ContentType
self, provider: Provider, original_model: str, content_type: ContentType
) -> Optional[str]:
"""Get a fallback model for the given content type."""
# Map content type to capability
@@ -450,14 +466,15 @@ class CascadeRouter:
if fallback:
logger.info(
"Model %s doesn't support vision, falling back to %s",
selected_model, fallback
selected_model,
fallback,
)
selected_model = fallback
is_fallback_model = True
else:
logger.warning(
"No vision-capable model found on %s, trying anyway",
provider.name
provider.name,
)
# Try this provider
@@ -477,7 +494,9 @@ class CascadeRouter:
return {
"content": result["content"],
"provider": provider.name,
"model": result.get("model", selected_model or provider.get_default_model()),
"model": result.get(
"model", selected_model or provider.get_default_model()
),
"latency_ms": result.get("latency_ms", 0),
"is_fallback_model": is_fallback_model,
}
@@ -485,8 +504,7 @@ class CascadeRouter:
except Exception as exc:
error_msg = str(exc)
logger.warning(
"Provider %s attempt %d failed: %s",
provider.name, attempt + 1, error_msg
"Provider %s attempt %d failed: %s", provider.name, attempt + 1, error_msg
)
errors.append(f"{provider.name}: {error_msg}")
@@ -680,10 +698,12 @@ class CascadeRouter:
if msg["role"] == "system":
system_msg = msg["content"]
else:
conversation.append({
"role": msg["role"],
"content": msg["content"],
})
conversation.append(
{
"role": msg["role"],
"content": msg["content"],
}
)
kwargs = {
"model": model,
@@ -822,8 +842,12 @@ class CascadeRouter:
return {
"total_providers": len(self.providers),
"healthy_providers": healthy,
"degraded_providers": sum(1 for p in self.providers if p.status == ProviderStatus.DEGRADED),
"unhealthy_providers": sum(1 for p in self.providers if p.status == ProviderStatus.UNHEALTHY),
"degraded_providers": sum(
1 for p in self.providers if p.status == ProviderStatus.DEGRADED
),
"unhealthy_providers": sum(
1 for p in self.providers if p.status == ProviderStatus.UNHEALTHY
),
"providers": [
{
"name": p.name,
@@ -854,11 +878,13 @@ class CascadeRouter:
Returns:
Response dict with content and metadata
"""
messages = [{
"role": "user",
"content": prompt,
"images": [image_path],
}]
messages = [
{
"role": "user",
"content": prompt,
"images": [image_path],
}
]
return await self.complete(
messages=messages,
model=model,

View File

@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
@dataclass
class WSEvent:
"""A WebSocket event to broadcast to connected clients."""
event: str
data: dict
timestamp: str
@@ -93,28 +94,42 @@ class WebSocketManager:
await self.broadcast("agent_left", {"agent_id": agent_id, "name": name})
async def broadcast_task_posted(self, task_id: str, description: str) -> None:
await self.broadcast("task_posted", {
"task_id": task_id, "description": description,
})
await self.broadcast(
"task_posted",
{
"task_id": task_id,
"description": description,
},
)
async def broadcast_bid_submitted(
self, task_id: str, agent_id: str, bid_sats: int
) -> None:
await self.broadcast("bid_submitted", {
"task_id": task_id, "agent_id": agent_id, "bid_sats": bid_sats,
})
async def broadcast_bid_submitted(self, task_id: str, agent_id: str, bid_sats: int) -> None:
await self.broadcast(
"bid_submitted",
{
"task_id": task_id,
"agent_id": agent_id,
"bid_sats": bid_sats,
},
)
async def broadcast_task_assigned(self, task_id: str, agent_id: str) -> None:
await self.broadcast("task_assigned", {
"task_id": task_id, "agent_id": agent_id,
})
await self.broadcast(
"task_assigned",
{
"task_id": task_id,
"agent_id": agent_id,
},
)
async def broadcast_task_completed(
self, task_id: str, agent_id: str, result: str
) -> None:
await self.broadcast("task_completed", {
"task_id": task_id, "agent_id": agent_id, "result": result[:200],
})
async def broadcast_task_completed(self, task_id: str, agent_id: str, result: str) -> None:
await self.broadcast(
"task_completed",
{
"task_id": task_id,
"agent_id": agent_id,
"result": result[:200],
},
)
@property
def connection_count(self) -> int:

View File

@@ -21,6 +21,7 @@ from typing import Any, Optional
class PlatformState(Enum):
"""Lifecycle state of a chat platform connection."""
DISCONNECTED = auto()
CONNECTING = auto()
CONNECTED = auto()
@@ -30,13 +31,12 @@ class PlatformState(Enum):
@dataclass
class ChatMessage:
"""Vendor-agnostic representation of a chat message."""
content: str
author: str
channel_id: str
platform: str
timestamp: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat()
)
timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
message_id: Optional[str] = None
thread_id: Optional[str] = None
attachments: list[str] = field(default_factory=list)
@@ -46,13 +46,12 @@ class ChatMessage:
@dataclass
class ChatThread:
"""Vendor-agnostic representation of a conversation thread."""
thread_id: str
title: str
channel_id: str
platform: str
created_at: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat()
)
created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
archived: bool = False
message_count: int = 0
metadata: dict[str, Any] = field(default_factory=dict)
@@ -61,6 +60,7 @@ class ChatThread:
@dataclass
class InviteInfo:
"""Parsed invite extracted from an image or text."""
url: str
code: str
platform: str
@@ -71,6 +71,7 @@ class InviteInfo:
@dataclass
class PlatformStatus:
"""Current status of a chat platform connection."""
platform: str
state: PlatformState
token_set: bool

View File

@@ -115,7 +115,9 @@ class InviteParser:
"""Strategy 2: Use Ollama vision model for local OCR."""
try:
import base64
import httpx
from config import settings
except ImportError:
logger.debug("httpx not available for Ollama vision.")

View File

@@ -90,10 +90,7 @@ class DiscordVendor(ChatPlatform):
try:
import discord
except ImportError:
logger.error(
"discord.py is not installed. "
'Run: pip install ".[discord]"'
)
logger.error("discord.py is not installed. " 'Run: pip install ".[discord]"')
return False
try:
@@ -267,6 +264,7 @@ class DiscordVendor(ChatPlatform):
try:
from config import settings
return settings.discord_token or None
except Exception:
return None
@@ -363,9 +361,7 @@ class DiscordVendor(ChatPlatform):
# Show typing indicator while the agent processes
async with target.typing():
run = await asyncio.wait_for(
asyncio.to_thread(
agent.run, content, stream=False, session_id=session_id
),
asyncio.to_thread(agent.run, content, stream=False, session_id=session_id),
timeout=300,
)
response = run.content if hasattr(run, "content") else str(run)
@@ -374,7 +370,9 @@ class DiscordVendor(ChatPlatform):
response = "Sorry, that took too long. Please try a simpler request."
except Exception as exc:
logger.error("Discord: agent.run() failed: %s", exc)
response = "I'm having trouble reaching my language model right now. Please try again shortly."
response = (
"I'm having trouble reaching my language model right now. Please try again shortly."
)
# Strip hallucinated tool-call JSON and chain-of-thought narration
from timmy.session import _clean_response
@@ -408,6 +406,7 @@ class DiscordVendor(ChatPlatform):
# Create a thread from this message
from config import settings
thread_name = f"{settings.agent_name} | {message.author.display_name}"
thread = await message.create_thread(
name=thread_name[:100],

View File

@@ -7,7 +7,6 @@ from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
# ── Inbound: Paperclip → Timmy ──────────────────────────────────────────────

View File

@@ -20,7 +20,8 @@ import logging
from typing import Any, Callable, Coroutine, Dict, List, Optional, Protocol, runtime_checkable
from config import settings
from integrations.paperclip.bridge import PaperclipBridge, bridge as default_bridge
from integrations.paperclip.bridge import PaperclipBridge
from integrations.paperclip.bridge import bridge as default_bridge
from integrations.paperclip.models import PaperclipIssue
logger = logging.getLogger(__name__)
@@ -30,9 +31,8 @@ logger = logging.getLogger(__name__)
class Orchestrator(Protocol):
"""Anything with an ``execute_task`` matching Timmy's orchestrator."""
async def execute_task(
self, task_id: str, description: str, context: dict
) -> Any: ...
async def execute_task(self, task_id: str, description: str, context: dict) -> Any:
...
def _wrap_orchestrator(orch: Orchestrator) -> Callable:
@@ -125,7 +125,9 @@ class TaskRunner:
# Mark the issue as done
return await self.bridge.close_issue(issue.id, comment=None)
async def create_follow_up(self, original: PaperclipIssue, result: str) -> Optional[PaperclipIssue]:
async def create_follow_up(
self, original: PaperclipIssue, result: str
) -> Optional[PaperclipIssue]:
"""Create a recursive follow-up task for Timmy.
Timmy muses about task automation and writes a follow-up issue

View File

@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
@dataclass
class ShortcutAction:
"""Describes a Siri Shortcut action for the setup guide."""
name: str
endpoint: str
method: str

View File

@@ -54,6 +54,7 @@ class TelegramBot:
return from_file
try:
from config import settings
return settings.telegram_token or None
except Exception:
return None
@@ -94,10 +95,7 @@ class TelegramBot:
filters,
)
except ImportError:
logger.error(
"python-telegram-bot is not installed. "
'Run: pip install ".[telegram]"'
)
logger.error("python-telegram-bot is not installed. " 'Run: pip install ".[telegram]"')
return False
try:
@@ -149,6 +147,7 @@ class TelegramBot:
user_text = update.message.text
try:
from timmy.agent import create_timmy
agent = create_timmy()
run = await asyncio.to_thread(agent.run, user_text, stream=False)
response = run.content if hasattr(run, "content") else str(run)

View File

@@ -15,8 +15,8 @@ Intents:
- unknown: Unrecognized intent
"""
import re
import logging
import re
from dataclasses import dataclass
from typing import Optional
@@ -35,47 +35,68 @@ class Intent:
_PATTERNS: list[tuple[str, re.Pattern, float]] = [
# Status queries
("status", re.compile(
r"\b(status|health|how are you|are you (running|online|alive)|check)\b",
re.IGNORECASE,
), 0.9),
(
"status",
re.compile(
r"\b(status|health|how are you|are you (running|online|alive)|check)\b",
re.IGNORECASE,
),
0.9,
),
# Swarm commands
("swarm", re.compile(
r"\b(swarm|spawn|agents?|sub-?agents?|workers?)\b",
re.IGNORECASE,
), 0.85),
(
"swarm",
re.compile(
r"\b(swarm|spawn|agents?|sub-?agents?|workers?)\b",
re.IGNORECASE,
),
0.85,
),
# Task commands
("task", re.compile(
r"\b(task|assign|create task|new task|post task|bid)\b",
re.IGNORECASE,
), 0.85),
(
"task",
re.compile(
r"\b(task|assign|create task|new task|post task|bid)\b",
re.IGNORECASE,
),
0.85,
),
# Help
("help", re.compile(
r"\b(help|commands?|what can you do|capabilities)\b",
re.IGNORECASE,
), 0.9),
(
"help",
re.compile(
r"\b(help|commands?|what can you do|capabilities)\b",
re.IGNORECASE,
),
0.9,
),
# Voice settings
("voice", re.compile(
r"\b(voice|speak|volume|rate|speed|louder|quieter|faster|slower|mute|unmute)\b",
re.IGNORECASE,
), 0.85),
(
"voice",
re.compile(
r"\b(voice|speak|volume|rate|speed|louder|quieter|faster|slower|mute|unmute)\b",
re.IGNORECASE,
),
0.85,
),
# Code modification / self-modify
("code", re.compile(
r"\b(modify|edit|change|update|fix|refactor|implement|patch)\s+(the\s+)?(code|file|function|class|module|source)\b"
r"|\bself[- ]?modify\b"
r"|\b(update|change|edit)\s+(your|the)\s+(code|source)\b",
re.IGNORECASE,
), 0.9),
(
"code",
re.compile(
r"\b(modify|edit|change|update|fix|refactor|implement|patch)\s+(the\s+)?(code|file|function|class|module|source)\b"
r"|\bself[- ]?modify\b"
r"|\b(update|change|edit)\s+(your|the)\s+(code|source)\b",
re.IGNORECASE,
),
0.9,
),
]
# Keywords for entity extraction
_ENTITY_PATTERNS = {
"agent_name": re.compile(r"(?:spawn|start)\s+(?:agent\s+)?(\w+)|(?:agent)\s+(\w+)", re.IGNORECASE),
"agent_name": re.compile(
r"(?:spawn|start)\s+(?:agent\s+)?(\w+)|(?:agent)\s+(\w+)", re.IGNORECASE
),
"task_description": re.compile(r"(?:task|assign)[:;]?\s+(.+)", re.IGNORECASE),
"number": re.compile(r"\b(\d+)\b"),
"target_file": re.compile(r"(?:in|file|modify)\s+(?:the\s+)?([/\w._-]+\.py)", re.IGNORECASE),

View File

@@ -17,8 +17,8 @@ from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Optional
from spark import memory as spark_memory
from spark import eidos as spark_eidos
from spark import memory as spark_memory
logger = logging.getLogger(__name__)
@@ -29,10 +29,11 @@ _MIN_EVENTS = 3
@dataclass
class Advisory:
"""A single ranked recommendation."""
category: str # agent_performance, bid_optimization, etc.
priority: float # 0.01.0 (higher = more urgent)
title: str # Short headline
detail: str # Longer explanation
category: str # agent_performance, bid_optimization, etc.
priority: float # 0.01.0 (higher = more urgent)
title: str # Short headline
detail: str # Longer explanation
suggested_action: str # What to do about it
subject: Optional[str] = None # agent_id or None for system-level
evidence_count: int = 0 # Number of supporting events
@@ -47,15 +48,17 @@ def generate_advisories() -> list[Advisory]:
event_count = spark_memory.count_events()
if event_count < _MIN_EVENTS:
advisories.append(Advisory(
category="system_health",
priority=0.3,
title="Insufficient data",
detail=f"Only {event_count} events captured. "
f"Spark needs at least {_MIN_EVENTS} events to generate insights.",
suggested_action="Run more swarm tasks to build intelligence.",
evidence_count=event_count,
))
advisories.append(
Advisory(
category="system_health",
priority=0.3,
title="Insufficient data",
detail=f"Only {event_count} events captured. "
f"Spark needs at least {_MIN_EVENTS} events to generate insights.",
suggested_action="Run more swarm tasks to build intelligence.",
evidence_count=event_count,
)
)
return advisories
advisories.extend(_check_failure_patterns())
@@ -82,18 +85,20 @@ def _check_failure_patterns() -> list[Advisory]:
for aid, count in agent_failures.items():
if count >= 2:
results.append(Advisory(
category="failure_prevention",
priority=min(1.0, 0.5 + count * 0.15),
title=f"Agent {aid[:8]} has {count} failures",
detail=f"Agent {aid[:8]}... has failed {count} recent tasks. "
f"This pattern may indicate a capability mismatch or "
f"configuration issue.",
suggested_action=f"Review task types assigned to {aid[:8]}... "
f"and consider adjusting routing preferences.",
subject=aid,
evidence_count=count,
))
results.append(
Advisory(
category="failure_prevention",
priority=min(1.0, 0.5 + count * 0.15),
title=f"Agent {aid[:8]} has {count} failures",
detail=f"Agent {aid[:8]}... has failed {count} recent tasks. "
f"This pattern may indicate a capability mismatch or "
f"configuration issue.",
suggested_action=f"Review task types assigned to {aid[:8]}... "
f"and consider adjusting routing preferences.",
subject=aid,
evidence_count=count,
)
)
return results
@@ -128,27 +133,31 @@ def _check_agent_performance() -> list[Advisory]:
rate = wins / total
if rate >= 0.8 and total >= 3:
results.append(Advisory(
category="agent_performance",
priority=0.6,
title=f"Agent {aid[:8]} excels ({rate:.0%} success)",
detail=f"Agent {aid[:8]}... has completed {wins}/{total} tasks "
f"successfully. Consider routing more tasks to this agent.",
suggested_action="Increase task routing weight for this agent.",
subject=aid,
evidence_count=total,
))
results.append(
Advisory(
category="agent_performance",
priority=0.6,
title=f"Agent {aid[:8]} excels ({rate:.0%} success)",
detail=f"Agent {aid[:8]}... has completed {wins}/{total} tasks "
f"successfully. Consider routing more tasks to this agent.",
suggested_action="Increase task routing weight for this agent.",
subject=aid,
evidence_count=total,
)
)
elif rate <= 0.3 and total >= 3:
results.append(Advisory(
category="agent_performance",
priority=0.75,
title=f"Agent {aid[:8]} struggling ({rate:.0%} success)",
detail=f"Agent {aid[:8]}... has only succeeded on {wins}/{total} tasks. "
f"May need different task types or capability updates.",
suggested_action="Review this agent's capabilities and assigned task types.",
subject=aid,
evidence_count=total,
))
results.append(
Advisory(
category="agent_performance",
priority=0.75,
title=f"Agent {aid[:8]} struggling ({rate:.0%} success)",
detail=f"Agent {aid[:8]}... has only succeeded on {wins}/{total} tasks. "
f"May need different task types or capability updates.",
suggested_action="Review this agent's capabilities and assigned task types.",
subject=aid,
evidence_count=total,
)
)
return results
@@ -181,27 +190,31 @@ def _check_bid_patterns() -> list[Advisory]:
spread = max_bid - min_bid
if spread > avg_bid * 1.5:
results.append(Advisory(
category="bid_optimization",
priority=0.5,
title=f"Wide bid spread ({min_bid}{max_bid} sats)",
detail=f"Bids range from {min_bid} to {max_bid} sats "
f"(avg {avg_bid:.0f}). Large spread may indicate "
f"inefficient auction dynamics.",
suggested_action="Review agent bid strategies for consistency.",
evidence_count=len(bid_amounts),
))
results.append(
Advisory(
category="bid_optimization",
priority=0.5,
title=f"Wide bid spread ({min_bid}{max_bid} sats)",
detail=f"Bids range from {min_bid} to {max_bid} sats "
f"(avg {avg_bid:.0f}). Large spread may indicate "
f"inefficient auction dynamics.",
suggested_action="Review agent bid strategies for consistency.",
evidence_count=len(bid_amounts),
)
)
if avg_bid > 70:
results.append(Advisory(
category="bid_optimization",
priority=0.45,
title=f"High average bid ({avg_bid:.0f} sats)",
detail=f"The swarm average bid is {avg_bid:.0f} sats across "
f"{len(bid_amounts)} bids. This may be above optimal.",
suggested_action="Consider adjusting base bid rates for persona agents.",
evidence_count=len(bid_amounts),
))
results.append(
Advisory(
category="bid_optimization",
priority=0.45,
title=f"High average bid ({avg_bid:.0f} sats)",
detail=f"The swarm average bid is {avg_bid:.0f} sats across "
f"{len(bid_amounts)} bids. This may be above optimal.",
suggested_action="Consider adjusting base bid rates for persona agents.",
evidence_count=len(bid_amounts),
)
)
return results
@@ -216,27 +229,31 @@ def _check_prediction_accuracy() -> list[Advisory]:
avg = stats["avg_accuracy"]
if avg < 0.4:
results.append(Advisory(
category="system_health",
priority=0.65,
title=f"Low prediction accuracy ({avg:.0%})",
detail=f"EIDOS predictions have averaged {avg:.0%} accuracy "
f"over {stats['evaluated']} evaluations. The learning "
f"model needs more data or the swarm behaviour is changing.",
suggested_action="Continue running tasks; accuracy should improve "
"as the model accumulates more training data.",
evidence_count=stats["evaluated"],
))
results.append(
Advisory(
category="system_health",
priority=0.65,
title=f"Low prediction accuracy ({avg:.0%})",
detail=f"EIDOS predictions have averaged {avg:.0%} accuracy "
f"over {stats['evaluated']} evaluations. The learning "
f"model needs more data or the swarm behaviour is changing.",
suggested_action="Continue running tasks; accuracy should improve "
"as the model accumulates more training data.",
evidence_count=stats["evaluated"],
)
)
elif avg >= 0.75:
results.append(Advisory(
category="system_health",
priority=0.3,
title=f"Strong prediction accuracy ({avg:.0%})",
detail=f"EIDOS predictions are performing well at {avg:.0%} "
f"average accuracy over {stats['evaluated']} evaluations.",
suggested_action="No action needed. Spark intelligence is learning effectively.",
evidence_count=stats["evaluated"],
))
results.append(
Advisory(
category="system_health",
priority=0.3,
title=f"Strong prediction accuracy ({avg:.0%})",
detail=f"EIDOS predictions are performing well at {avg:.0%} "
f"average accuracy over {stats['evaluated']} evaluations.",
suggested_action="No action needed. Spark intelligence is learning effectively.",
evidence_count=stats["evaluated"],
)
)
return results
@@ -247,14 +264,16 @@ def _check_system_activity() -> list[Advisory]:
recent = spark_memory.get_events(limit=5)
if not recent:
results.append(Advisory(
category="system_health",
priority=0.4,
title="No swarm activity detected",
detail="Spark has not captured any events. "
"The swarm may be idle or Spark event capture is not active.",
suggested_action="Post a task to the swarm to activate the pipeline.",
))
results.append(
Advisory(
category="system_health",
priority=0.4,
title="No swarm activity detected",
detail="Spark has not captured any events. "
"The swarm may be idle or Spark event capture is not active.",
suggested_action="Post a task to the swarm to activate the pipeline.",
)
)
return results
# Check event type distribution
@@ -265,14 +284,16 @@ def _check_system_activity() -> list[Advisory]:
if "task_completed" not in type_counts and "task_failed" not in type_counts:
if type_counts.get("task_posted", 0) > 3:
results.append(Advisory(
category="system_health",
priority=0.6,
title="Tasks posted but none completing",
detail=f"{type_counts.get('task_posted', 0)} tasks posted "
f"but no completions or failures recorded.",
suggested_action="Check agent availability and auction configuration.",
evidence_count=type_counts.get("task_posted", 0),
))
results.append(
Advisory(
category="system_health",
priority=0.6,
title="Tasks posted but none completing",
detail=f"{type_counts.get('task_posted', 0)} tasks posted "
f"but no completions or failures recorded.",
suggested_action="Check agent availability and auction configuration.",
evidence_count=type_counts.get("task_posted", 0),
)
)
return results

View File

@@ -29,12 +29,13 @@ DB_PATH = Path("data/spark.db")
@dataclass
class Prediction:
"""A prediction made by the EIDOS loop."""
id: str
task_id: str
prediction_type: str # outcome, best_agent, bid_range
predicted_value: str # JSON-encoded prediction
actual_value: Optional[str] # JSON-encoded actual (filled on evaluation)
accuracy: Optional[float] # 0.01.0 (filled on evaluation)
prediction_type: str # outcome, best_agent, bid_range
predicted_value: str # JSON-encoded prediction
actual_value: Optional[str] # JSON-encoded actual (filled on evaluation)
accuracy: Optional[float] # 0.01.0 (filled on evaluation)
created_at: str
evaluated_at: Optional[str]
@@ -57,18 +58,15 @@ def _get_conn() -> sqlite3.Connection:
)
"""
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_pred_task ON spark_predictions(task_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_pred_type ON spark_predictions(prediction_type)"
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_pred_task ON spark_predictions(task_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_pred_type ON spark_predictions(prediction_type)")
conn.commit()
return conn
# ── Prediction phase ────────────────────────────────────────────────────────
def predict_task_outcome(
task_id: str,
task_description: str,
@@ -104,12 +102,8 @@ def predict_task_outcome(
if best_agent:
prediction["likely_winner"] = best_agent
prediction["success_probability"] = round(
min(1.0, 0.5 + best_rate * 0.4), 2
)
prediction["reasoning"] = (
f"agent {best_agent[:8]} has {best_rate:.0%} success rate"
)
prediction["success_probability"] = round(min(1.0, 0.5 + best_rate * 0.4), 2)
prediction["reasoning"] = f"agent {best_agent[:8]} has {best_rate:.0%} success rate"
# Adjust bid range from history
all_bids = []
@@ -144,6 +138,7 @@ def predict_task_outcome(
# ── Evaluation phase ────────────────────────────────────────────────────────
def evaluate_prediction(
task_id: str,
actual_winner: Optional[str],
@@ -242,6 +237,7 @@ def _compute_accuracy(predicted: dict, actual: dict) -> float:
# ── Query helpers ──────────────────────────────────────────────────────────
def get_predictions(
task_id: Optional[str] = None,
evaluated_only: bool = False,

View File

@@ -76,7 +76,10 @@ class SparkEngine:
return event_id
def on_bid_submitted(
self, task_id: str, agent_id: str, bid_sats: int,
self,
task_id: str,
agent_id: str,
bid_sats: int,
) -> Optional[str]:
"""Capture a bid event."""
if not self._enabled:
@@ -90,12 +93,13 @@ class SparkEngine:
data=json.dumps({"bid_sats": bid_sats}),
)
logger.debug("Spark: captured bid %s%s (%d sats)",
agent_id[:8], task_id[:8], bid_sats)
logger.debug("Spark: captured bid %s%s (%d sats)", agent_id[:8], task_id[:8], bid_sats)
return event_id
def on_task_assigned(
self, task_id: str, agent_id: str,
self,
task_id: str,
agent_id: str,
) -> Optional[str]:
"""Capture a task-assigned event."""
if not self._enabled:
@@ -108,8 +112,7 @@ class SparkEngine:
task_id=task_id,
)
logger.debug("Spark: captured assignment %s%s",
task_id[:8], agent_id[:8])
logger.debug("Spark: captured assignment %s%s", task_id[:8], agent_id[:8])
return event_id
def on_task_completed(
@@ -128,10 +131,12 @@ class SparkEngine:
description=f"Task completed by {agent_id[:8]}",
agent_id=agent_id,
task_id=task_id,
data=json.dumps({
"result_length": len(result),
"winning_bid": winning_bid,
}),
data=json.dumps(
{
"result_length": len(result),
"winning_bid": winning_bid,
}
),
)
# Evaluate EIDOS prediction
@@ -154,8 +159,7 @@ class SparkEngine:
# Consolidate memory if enough events for this agent
self._maybe_consolidate(agent_id)
logger.debug("Spark: captured completion %s by %s",
task_id[:8], agent_id[:8])
logger.debug("Spark: captured completion %s by %s", task_id[:8], agent_id[:8])
return event_id
def on_task_failed(
@@ -186,8 +190,7 @@ class SparkEngine:
# Failures always worth consolidating
self._maybe_consolidate(agent_id)
logger.debug("Spark: captured failure %s by %s",
task_id[:8], agent_id[:8])
logger.debug("Spark: captured failure %s by %s", task_id[:8], agent_id[:8])
return event_id
def on_agent_joined(self, agent_id: str, name: str) -> Optional[str]:
@@ -288,7 +291,7 @@ class SparkEngine:
memory_type="pattern",
subject=agent_id,
content=f"Agent {agent_id[:8]} has a strong track record: "
f"{len(completions)}/{total} tasks completed successfully.",
f"{len(completions)}/{total} tasks completed successfully.",
confidence=min(0.95, 0.6 + total * 0.05),
source_events=total,
)
@@ -297,7 +300,7 @@ class SparkEngine:
memory_type="anomaly",
subject=agent_id,
content=f"Agent {agent_id[:8]} is struggling: only "
f"{len(completions)}/{total} tasks completed.",
f"{len(completions)}/{total} tasks completed.",
confidence=min(0.95, 0.6 + total * 0.05),
source_events=total,
)
@@ -347,6 +350,7 @@ class SparkEngine:
def _create_engine() -> SparkEngine:
try:
from config import settings
return SparkEngine(enabled=settings.spark_enabled)
except Exception:
return SparkEngine(enabled=True)

View File

@@ -28,25 +28,27 @@ IMPORTANCE_HIGH = 0.8
@dataclass
class SparkEvent:
"""A single captured swarm event."""
id: str
event_type: str # task_posted, bid, assignment, completion, failure
event_type: str # task_posted, bid, assignment, completion, failure
agent_id: Optional[str]
task_id: Optional[str]
description: str
data: str # JSON payload
importance: float # 0.01.0
data: str # JSON payload
importance: float # 0.01.0
created_at: str
@dataclass
class SparkMemory:
"""A consolidated memory distilled from event patterns."""
id: str
memory_type: str # pattern, insight, anomaly
subject: str # agent_id or "system"
content: str # Human-readable insight
confidence: float # 0.01.0
source_events: int # How many events contributed
memory_type: str # pattern, insight, anomaly
subject: str # agent_id or "system"
content: str # Human-readable insight
confidence: float # 0.01.0
source_events: int # How many events contributed
created_at: str
expires_at: Optional[str]
@@ -83,24 +85,17 @@ def _get_conn() -> sqlite3.Connection:
)
"""
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_events_type ON spark_events(event_type)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_events_agent ON spark_events(agent_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_events_task ON spark_events(task_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memories_subject ON spark_memories(subject)"
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_events_type ON spark_events(event_type)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_events_agent ON spark_events(agent_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_events_task ON spark_events(task_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_subject ON spark_memories(subject)")
conn.commit()
return conn
# ── Importance scoring ──────────────────────────────────────────────────────
def score_importance(event_type: str, data: dict) -> float:
"""Compute importance score for an event (0.01.0).
@@ -132,6 +127,7 @@ def score_importance(event_type: str, data: dict) -> float:
# ── Event recording ─────────────────────────────────────────────────────────
def record_event(
event_type: str,
description: str,
@@ -142,6 +138,7 @@ def record_event(
) -> str:
"""Record a swarm event. Returns the event id."""
import json
event_id = str(uuid.uuid4())
now = datetime.now(timezone.utc).isoformat()
@@ -224,6 +221,7 @@ def count_events(event_type: Optional[str] = None) -> int:
# ── Memory consolidation ───────────────────────────────────────────────────
def store_memory(
memory_type: str,
subject: str,

View File

@@ -73,7 +73,8 @@ def _ensure_db() -> sqlite3.Connection:
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
conn.execute("""
conn.execute(
"""
CREATE TABLE IF NOT EXISTS events (
id TEXT PRIMARY KEY,
event_type TEXT NOT NULL,
@@ -83,7 +84,8 @@ def _ensure_db() -> sqlite3.Connection:
data TEXT DEFAULT '{}',
timestamp TEXT NOT NULL
)
""")
"""
)
conn.commit()
return conn
@@ -119,8 +121,15 @@ def log_event(
db.execute(
"INSERT INTO events (id, event_type, source, task_id, agent_id, data, timestamp) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
(entry.id, event_type.value, source, task_id, agent_id,
json.dumps(data or {}), entry.timestamp),
(
entry.id,
event_type.value,
source,
task_id,
agent_id,
json.dumps(data or {}),
entry.timestamp,
),
)
db.commit()
finally:
@@ -131,6 +140,7 @@ def log_event(
# Broadcast to WebSocket clients (non-blocking)
try:
from infrastructure.events.broadcaster import event_broadcaster
event_broadcaster.broadcast_sync(entry)
except Exception:
pass
@@ -157,13 +167,15 @@ def get_task_events(task_id: str, limit: int = 50) -> list[EventLogEntry]:
et = EventType(r["event_type"])
except ValueError:
et = EventType.SYSTEM_INFO
entries.append(EventLogEntry(
id=r["id"],
event_type=et,
source=r["source"],
timestamp=r["timestamp"],
data=json.loads(r["data"]) if r["data"] else {},
task_id=r["task_id"],
agent_id=r["agent_id"],
))
entries.append(
EventLogEntry(
id=r["id"],
event_type=et,
source=r["source"],
timestamp=r["timestamp"],
data=json.loads(r["data"]) if r["data"] else {},
task_id=r["task_id"],
agent_id=r["agent_id"],
)
)
return entries

View File

@@ -29,7 +29,8 @@ def _ensure_db() -> sqlite3.Connection:
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
conn.execute("""
conn.execute(
"""
CREATE TABLE IF NOT EXISTS tasks (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
@@ -42,7 +43,8 @@ def _ensure_db() -> sqlite3.Connection:
created_at TEXT DEFAULT (datetime('now')),
completed_at TEXT
)
""")
"""
)
conn.commit()
return conn
@@ -103,9 +105,7 @@ def get_task_summary_for_briefing() -> dict:
"""Return a summary of task counts by status for the morning briefing."""
db = _ensure_db()
try:
rows = db.execute(
"SELECT status, COUNT(*) as cnt FROM tasks GROUP BY status"
).fetchall()
rows = db.execute("SELECT status, COUNT(*) as cnt FROM tasks GROUP BY status").fetchall()
finally:
db.close()

View File

@@ -74,8 +74,8 @@ def _pull_model(model_name: str) -> bool:
True if successful or model already exists
"""
try:
import urllib.request
import json
import urllib.request
logger.info("Pulling model: %s", model_name)
@@ -134,25 +134,16 @@ def _resolve_model_with_fallback(
for fallback_model in fallback_chain:
if _check_model_available(fallback_model):
logger.warning(
"Using fallback model %s (requested: %s)",
fallback_model, model
)
logger.warning("Using fallback model %s (requested: %s)", fallback_model, model)
return fallback_model, True
# Try to pull the fallback
if auto_pull and _pull_model(fallback_model):
logger.info(
"Pulled and using fallback model %s (requested: %s)",
fallback_model, model
)
logger.info("Pulled and using fallback model %s (requested: %s)", fallback_model, model)
return fallback_model, True
# Absolute last resort - return the requested model and hope for the best
logger.error(
"No models available in fallback chain. Requested: %s",
model
)
logger.error("No models available in fallback chain. Requested: %s", model)
return model, False
@@ -190,6 +181,7 @@ def _resolve_backend(requested: str | None) -> str:
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, claude_available, grok_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
@@ -215,14 +207,17 @@ def create_timmy(
if resolved == "claude":
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if resolved == "grok":
from timmy.backends import GrokBackend
return GrokBackend()
if resolved == "airllm":
from timmy.backends import TimmyAirLLMAgent
return TimmyAirLLMAgent(model_size=size)
# Default: Ollama via Agno.
@@ -236,11 +231,11 @@ def create_timmy(
# If Ollama is completely unreachable, fall back to Claude if available
if not _check_model_available(model_name):
from timmy.backends import claude_available
if claude_available():
logger.warning(
"Ollama unreachable — falling back to Claude backend"
)
logger.warning("Ollama unreachable — falling back to Claude backend")
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if is_fallback:
@@ -259,6 +254,7 @@ def create_timmy(
# Try to load memory context
try:
from timmy.memory_system import memory_system
memory_context = memory_system.get_system_context()
if memory_context:
# Truncate if too long — smaller budget for small models

View File

@@ -16,38 +16,41 @@ Architecture:
All methods return effects that can be logged, audited, and replayed.
"""
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum, auto
from typing import Any, Optional
import uuid
class PerceptionType(Enum):
"""Types of sensory input an agent can receive."""
TEXT = auto() # Natural language
IMAGE = auto() # Visual input
AUDIO = auto() # Sound/speech
SENSOR = auto() # Temperature, distance, etc.
MOTION = auto() # Accelerometer, gyroscope
NETWORK = auto() # API calls, messages
INTERNAL = auto() # Self-monitoring (battery, temp)
TEXT = auto() # Natural language
IMAGE = auto() # Visual input
AUDIO = auto() # Sound/speech
SENSOR = auto() # Temperature, distance, etc.
MOTION = auto() # Accelerometer, gyroscope
NETWORK = auto() # API calls, messages
INTERNAL = auto() # Self-monitoring (battery, temp)
class ActionType(Enum):
"""Types of actions an agent can perform."""
TEXT = auto() # Generate text response
SPEAK = auto() # Text-to-speech
MOVE = auto() # Physical movement
GRIP = auto() # Manipulate objects
CALL = auto() # API/network call
EMIT = auto() # Signal/light/sound
SLEEP = auto() # Power management
TEXT = auto() # Generate text response
SPEAK = auto() # Text-to-speech
MOVE = auto() # Physical movement
GRIP = auto() # Manipulate objects
CALL = auto() # API/network call
EMIT = auto() # Signal/light/sound
SLEEP = auto() # Power management
class AgentCapability(Enum):
"""High-level capabilities a TimAgent may possess."""
REASONING = "reasoning"
CODING = "coding"
WRITING = "writing"
@@ -67,6 +70,7 @@ class AgentIdentity:
This persists across sessions and substrates. If Timmy moves
from cloud to robot, the identity follows.
"""
id: str
name: str
version: str
@@ -89,6 +93,7 @@ class Perception:
Substrate-agnostic representation. A camera image and a
LiDAR point cloud are both Perception instances.
"""
type: PerceptionType
data: Any # Content depends on type
timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
@@ -121,6 +126,7 @@ class Action:
Actions are effects — they describe what should happen,
not how. The substrate implements the "how."
"""
type: ActionType
payload: Any # Action-specific data
timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
@@ -152,6 +158,7 @@ class Memory:
Memories are substrate-agnostic. A conversation history
and a video recording are both Memory instances.
"""
id: str
content: Any
created_at: str
@@ -169,6 +176,7 @@ class Memory:
@dataclass
class Communication:
"""A message to/from another agent or human."""
sender: str
recipient: str
content: Any
@@ -336,32 +344,38 @@ class AgentEffect:
def log_perceive(self, perception: Perception, memory_id: str) -> None:
"""Log a perception event."""
self._effects.append({
"type": "perceive",
"perception_type": perception.type.name,
"source": perception.source,
"memory_id": memory_id,
"timestamp": datetime.now(timezone.utc).isoformat(),
})
self._effects.append(
{
"type": "perceive",
"perception_type": perception.type.name,
"source": perception.source,
"memory_id": memory_id,
"timestamp": datetime.now(timezone.utc).isoformat(),
}
)
def log_reason(self, query: str, action_type: ActionType) -> None:
"""Log a reasoning event."""
self._effects.append({
"type": "reason",
"query": query,
"action_type": action_type.name,
"timestamp": datetime.now(timezone.utc).isoformat(),
})
self._effects.append(
{
"type": "reason",
"query": query,
"action_type": action_type.name,
"timestamp": datetime.now(timezone.utc).isoformat(),
}
)
def log_act(self, action: Action, result: Any) -> None:
"""Log an action event."""
self._effects.append({
"type": "act",
"action_type": action.type.name,
"confidence": action.confidence,
"result_type": type(result).__name__,
"timestamp": datetime.now(timezone.utc).isoformat(),
})
self._effects.append(
{
"type": "act",
"action_type": action.type.name,
"confidence": action.confidence,
"result_type": type(result).__name__,
"timestamp": datetime.now(timezone.utc).isoformat(),
}
)
def export(self) -> list[dict]:
"""Export effect log for analysis."""

View File

@@ -19,19 +19,19 @@ Usage:
from typing import Any, Optional
from timmy.agent import _resolve_model_with_fallback, create_timmy
from timmy.agent_core.interface import (
AgentCapability,
AgentIdentity,
Perception,
PerceptionType,
Action,
ActionType,
Memory,
Communication,
TimAgent,
AgentCapability,
AgentEffect,
AgentIdentity,
Communication,
Memory,
Perception,
PerceptionType,
TimAgent,
)
from timmy.agent import create_timmy, _resolve_model_with_fallback
class OllamaAgent(TimAgent):
@@ -74,6 +74,7 @@ class OllamaAgent(TimAgent):
if is_fallback:
import logging
logging.getLogger(__name__).info(
"OllamaAdapter using fallback model %s", resolved_model
)

View File

@@ -30,9 +30,11 @@ logger = logging.getLogger(__name__)
# Data structures
# ---------------------------------------------------------------------------
@dataclass
class AgenticStep:
"""Result of a single step in the agentic loop."""
step_num: int
description: str
result: str
@@ -43,6 +45,7 @@ class AgenticStep:
@dataclass
class AgenticResult:
"""Final result of the entire agentic loop."""
task_id: str
task: str
summary: str
@@ -55,6 +58,7 @@ class AgenticResult:
# Agent factory
# ---------------------------------------------------------------------------
def _get_loop_agent():
"""Create a fresh agent for the agentic loop.
@@ -62,6 +66,7 @@ def _get_loop_agent():
dedicated session so it doesn't pollute the main chat history.
"""
from timmy.agent import create_timmy
return create_timmy()
@@ -85,6 +90,7 @@ def _parse_steps(plan_text: str) -> list[str]:
# Core loop
# ---------------------------------------------------------------------------
async def run_agentic_loop(
task: str,
*,
@@ -146,12 +152,15 @@ async def run_agentic_loop(
was_truncated = planned_steps > total_steps
# Broadcast plan
await _broadcast_progress("agentic.plan_ready", {
"task_id": task_id,
"task": task,
"steps": steps,
"total": total_steps,
})
await _broadcast_progress(
"agentic.plan_ready",
{
"task_id": task_id,
"task": task,
"steps": steps,
"total": total_steps,
},
)
# ── Phase 2: Execution ─────────────────────────────────────────────────
completed_results: list[str] = []
@@ -175,6 +184,7 @@ async def run_agentic_loop(
# Clean the response
from timmy.session import _clean_response
step_result = _clean_response(step_result)
step = AgenticStep(
@@ -188,13 +198,16 @@ async def run_agentic_loop(
completed_results.append(f"Step {i}: {step_result[:200]}")
# Broadcast progress
await _broadcast_progress("agentic.step_complete", {
"task_id": task_id,
"step": i,
"total": total_steps,
"description": step_desc,
"result": step_result[:200],
})
await _broadcast_progress(
"agentic.step_complete",
{
"task_id": task_id,
"step": i,
"total": total_steps,
"description": step_desc,
"result": step_result[:200],
},
)
if on_progress:
await on_progress(step_desc, i, total_steps)
@@ -210,11 +223,16 @@ async def run_agentic_loop(
)
try:
adapt_run = await asyncio.to_thread(
agent.run, adapt_prompt, stream=False,
agent.run,
adapt_prompt,
stream=False,
session_id=f"{session_id}_adapt{i}",
)
adapt_result = adapt_run.content if hasattr(adapt_run, "content") else str(adapt_run)
adapt_result = (
adapt_run.content if hasattr(adapt_run, "content") else str(adapt_run)
)
from timmy.session import _clean_response
adapt_result = _clean_response(adapt_result)
step = AgenticStep(
@@ -227,14 +245,17 @@ async def run_agentic_loop(
result.steps.append(step)
completed_results.append(f"Step {i} (adapted): {adapt_result[:200]}")
await _broadcast_progress("agentic.step_adapted", {
"task_id": task_id,
"step": i,
"total": total_steps,
"description": step_desc,
"error": str(exc),
"adaptation": adapt_result[:200],
})
await _broadcast_progress(
"agentic.step_adapted",
{
"task_id": task_id,
"step": i,
"total": total_steps,
"description": step_desc,
"error": str(exc),
"adaptation": adapt_result[:200],
},
)
if on_progress:
await on_progress(f"[Adapted] {step_desc}", i, total_steps)
@@ -259,11 +280,16 @@ async def run_agentic_loop(
)
try:
summary_run = await asyncio.to_thread(
agent.run, summary_prompt, stream=False,
agent.run,
summary_prompt,
stream=False,
session_id=f"{session_id}_summary",
)
result.summary = summary_run.content if hasattr(summary_run, "content") else str(summary_run)
result.summary = (
summary_run.content if hasattr(summary_run, "content") else str(summary_run)
)
from timmy.session import _clean_response
result.summary = _clean_response(result.summary)
except Exception as exc:
logger.error("Agentic loop summary failed: %s", exc)
@@ -281,13 +307,16 @@ async def run_agentic_loop(
result.total_duration_ms = int((time.monotonic() - start_time) * 1000)
await _broadcast_progress("agentic.task_complete", {
"task_id": task_id,
"status": result.status,
"steps_completed": len(result.steps),
"summary": result.summary[:300],
"duration_ms": result.total_duration_ms,
})
await _broadcast_progress(
"agentic.task_complete",
{
"task_id": task_id,
"status": result.status,
"steps_completed": len(result.steps),
"summary": result.summary[:300],
"duration_ms": result.total_duration_ms,
},
)
return result
@@ -296,10 +325,12 @@ async def run_agentic_loop(
# WebSocket broadcast helper
# ---------------------------------------------------------------------------
async def _broadcast_progress(event: str, data: dict) -> None:
"""Broadcast agentic loop progress via WebSocket (best-effort)."""
try:
from infrastructure.ws_manager.handler import ws_manager
await ws_manager.broadcast(event, data)
except Exception:
logger.debug("Agentic loop: WS broadcast failed for %s", event)

View File

@@ -18,7 +18,7 @@ from agno.agent import Agent
from agno.models.ollama import Ollama
from config import settings
from infrastructure.events.bus import EventBus, Event
from infrastructure.events.bus import Event, EventBus
try:
from mcp.registry import tool_registry
@@ -114,11 +114,13 @@ class BaseAgent(ABC):
# Emit completion event
if self.event_bus:
await self.event_bus.publish(Event(
type=f"agent.{self.agent_id}.response",
source=self.agent_id,
data={"input": message, "output": response},
))
await self.event_bus.publish(
Event(
type=f"agent.{self.agent_id}.response",
source=self.agent_id,
data={"input": message, "output": response},
)
)
return response

View File

@@ -12,9 +12,9 @@ from typing import Any, Optional
from agno.agent import Agent
from agno.models.ollama import Ollama
from timmy.agents.base import BaseAgent, SubAgent
from config import settings
from infrastructure.events.bus import EventBus, event_bus
from timmy.agents.base import BaseAgent, SubAgent
logger = logging.getLogger(__name__)
@@ -55,13 +55,13 @@ def build_timmy_context_sync() -> dict[str, Any]:
# 1. Get recent git commits
try:
from tools.git_tools import git_log
result = git_log(max_count=20)
if result.get("success"):
commits = result.get("commits", [])
ctx["git_log"] = "\n".join([
f"{c['short_sha']} {c['message'].split(chr(10))[0]}"
for c in commits[:20]
])
ctx["git_log"] = "\n".join(
[f"{c['short_sha']} {c['message'].split(chr(10))[0]}" for c in commits[:20]]
)
except Exception as exc:
logger.warning("Could not load git log for context: %s", exc)
ctx["git_log"] = "(Git log unavailable)"
@@ -69,12 +69,18 @@ def build_timmy_context_sync() -> dict[str, Any]:
# 2. Get active sub-agents
try:
from swarm import registry as swarm_registry
conn = swarm_registry._get_conn()
rows = conn.execute(
"SELECT id, name, status, capabilities FROM agents ORDER BY name"
).fetchall()
ctx["agents"] = [
{"id": r["id"], "name": r["name"], "status": r["status"], "capabilities": r["capabilities"]}
{
"id": r["id"],
"name": r["name"],
"status": r["status"],
"capabilities": r["capabilities"],
}
for r in rows
]
conn.close()
@@ -85,6 +91,7 @@ def build_timmy_context_sync() -> dict[str, Any]:
# 3. Read hot memory (via HotMemory to auto-create if missing)
try:
from timmy.memory_system import memory_system
ctx["memory"] = memory_system.hot.read()[:2000]
except Exception as exc:
logger.warning("Could not load memory for context: %s", exc)
@@ -112,18 +119,28 @@ def format_timmy_prompt(base_prompt: str, context: dict[str, Any]) -> str:
"""Format the system prompt with dynamic context."""
# Format agents list
agents_list = "\n".join([
f"| {a['name']} | {a['capabilities'] or 'general'} | {a['status']} |"
for a in context.get("agents", [])
]) or "(No agents registered yet)"
agents_list = (
"\n".join(
[
f"| {a['name']} | {a['capabilities'] or 'general'} | {a['status']} |"
for a in context.get("agents", [])
]
)
or "(No agents registered yet)"
)
# Format hands list
hands_list = "\n".join([
f"| {h['name']} | {h['schedule']} | {'enabled' if h['enabled'] else 'disabled'} |"
for h in context.get("hands", [])
]) or "(No hands configured)"
hands_list = (
"\n".join(
[
f"| {h['name']} | {h['schedule']} | {'enabled' if h['enabled'] else 'disabled'} |"
for h in context.get("hands", [])
]
)
or "(No hands configured)"
)
repo_root = context.get('repo_root', settings.repo_root)
repo_root = context.get("repo_root", settings.repo_root)
context_block = f"""
## Current System Context (as of {context.get('timestamp', datetime.now(timezone.utc).isoformat())})
@@ -227,7 +244,15 @@ class TimmyOrchestrator(BaseAgent):
name="Orchestrator",
role="orchestrator",
system_prompt=formatted_prompt,
tools=["web_search", "read_file", "write_file", "python", "memory_search", "memory_write", "system_status"],
tools=[
"web_search",
"read_file",
"write_file",
"python",
"memory_search",
"memory_write",
"system_status",
],
)
# Sub-agent registry
@@ -268,15 +293,15 @@ class TimmyOrchestrator(BaseAgent):
# Read recent git log --oneline -15 from repo root
try:
from tools.git_tools import git_log
git_result = git_log(max_count=15)
if git_result.get("success"):
commits = git_result.get("commits", [])
self._session_context["git_log_commits"] = commits
# Format as oneline for easy reading
self._session_context["git_log_oneline"] = "\n".join([
f"{c['short_sha']} {c['message'].split(chr(10))[0]}"
for c in commits
])
self._session_context["git_log_oneline"] = "\n".join(
[f"{c['short_sha']} {c['message'].split(chr(10))[0]}" for c in commits]
)
logger.debug(f"Session init: loaded {len(commits)} commits from git log")
else:
self._session_context["git_log_oneline"] = "Git log unavailable"
@@ -303,7 +328,9 @@ class TimmyOrchestrator(BaseAgent):
# Build session-specific context block for the prompt
recent_changes = self._session_context.get("git_log_oneline", "")
if recent_changes and recent_changes != "Git log unavailable":
self._session_context["recent_changes_block"] = f"""
self._session_context[
"recent_changes_block"
] = f"""
## Recent Changes to Your Codebase (last 15 commits):
```
{recent_changes}
@@ -346,9 +373,14 @@ When asked "what's new?" or similar, refer to these commits for actual changes.
# Direct response patterns (no delegation needed)
direct_patterns = [
"your name", "who are you", "what are you",
"hello", "hi", "how are you",
"help", "what can you do",
"your name",
"who are you",
"what are you",
"hello",
"hi",
"how are you",
"help",
"what can you do",
]
for pattern in direct_patterns:
@@ -357,9 +389,13 @@ When asked "what's new?" or similar, refer to these commits for actual changes.
# Check for memory references — delegate to Echo
memory_patterns = [
"we talked about", "we discussed", "remember",
"what did i say", "what did we decide",
"remind me", "have we",
"we talked about",
"we discussed",
"remember",
"what did i say",
"what did we decide",
"remind me",
"have we",
]
for pattern in memory_patterns:
@@ -404,10 +440,7 @@ When asked "what's new?" or similar, refer to these commits for actual changes.
"""Get status of all agents in the swarm."""
return {
"orchestrator": self.get_status(),
"sub_agents": {
aid: agent.get_status()
for aid, agent in self.sub_agents.items()
},
"sub_agents": {aid: agent.get_status() for aid, agent in self.sub_agents.items()},
"total_agents": 1 + len(self.sub_agents),
}
@@ -468,10 +501,29 @@ _PERSONAS: list[dict[str, Any]] = [
"system_prompt": (
"You are Helm, a routing and orchestration specialist.\n"
"Analyze tasks and decide how to route them to other agents.\n"
"Available agents: Seer (research), Forge (code), Quill (writing), Echo (memory).\n"
"Available agents: Seer (research), Forge (code), Quill (writing), Echo (memory), Lab (experiments).\n"
"Respond with: Primary Agent: [agent name]"
),
},
{
"agent_id": "lab",
"name": "Lab",
"role": "experiment",
"tools": [
"run_experiment",
"prepare_experiment",
"shell",
"python",
"read_file",
"write_file",
],
"system_prompt": (
"You are Lab, an autonomous ML experimentation specialist.\n"
"You run time-boxed training experiments, evaluate metrics,\n"
"modify training code to improve results, and iterate.\n"
"Always report the metric delta. Never exceed the time budget."
),
},
]

View File

@@ -38,10 +38,10 @@ class ApprovalItem:
id: str
title: str
description: str
proposed_action: str # what Timmy wants to do
impact: str # "low" | "medium" | "high"
proposed_action: str # what Timmy wants to do
impact: str # "low" | "medium" | "high"
created_at: datetime
status: str # "pending" | "approved" | "rejected"
status: str # "pending" | "approved" | "rejected"
def _get_conn(db_path: Path = _DEFAULT_DB) -> sqlite3.Connection:
@@ -81,6 +81,7 @@ def _row_to_item(row: sqlite3.Row) -> ApprovalItem:
# Public API
# ---------------------------------------------------------------------------
def create_item(
title: str,
description: str,
@@ -133,18 +134,14 @@ def list_pending(db_path: Path = _DEFAULT_DB) -> list[ApprovalItem]:
def list_all(db_path: Path = _DEFAULT_DB) -> list[ApprovalItem]:
"""Return all approval items regardless of status, newest first."""
conn = _get_conn(db_path)
rows = conn.execute(
"SELECT * FROM approval_items ORDER BY created_at DESC"
).fetchall()
rows = conn.execute("SELECT * FROM approval_items ORDER BY created_at DESC").fetchall()
conn.close()
return [_row_to_item(r) for r in rows]
def get_item(item_id: str, db_path: Path = _DEFAULT_DB) -> Optional[ApprovalItem]:
conn = _get_conn(db_path)
row = conn.execute(
"SELECT * FROM approval_items WHERE id = ?", (item_id,)
).fetchone()
row = conn.execute("SELECT * FROM approval_items WHERE id = ?", (item_id,)).fetchone()
conn.close()
return _row_to_item(row) if row else None
@@ -152,9 +149,7 @@ def get_item(item_id: str, db_path: Path = _DEFAULT_DB) -> Optional[ApprovalItem
def approve(item_id: str, db_path: Path = _DEFAULT_DB) -> Optional[ApprovalItem]:
"""Mark an approval item as approved."""
conn = _get_conn(db_path)
conn.execute(
"UPDATE approval_items SET status = 'approved' WHERE id = ?", (item_id,)
)
conn.execute("UPDATE approval_items SET status = 'approved' WHERE id = ?", (item_id,))
conn.commit()
conn.close()
return get_item(item_id, db_path)
@@ -163,9 +158,7 @@ def approve(item_id: str, db_path: Path = _DEFAULT_DB) -> Optional[ApprovalItem]
def reject(item_id: str, db_path: Path = _DEFAULT_DB) -> Optional[ApprovalItem]:
"""Mark an approval item as rejected."""
conn = _get_conn(db_path)
conn.execute(
"UPDATE approval_items SET status = 'rejected' WHERE id = ?", (item_id,)
)
conn.execute("UPDATE approval_items SET status = 'rejected' WHERE id = ?", (item_id,))
conn.commit()
conn.close()
return get_item(item_id, db_path)

214
src/timmy/autoresearch.py Normal file
View File

@@ -0,0 +1,214 @@
"""Autoresearch — autonomous ML experiment loops.
Integrates Karpathy's autoresearch pattern: an agent modifies training
code, runs time-boxed GPU experiments, evaluates a target metric
(val_bpb by default), and iterates to find improvements.
Flow:
1. prepare_experiment — clone repo + run data prep
2. run_experiment — execute train.py with wall-clock timeout
3. evaluate_result — compare metric against baseline
4. experiment_loop — orchestrate the full cycle
All subprocess calls are guarded with timeouts for graceful degradation.
"""
from __future__ import annotations
import json
import logging
import re
import subprocess
import time
from pathlib import Path
from typing import Any, Callable, Optional
logger = logging.getLogger(__name__)
DEFAULT_REPO = "https://github.com/karpathy/autoresearch.git"
_METRIC_RE = re.compile(r"val_bpb[:\s]+([0-9]+\.?[0-9]*)")
def prepare_experiment(
    workspace: Path,
    repo_url: str = DEFAULT_REPO,
) -> str:
    """Clone the autoresearch repo and run its data-preparation step.

    Args:
        workspace: Directory to set up the experiment in (created if missing).
        repo_url: Git URL for the autoresearch repository.

    Returns:
        Status message describing what was prepared, or why it failed.
        Subprocess timeouts and missing binaries are reported in the return
        value rather than raised — matching the graceful-degradation contract
        documented at module level and already honoured by run_experiment().
    """
    workspace = Path(workspace)
    workspace.mkdir(parents=True, exist_ok=True)
    repo_dir = workspace / "autoresearch"

    if not repo_dir.exists():
        logger.info("Cloning autoresearch into %s", repo_dir)
        try:
            result = subprocess.run(
                ["git", "clone", "--depth", "1", repo_url, str(repo_dir)],
                capture_output=True,
                text=True,
                timeout=120,
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # Clone hung past its timeout, or the git binary is unavailable.
            return f"Clone failed: {exc}"
        if result.returncode != 0:
            return f"Clone failed: {result.stderr.strip()}"
    else:
        logger.info("Autoresearch repo already present at %s", repo_dir)

    # Run prepare.py (data download + tokeniser training)
    prepare_script = repo_dir / "prepare.py"
    if prepare_script.exists():
        logger.info("Running prepare.py …")
        try:
            result = subprocess.run(
                ["python", str(prepare_script)],
                capture_output=True,
                text=True,
                cwd=str(repo_dir),
                timeout=300,
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # Data prep exceeded its 300 s budget or python could not start.
            return f"Preparation failed: {exc}"
        if result.returncode != 0:
            return f"Preparation failed: {result.stderr.strip()[:500]}"
        return "Preparation complete — data downloaded and tokeniser trained."
    return "Preparation skipped — no prepare.py found."
def run_experiment(
    workspace: Path,
    timeout: int = 300,
    metric_name: str = "val_bpb",
) -> dict[str, Any]:
    """Execute one training run under a wall-clock timeout and report results.

    Args:
        workspace: Experiment workspace (contains autoresearch/ subdir).
        timeout: Maximum wall-clock seconds for the run.
        metric_name: Name of the metric to extract from stdout.

    Returns:
        Dict with keys: metric (float|None), log (str), duration_s (int),
        success (bool), error (str|None).
    """

    def _outcome(metric, log, duration_s, success, error):
        # Single place that shapes the result dict, so every exit path agrees.
        return {
            "metric": metric,
            "log": log,
            "duration_s": duration_s,
            "success": success,
            "error": error,
        }

    repo_path = Path(workspace) / "autoresearch"
    script = repo_path / "train.py"
    if not script.exists():
        return _outcome(None, "", 0, False, f"train.py not found in {repo_path}")

    started = time.monotonic()
    try:
        proc = subprocess.run(
            ["python", str(script)],
            capture_output=True,
            text=True,
            cwd=str(repo_path),
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        elapsed = int(time.monotonic() - started)
        return _outcome(
            None,
            f"Experiment timed out after {timeout}s",
            elapsed,
            False,
            f"Timed out after {timeout}s",
        )
    except OSError as exc:
        return _outcome(None, "", 0, False, str(exc))

    elapsed = int(time.monotonic() - started)
    combined = proc.stdout + proc.stderr
    ok = proc.returncode == 0
    return _outcome(
        _extract_metric(combined, metric_name),
        combined[-2000:],  # keep only the last 2k chars of the log
        elapsed,
        ok,
        None if ok else f"Exit code {proc.returncode}",
    )
def _extract_metric(output: str, metric_name: str = "val_bpb") -> Optional[float]:
"""Extract the last occurrence of a metric value from training output."""
pattern = re.compile(rf"{re.escape(metric_name)}[:\s]+([0-9]+\.?[0-9]*)")
matches = pattern.findall(output)
if matches:
try:
return float(matches[-1])
except ValueError:
pass
return None
def evaluate_result(
    current: float,
    baseline: float,
    metric_name: str = "val_bpb",
) -> str:
    """Compare *current* against *baseline* and describe the outcome.

    For val_bpb, lower is better, so a negative delta is an improvement.

    Args:
        current: Current experiment's metric value.
        baseline: Baseline metric to compare against.
        metric_name: Name of the metric (for display).

    Returns:
        Human-readable assessment string.
    """
    diff = current - baseline
    percent = (diff / baseline) * 100 if baseline != 0 else 0.0
    if diff < 0:
        verdict = "Improvement"
    elif diff > 0:
        verdict = "Regression"
    else:
        return f"No change: {metric_name} = {current:.4f}"
    return f"{verdict}: {metric_name} {baseline:.4f} -> {current:.4f} ({percent:+.2f}%)"
def get_experiment_history(workspace: Path) -> list[dict[str, Any]]:
    """Load past experiment results from the workspace's results.jsonl log.

    Lines that are blank or not valid JSON are skipped silently.

    Returns:
        List of experiment result dicts, most recent first.
    """
    log_path = Path(workspace) / "results.jsonl"
    if not log_path.exists():
        return []
    entries: list[dict[str, Any]] = []
    with log_path.open() as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                entries.append(json.loads(raw))
            except json.JSONDecodeError:
                continue
    entries.reverse()
    return entries
def _append_result(workspace: Path, result: dict[str, Any]) -> None:
"""Append a result to the workspace JSONL log."""
results_file = Path(workspace) / "results.jsonl"
results_file.parent.mkdir(parents=True, exist_ok=True)
with results_file.open("a") as f:
f.write(json.dumps(result) + "\n")

View File

@@ -24,8 +24,8 @@ logger = logging.getLogger(__name__)
# HuggingFace model IDs for each supported size.
_AIRLLM_MODELS: dict[str, str] = {
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"405b": "meta-llama/Meta-Llama-3.1-405B-Instruct",
}
@@ -35,6 +35,7 @@ ModelSize = Literal["8b", "70b", "405b"]
@dataclass
class RunResult:
"""Minimal Agno-compatible run result — carries the model's response text."""
content: str
@@ -47,6 +48,7 @@ def airllm_available() -> bool:
"""Return True when the airllm package is importable."""
try:
import airllm # noqa: F401
return True
except ImportError:
return False
@@ -67,15 +69,16 @@ class TimmyAirLLMAgent:
model_id = _AIRLLM_MODELS.get(model_size)
if model_id is None:
raise ValueError(
f"Unknown model size {model_size!r}. "
f"Choose from: {list(_AIRLLM_MODELS)}"
f"Unknown model size {model_size!r}. " f"Choose from: {list(_AIRLLM_MODELS)}"
)
if is_apple_silicon():
from airllm import AirLLMMLX # type: ignore[import]
self._model = AirLLMMLX(model_id)
else:
from airllm import AutoModel # type: ignore[import]
self._model = AutoModel.from_pretrained(model_id)
self._history: list[str] = []
@@ -137,6 +140,7 @@ class TimmyAirLLMAgent:
try:
from rich.console import Console
from rich.markdown import Markdown
Console().print(Markdown(text))
except ImportError:
print(text)
@@ -157,6 +161,7 @@ GROK_MODELS: dict[str, str] = {
@dataclass
class GrokUsageStats:
"""Tracks Grok API usage for cost monitoring and Spark logging."""
total_requests: int = 0
total_prompt_tokens: int = 0
total_completion_tokens: int = 0
@@ -240,9 +245,7 @@ class GrokBackend:
RunResult with response content
"""
if not self._api_key:
return RunResult(
content="Grok is not configured. Set XAI_API_KEY to enable."
)
return RunResult(content="Grok is not configured. Set XAI_API_KEY to enable.")
start = time.time()
messages = self._build_messages(message)
@@ -285,16 +288,12 @@ class GrokBackend:
except Exception as exc:
self.stats.errors += 1
logger.error("Grok API error: %s", exc)
return RunResult(
content=f"Grok temporarily unavailable: {exc}"
)
return RunResult(content=f"Grok temporarily unavailable: {exc}")
async def arun(self, message: str) -> RunResult:
"""Async inference via Grok API — used by cascade router and tools."""
if not self._api_key:
return RunResult(
content="Grok is not configured. Set XAI_API_KEY to enable."
)
return RunResult(content="Grok is not configured. Set XAI_API_KEY to enable.")
start = time.time()
messages = self._build_messages(message)
@@ -336,9 +335,7 @@ class GrokBackend:
except Exception as exc:
self.stats.errors += 1
logger.error("Grok async API error: %s", exc)
return RunResult(
content=f"Grok temporarily unavailable: {exc}"
)
return RunResult(content=f"Grok temporarily unavailable: {exc}")
def print_response(self, message: str, *, stream: bool = True) -> None:
"""Run inference and render the response to stdout (CLI interface)."""
@@ -346,6 +343,7 @@ class GrokBackend:
try:
from rich.console import Console
from rich.markdown import Markdown
Console().print(Markdown(result.content))
except ImportError:
print(result.content)
@@ -415,6 +413,7 @@ def grok_available() -> bool:
"""Return True when Grok is enabled and API key is configured."""
try:
from config import settings
return settings.grok_enabled and bool(settings.xai_api_key)
except Exception:
return False
@@ -472,9 +471,7 @@ class ClaudeBackend:
def run(self, message: str, *, stream: bool = False, **kwargs) -> RunResult:
"""Synchronous inference via Claude API."""
if not self._api_key:
return RunResult(
content="Claude is not configured. Set ANTHROPIC_API_KEY to enable."
)
return RunResult(content="Claude is not configured. Set ANTHROPIC_API_KEY to enable.")
start = time.time()
messages = self._build_messages(message)
@@ -508,9 +505,7 @@ class ClaudeBackend:
except Exception as exc:
logger.error("Claude API error: %s", exc)
return RunResult(
content=f"Claude temporarily unavailable: {exc}"
)
return RunResult(content=f"Claude temporarily unavailable: {exc}")
def print_response(self, message: str, *, stream: bool = True) -> None:
"""Run inference and render the response to stdout (CLI interface)."""
@@ -518,6 +513,7 @@ class ClaudeBackend:
try:
from rich.console import Console
from rich.markdown import Markdown
Console().print(Markdown(result.content))
except ImportError:
print(result.content)
@@ -569,6 +565,7 @@ def claude_available() -> bool:
"""Return True when Anthropic API key is configured."""
try:
from config import settings
return bool(settings.anthropic_api_key)
except Exception:
return False

View File

@@ -25,6 +25,7 @@ _CACHE_MINUTES = 30
# Data structures
# ---------------------------------------------------------------------------
@dataclass
class ApprovalItem:
"""Lightweight representation used inside a Briefing.
@@ -32,6 +33,7 @@ class ApprovalItem:
The canonical mutable version (with persistence) lives in timmy.approvals.
This one travels with the Briefing dataclass as a read-only snapshot.
"""
id: str
title: str
description: str
@@ -44,20 +46,19 @@ class ApprovalItem:
@dataclass
class Briefing:
generated_at: datetime
summary: str # 150-300 words
summary: str # 150-300 words
approval_items: list[ApprovalItem] = field(default_factory=list)
period_start: datetime = field(
default_factory=lambda: datetime.now(timezone.utc) - timedelta(hours=6)
)
period_end: datetime = field(
default_factory=lambda: datetime.now(timezone.utc)
)
period_end: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
# ---------------------------------------------------------------------------
# SQLite cache
# ---------------------------------------------------------------------------
def _get_cache_conn(db_path: Path = _DEFAULT_DB) -> sqlite3.Connection:
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(db_path))
@@ -98,9 +99,7 @@ def _save_briefing(briefing: Briefing, db_path: Path = _DEFAULT_DB) -> None:
def _load_latest(db_path: Path = _DEFAULT_DB) -> Optional[Briefing]:
"""Load the most-recently cached briefing, or None if there is none."""
conn = _get_cache_conn(db_path)
row = conn.execute(
"SELECT * FROM briefings ORDER BY generated_at DESC LIMIT 1"
).fetchone()
row = conn.execute("SELECT * FROM briefings ORDER BY generated_at DESC LIMIT 1").fetchone()
conn.close()
if row is None:
return None
@@ -115,7 +114,11 @@ def _load_latest(db_path: Path = _DEFAULT_DB) -> Optional[Briefing]:
def is_fresh(briefing: Briefing, max_age_minutes: int = _CACHE_MINUTES) -> bool:
"""Return True if the briefing was generated within max_age_minutes."""
now = datetime.now(timezone.utc)
age = now - briefing.generated_at.replace(tzinfo=timezone.utc) if briefing.generated_at.tzinfo is None else now - briefing.generated_at
age = (
now - briefing.generated_at.replace(tzinfo=timezone.utc)
if briefing.generated_at.tzinfo is None
else now - briefing.generated_at
)
return age.total_seconds() < max_age_minutes * 60
@@ -123,6 +126,7 @@ def is_fresh(briefing: Briefing, max_age_minutes: int = _CACHE_MINUTES) -> bool:
# Activity gathering helpers
# ---------------------------------------------------------------------------
def _gather_swarm_summary(since: datetime) -> str:
"""Pull recent task/agent stats from swarm.db. Graceful if DB missing."""
swarm_db = Path("data/swarm.db")
@@ -170,6 +174,7 @@ def _gather_task_queue_summary() -> str:
"""Pull task queue stats for the briefing. Graceful if unavailable."""
try:
from swarm.task_queue.models import get_task_summary_for_briefing
stats = get_task_summary_for_briefing()
parts = []
if stats["pending_approval"]:
@@ -194,6 +199,7 @@ def _gather_chat_summary(since: datetime) -> str:
"""Pull recent chat messages from the in-memory log."""
try:
from dashboard.store import message_log
messages = message_log.all()
# Filter to messages in the briefing window (best-effort: no timestamps)
recent = messages[-10:] if len(messages) > 10 else messages
@@ -213,6 +219,7 @@ def _gather_chat_summary(since: datetime) -> str:
# BriefingEngine
# ---------------------------------------------------------------------------
class BriefingEngine:
"""Generates morning briefings by querying activity and asking Timmy."""
@@ -297,6 +304,7 @@ class BriefingEngine:
"""Call Timmy's Agno agent and return the response text."""
try:
from timmy.agent import create_timmy
agent = create_timmy()
run = agent.run(prompt, stream=False)
result = run.content if hasattr(run, "content") else str(run)
@@ -317,6 +325,7 @@ class BriefingEngine:
"""Return pending ApprovalItems from the approvals DB."""
try:
from timmy import approvals as _approvals
raw_items = _approvals.list_pending()
return [
ApprovalItem(

View File

@@ -19,6 +19,7 @@ logger = logging.getLogger(__name__)
@dataclass
class TimmyResponse:
"""Response from Timmy via Cascade Router."""
content: str
provider_used: str
latency_ms: float
@@ -42,8 +43,7 @@ class TimmyCascadeAdapter:
router: CascadeRouter instance. If None, creates default.
"""
self.router = router or CascadeRouter()
logger.info("TimmyCascadeAdapter initialized with %d providers",
len(self.router.providers))
logger.info("TimmyCascadeAdapter initialized with %d providers", len(self.router.providers))
async def chat(self, message: str, context: Optional[str] = None) -> TimmyResponse:
"""Send message through cascade router with automatic failover.
@@ -63,6 +63,7 @@ class TimmyCascadeAdapter:
# Route through cascade
import time
start = time.time()
try:

View File

@@ -17,6 +17,7 @@ logger = logging.getLogger(__name__)
@dataclass
class ConversationContext:
"""Tracks the current conversation state."""
user_name: Optional[str] = None
current_topic: Optional[str] = None
last_intent: Optional[str] = None
@@ -63,19 +64,72 @@ class ConversationManager:
del self._contexts[session_id]
# Words that look like names but are actually verbs/UI states
_NAME_BLOCKLIST = frozenset({
"sending", "loading", "pending", "processing", "typing",
"working", "going", "trying", "looking", "getting", "doing",
"waiting", "running", "checking", "coming", "leaving",
"thinking", "reading", "writing", "watching", "listening",
"playing", "eating", "sleeping", "sitting", "standing",
"walking", "talking", "asking", "telling", "feeling",
"hoping", "wondering", "glad", "happy", "sorry", "sure",
"fine", "good", "great", "okay", "here", "there", "back",
"done", "ready", "busy", "free", "available", "interested",
"confused", "lost", "stuck", "curious", "excited", "tired",
"not", "also", "just", "still", "already", "currently",
})
_NAME_BLOCKLIST = frozenset(
{
"sending",
"loading",
"pending",
"processing",
"typing",
"working",
"going",
"trying",
"looking",
"getting",
"doing",
"waiting",
"running",
"checking",
"coming",
"leaving",
"thinking",
"reading",
"writing",
"watching",
"listening",
"playing",
"eating",
"sleeping",
"sitting",
"standing",
"walking",
"talking",
"asking",
"telling",
"feeling",
"hoping",
"wondering",
"glad",
"happy",
"sorry",
"sure",
"fine",
"good",
"great",
"okay",
"here",
"there",
"back",
"done",
"ready",
"busy",
"free",
"available",
"interested",
"confused",
"lost",
"stuck",
"curious",
"excited",
"tired",
"not",
"also",
"just",
"still",
"already",
"currently",
}
)
def extract_user_name(self, message: str) -> Optional[str]:
"""Try to extract user's name from message."""
@@ -116,18 +170,44 @@ class ConversationManager:
# Tool keywords that suggest tool usage is needed
tool_keywords = [
"search", "look up", "find", "google", "current price",
"latest", "today's", "news", "weather", "stock price",
"read file", "write file", "save", "calculate", "compute",
"run ", "execute", "shell", "command", "install",
"search",
"look up",
"find",
"google",
"current price",
"latest",
"today's",
"news",
"weather",
"stock price",
"read file",
"write file",
"save",
"calculate",
"compute",
"run ",
"execute",
"shell",
"command",
"install",
]
# Chat-only keywords that definitely don't need tools
chat_only = [
"hello", "hi ", "hey", "how are you", "what's up",
"your name", "who are you", "what are you",
"thanks", "thank you", "bye", "goodbye",
"tell me about yourself", "what can you do",
"hello",
"hi ",
"hey",
"how are you",
"what's up",
"your name",
"who are you",
"what are you",
"thanks",
"thank you",
"bye",
"goodbye",
"tell me about yourself",
"what can you do",
]
# Check for chat-only patterns first

View File

@@ -30,6 +30,7 @@ def _get_model():
return _model
from config import settings
# In test mode or low-memory environments, skip embedding model load
if settings.timmy_skip_embeddings:
_has_embeddings = False
@@ -37,7 +38,8 @@ def _get_model():
try:
from sentence_transformers import SentenceTransformer
_model = SentenceTransformer('all-MiniLM-L6-v2')
_model = SentenceTransformer("all-MiniLM-L6-v2")
_has_embeddings = True
return _model
except (ImportError, RuntimeError, Exception):
@@ -75,14 +77,14 @@ def _compute_embedding(text: str) -> list[float]:
# Generate character trigram features
for i in range(len(text) - 2):
trigram = text[i:i+3]
trigram = text[i : i + 3]
hash_val = hash(trigram) % dim
vec[hash_val] += 1.0
# Normalize
norm = sum(x*x for x in vec) ** 0.5
norm = sum(x * x for x in vec) ** 0.5
if norm > 0:
vec = [x/norm for x in vec]
vec = [x / norm for x in vec]
return vec
@@ -90,6 +92,7 @@ def _compute_embedding(text: str) -> list[float]:
@dataclass
class MemoryEntry:
"""A memory entry with vector embedding."""
id: str = field(default_factory=lambda: str(uuid.uuid4()))
content: str = "" # The actual text content
source: str = "" # Where it came from (agent, user, system)
@@ -99,9 +102,7 @@ class MemoryEntry:
session_id: Optional[str] = None
metadata: Optional[dict] = None
embedding: Optional[list[float]] = None
timestamp: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat()
)
timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
relevance_score: Optional[float] = None # Set during search
@@ -139,21 +140,11 @@ def _get_conn() -> sqlite3.Connection:
)
# Create indexes
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_agent ON memory_entries(agent_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_task ON memory_entries(task_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_session ON memory_entries(session_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_time ON memory_entries(timestamp)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_type ON memory_entries(context_type)"
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_agent ON memory_entries(agent_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_task ON memory_entries(task_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_session ON memory_entries(session_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_time ON memory_entries(timestamp)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_type ON memory_entries(context_type)")
conn.commit()
return conn
@@ -316,9 +307,9 @@ def search_memories(
def _cosine_similarity(a: list[float], b: list[float]) -> float:
"""Compute cosine similarity between two vectors."""
dot = sum(x*y for x, y in zip(a, b))
norm_a = sum(x*x for x in a) ** 0.5
norm_b = sum(x*x for x in b) ** 0.5
dot = sum(x * y for x, y in zip(a, b))
norm_a = sum(x * x for x in a) ** 0.5
norm_b = sum(x * x for x in b) ** 0.5
if norm_a == 0 or norm_b == 0:
return 0.0
return dot / (norm_a * norm_b)
@@ -334,11 +325,7 @@ def _keyword_overlap(query: str, content: str) -> float:
return overlap / len(query_words)
def get_memory_context(
query: str,
max_tokens: int = 2000,
**filters
) -> str:
def get_memory_context(query: str, max_tokens: int = 2000, **filters) -> str:
"""Get relevant memory context as formatted text for LLM prompts.
Args:
@@ -476,9 +463,7 @@ def get_memory_stats() -> dict:
"""
conn = _get_conn()
total = conn.execute(
"SELECT COUNT(*) as count FROM memory_entries"
).fetchone()["count"]
total = conn.execute("SELECT COUNT(*) as count FROM memory_entries").fetchone()["count"]
by_type = {}
rows = conn.execute(

View File

@@ -60,12 +60,14 @@ class HotMemory:
if match:
# Replace section
new_section = f"## {section}\n\n{content}\n\n"
full_content = full_content[:match.start()] + new_section + full_content[match.end():]
full_content = full_content[: match.start()] + new_section + full_content[match.end() :]
else:
# Append section before last updated line
insert_point = full_content.rfind("*Prune date:")
new_section = f"## {section}\n\n{content}\n\n"
full_content = full_content[:insert_point] + new_section + "\n" + full_content[insert_point:]
full_content = (
full_content[:insert_point] + new_section + "\n" + full_content[insert_point:]
)
self.path.write_text(full_content)
self._content = full_content
@@ -130,7 +132,7 @@ class HotMemory:
*Prune date: {prune_date}*
""".format(
date=datetime.now(timezone.utc).strftime("%Y-%m-%d"),
prune_date=(datetime.now(timezone.utc).replace(day=25)).strftime("%Y-%m-%d")
prune_date=(datetime.now(timezone.utc).replace(day=25)).strftime("%Y-%m-%d"),
)
self.path.write_text(default_content)
@@ -219,7 +221,7 @@ class VaultMemory:
content = re.sub(
r"\*Last updated:.*\*",
f"*Last updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d')}*",
content
content,
)
profile_path.write_text(content)
@@ -254,7 +256,9 @@ class VaultMemory:
---
*Last updated: {date}*
""".format(date=datetime.now(timezone.utc).strftime("%Y-%m-%d"))
""".format(
date=datetime.now(timezone.utc).strftime("%Y-%m-%d")
)
profile_path.write_text(default)
@@ -271,7 +275,7 @@ class HandoffProtocol:
session_summary: str,
key_decisions: list[str],
open_items: list[str],
next_steps: list[str]
next_steps: list[str],
) -> None:
"""Write handoff at session end."""
content = f"""# Last Session Handoff
@@ -307,14 +311,13 @@ The user was last working on: {session_summary[:200]}...
self.path.write_text(content)
# Also archive to notes
self.vault.write_note(
"session_handoff",
content,
namespace="notes"
)
self.vault.write_note("session_handoff", content, namespace="notes")
logger.info("HandoffProtocol: Wrote handoff with %d decisions, %d open items",
len(key_decisions), len(open_items))
logger.info(
"HandoffProtocol: Wrote handoff with %d decisions, %d open items",
len(key_decisions),
len(open_items),
)
def read_handoff(self) -> Optional[str]:
"""Read handoff if exists."""
@@ -373,14 +376,14 @@ class MemorySystem:
session_summary=summary,
key_decisions=self.session_decisions,
open_items=self.session_open_items,
next_steps=[]
next_steps=[],
)
# Update hot memory
self.hot.update_section(
"Current Session",
f"**Last Session:** {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M')}\n" +
f"**Summary:** {summary[:100]}..."
f"**Last Session:** {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M')}\n"
+ f"**Summary:** {summary[:100]}...",
)
logger.info("MemorySystem: Session ended, handoff written")
@@ -424,7 +427,11 @@ class MemorySystem:
# Interests
interests_section = re.search(r"## Interests.*?\n- (.+?)(?=\n## |\Z)", content, re.DOTALL)
if interests_section:
interests = [i.strip() for i in interests_section.group(1).split("\n-") if i.strip() and "to be" not in i]
interests = [
i.strip()
for i in interests_section.group(1).split("\n-")
if i.strip() and "to be" not in i
]
if interests:
summary_parts.append(f"Interests: {', '.join(interests[:3])}")

View File

@@ -38,12 +38,14 @@ def _get_embedding_model():
global EMBEDDING_MODEL
if EMBEDDING_MODEL is None:
from config import settings
if settings.timmy_skip_embeddings:
EMBEDDING_MODEL = False
return EMBEDDING_MODEL
try:
from sentence_transformers import SentenceTransformer
EMBEDDING_MODEL = SentenceTransformer('all-MiniLM-L6-v2')
EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
logger.info("SemanticMemory: Loaded embedding model")
except ImportError:
logger.warning("SemanticMemory: sentence-transformers not installed, using fallback")
@@ -60,11 +62,12 @@ def _simple_hash_embedding(text: str) -> list[float]:
h = hashlib.md5(word.encode()).hexdigest()
for j in range(8):
idx = (i * 8 + j) % 128
vec[idx] += int(h[j*2:j*2+2], 16) / 255.0
vec[idx] += int(h[j * 2 : j * 2 + 2], 16) / 255.0
# Normalize
import math
mag = math.sqrt(sum(x*x for x in vec)) or 1.0
return [x/mag for x in vec]
mag = math.sqrt(sum(x * x for x in vec)) or 1.0
return [x / mag for x in vec]
def embed_text(text: str) -> list[float]:
@@ -80,9 +83,10 @@ def embed_text(text: str) -> list[float]:
def cosine_similarity(a: list[float], b: list[float]) -> float:
"""Calculate cosine similarity between two vectors."""
import math
dot = sum(x*y for x, y in zip(a, b))
mag_a = math.sqrt(sum(x*x for x in a))
mag_b = math.sqrt(sum(x*x for x in b))
dot = sum(x * y for x, y in zip(a, b))
mag_a = math.sqrt(sum(x * x for x in a))
mag_b = math.sqrt(sum(x * x for x in b))
if mag_a == 0 or mag_b == 0:
return 0.0
return dot / (mag_a * mag_b)
@@ -91,6 +95,7 @@ def cosine_similarity(a: list[float], b: list[float]) -> float:
@dataclass
class MemoryChunk:
"""A searchable chunk of memory."""
id: str
source: str # filepath
content: str
@@ -110,7 +115,8 @@ class SemanticMemory:
"""Initialize SQLite with vector storage."""
self.db_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(self.db_path))
conn.execute("""
conn.execute(
"""
CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
source TEXT NOT NULL,
@@ -119,7 +125,8 @@ class SemanticMemory:
created_at TEXT NOT NULL,
source_hash TEXT NOT NULL
)
""")
"""
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_source ON chunks(source)")
conn.commit()
conn.close()
@@ -135,8 +142,7 @@ class SemanticMemory:
# Check if already indexed with same hash
conn = sqlite3.connect(str(self.db_path))
cursor = conn.execute(
"SELECT source_hash FROM chunks WHERE source = ? LIMIT 1",
(str(filepath),)
"SELECT source_hash FROM chunks WHERE source = ? LIMIT 1", (str(filepath),)
)
existing = cursor.fetchone()
if existing and existing[0] == file_hash:
@@ -161,7 +167,7 @@ class SemanticMemory:
conn.execute(
"""INSERT INTO chunks (id, source, content, embedding, created_at, source_hash)
VALUES (?, ?, ?, ?, ?, ?)""",
(chunk_id, str(filepath), chunk_text, json.dumps(embedding), now, file_hash)
(chunk_id, str(filepath), chunk_text, json.dumps(embedding), now, file_hash),
)
conn.commit()
@@ -173,7 +179,7 @@ class SemanticMemory:
def _split_into_chunks(self, text: str, max_chunk_size: int = 500) -> list[str]:
"""Split text into semantic chunks."""
# Split by paragraphs first
paragraphs = text.split('\n\n')
paragraphs = text.split("\n\n")
chunks = []
for para in paragraphs:
@@ -186,7 +192,7 @@ class SemanticMemory:
chunks.append(para)
else:
# Split long paragraphs by sentences
sentences = para.replace('. ', '.\n').split('\n')
sentences = para.replace(". ", ".\n").split("\n")
current_chunk = ""
for sent in sentences:
@@ -223,9 +229,7 @@ class SemanticMemory:
conn.row_factory = sqlite3.Row
# Get all chunks (in production, use vector index)
rows = conn.execute(
"SELECT source, content, embedding FROM chunks"
).fetchall()
rows = conn.execute("SELECT source, content, embedding FROM chunks").fetchall()
conn.close()
@@ -301,8 +305,7 @@ class MemorySearcher:
if "semantic" in tiers:
semantic_results = self.semantic.search(query, top_k=5)
results["semantic"] = [
{"content": content, "score": score}
for content, score in semantic_results
{"content": content, "score": score} for content, score in semantic_results
]
return results
@@ -353,6 +356,7 @@ def memory_search(query: str, top_k: int = 5) -> str:
# 2. Search runtime vector store (stored facts/conversations)
try:
from timmy.memory.vector_store import search_memories
runtime_results = search_memories(query, limit=top_k, min_relevance=0.2)
for entry in runtime_results:
label = entry.context_type or "memory"
@@ -387,6 +391,7 @@ def memory_read(query: str = "", top_k: int = 5) -> str:
# Always include personal facts first
try:
from timmy.memory.vector_store import search_memories
facts = search_memories(query or "", limit=top_k, min_relevance=0.0)
fact_entries = [e for e in facts if (e.context_type or "") == "fact"]
if fact_entries:
@@ -433,6 +438,7 @@ def memory_write(content: str, context_type: str = "fact") -> str:
try:
from timmy.memory.vector_store import store_memory
entry = store_memory(
content=content.strip(),
source="agent",

View File

@@ -32,13 +32,15 @@ _TOOL_CALL_JSON = re.compile(
# Matches function-call-style text: memory_search(query="...") etc.
_FUNC_CALL_TEXT = re.compile(
r'\b(?:memory_search|web_search|shell|python|read_file|write_file|list_files|calculator)'
r'\s*\([^)]*\)',
r"\b(?:memory_search|web_search|shell|python|read_file|write_file|list_files|calculator)"
r"\s*\([^)]*\)",
)
# Matches chain-of-thought narration lines the model should keep internal
_COT_PATTERNS = [
re.compile(r"^(?:Since |Using |Let me |I'll use |I will use |Here's a possible ).*$", re.MULTILINE),
re.compile(
r"^(?:Since |Using |Let me |I'll use |I will use |Here's a possible ).*$", re.MULTILINE
),
re.compile(r"^(?:I found a relevant |This context suggests ).*$", re.MULTILINE),
]
@@ -48,6 +50,7 @@ def _get_agent():
global _agent
if _agent is None:
from timmy.agent import create_timmy
try:
_agent = create_timmy()
logger.info("Session: Timmy agent initialized (singleton)")
@@ -99,6 +102,7 @@ def reset_session(session_id: Optional[str] = None) -> None:
sid = session_id or _DEFAULT_SESSION_ID
try:
from timmy.conversation import conversation_manager
conversation_manager.clear_context(sid)
except Exception as exc:
logger.debug("Session: context clear failed for %s: %s", sid, exc)
@@ -112,10 +116,12 @@ def _extract_facts(message: str) -> None:
"""
try:
from timmy.conversation import conversation_manager
name = conversation_manager.extract_user_name(message)
if name:
try:
from timmy.memory_system import memory_system
memory_system.update_user_fact("Name", name)
logger.info("Session: Learned user name: %s", name)
except Exception as exc:

View File

@@ -6,7 +6,7 @@ including any mistakes or errors that occur during the session."
import json
import logging
from datetime import datetime, date
from datetime import date, datetime
from pathlib import Path
from typing import Any

View File

@@ -75,6 +75,7 @@ Continue your train of thought."""
@dataclass
class Thought:
"""A single thought in Timmy's inner stream."""
id: str
content: str
seed_type: str
@@ -98,9 +99,7 @@ def _get_conn(db_path: Path = _DEFAULT_DB) -> sqlite3.Connection:
)
"""
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_thoughts_time ON thoughts(created_at)"
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_thoughts_time ON thoughts(created_at)")
conn.commit()
return conn
@@ -190,9 +189,7 @@ class ThinkingEngine:
def get_thought(self, thought_id: str) -> Optional[Thought]:
"""Retrieve a single thought by ID."""
conn = _get_conn(self._db_path)
row = conn.execute(
"SELECT * FROM thoughts WHERE id = ?", (thought_id,)
).fetchone()
row = conn.execute("SELECT * FROM thoughts WHERE id = ?", (thought_id,)).fetchone()
conn.close()
return _row_to_thought(row) if row else None
@@ -208,9 +205,7 @@ class ThinkingEngine:
for _ in range(max_depth):
if not current_id:
break
row = conn.execute(
"SELECT * FROM thoughts WHERE id = ?", (current_id,)
).fetchone()
row = conn.execute("SELECT * FROM thoughts WHERE id = ?", (current_id,)).fetchone()
if not row:
break
chain.append(_row_to_thought(row))
@@ -254,8 +249,10 @@ class ThinkingEngine:
def _seed_from_swarm(self) -> str:
"""Gather recent swarm activity as thought seed."""
try:
from timmy.briefing import _gather_swarm_summary, _gather_task_queue_summary
from datetime import timedelta
from timmy.briefing import _gather_swarm_summary, _gather_task_queue_summary
since = datetime.now(timezone.utc) - timedelta(hours=1)
swarm = _gather_swarm_summary(since)
tasks = _gather_task_queue_summary()
@@ -272,6 +269,7 @@ class ThinkingEngine:
"""Gather memory context as thought seed."""
try:
from timmy.memory_system import memory_system
context = memory_system.get_system_context()
if context:
# Truncate to a reasonable size for a thought seed
@@ -299,10 +297,12 @@ class ThinkingEngine:
"""
try:
from timmy.session import chat
return chat(prompt, session_id="thinking")
except Exception:
# Fallback: create a fresh agent
from timmy.agent import create_timmy
agent = create_timmy()
run = agent.run(prompt, stream=False)
return run.content if hasattr(run, "content") else str(run)
@@ -323,8 +323,7 @@ class ThinkingEngine:
INSERT INTO thoughts (id, content, seed_type, parent_id, created_at)
VALUES (?, ?, ?, ?, ?)
""",
(thought.id, thought.content, thought.seed_type,
thought.parent_id, thought.created_at),
(thought.id, thought.content, thought.seed_type, thought.parent_id, thought.created_at),
)
conn.commit()
conn.close()
@@ -333,7 +332,8 @@ class ThinkingEngine:
def _log_event(self, thought: Thought) -> None:
"""Log the thought as a swarm event."""
try:
from swarm.event_log import log_event, EventType
from swarm.event_log import EventType, log_event
log_event(
EventType.TIMMY_THOUGHT,
source="thinking-engine",
@@ -351,12 +351,16 @@ class ThinkingEngine:
"""Broadcast the thought to WebSocket clients."""
try:
from infrastructure.ws_manager.handler import ws_manager
await ws_manager.broadcast("timmy_thought", {
"thought_id": thought.id,
"content": thought.content,
"seed_type": thought.seed_type,
"created_at": thought.created_at,
})
await ws_manager.broadcast(
"timmy_thought",
{
"thought_id": thought.id,
"content": thought.content,
"seed_type": thought.seed_type,
"created_at": thought.created_at,
},
)
except Exception as exc:
logger.debug("Failed to broadcast thought: %s", exc)

View File

@@ -227,11 +227,7 @@ def create_aider_tool(base_path: Path):
)
if result.returncode == 0:
return (
result.stdout
if result.stdout
else "Code changes applied successfully"
)
return result.stdout if result.stdout else "Code changes applied successfully"
else:
return f"Aider error: {result.stderr}"
except FileNotFoundError:
@@ -354,7 +350,7 @@ def consult_grok(query: str) -> str:
Grok's response text, or an error/status message.
"""
from config import settings
from timmy.backends import grok_available, get_grok_backend
from timmy.backends import get_grok_backend, grok_available
if not grok_available():
return (
@@ -385,9 +381,7 @@ def consult_grok(query: str) -> str:
ln = get_ln_backend()
sats = min(settings.grok_max_sats_per_query, 100)
inv = ln.create_invoice(sats, f"Grok query: {query[:50]}")
invoice_info = (
f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
)
invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
except Exception:
pass
@@ -447,7 +441,7 @@ def create_full_toolkit(base_dir: str | Path | None = None):
# Memory search and write — persistent recall across all channels
try:
from timmy.semantic_memory import memory_search, memory_write, memory_read
from timmy.semantic_memory import memory_read, memory_search, memory_write
toolkit.register(memory_search, name="memory_search")
toolkit.register(memory_write, name="memory_write")
@@ -473,6 +467,7 @@ def create_full_toolkit(base_dir: str | Path | None = None):
Task ID and confirmation that background execution has started.
"""
import asyncio
task_id = None
async def _launch():
@@ -502,11 +497,7 @@ def create_full_toolkit(base_dir: str | Path | None = None):
# System introspection - query runtime environment (sovereign self-knowledge)
try:
from timmy.tools_intro import (
get_system_info,
check_ollama_health,
get_memory_status,
)
from timmy.tools_intro import check_ollama_health, get_memory_status, get_system_info
toolkit.register(get_system_info, name="get_system_info")
toolkit.register(check_ollama_health, name="check_ollama_health")
@@ -526,6 +517,60 @@ def create_full_toolkit(base_dir: str | Path | None = None):
return toolkit
def create_experiment_tools(base_dir: str | Path | None = None):
"""Create tools for the experiment agent (Lab).
Includes: prepare_experiment, run_experiment, evaluate_result,
plus shell + file ops for editing training code.
"""
if not _AGNO_TOOLS_AVAILABLE:
raise ImportError(f"Agno tools not available: {_ImportError}")
from config import settings
toolkit = Toolkit(name="experiment")
from timmy.autoresearch import evaluate_result, prepare_experiment, run_experiment
workspace = (
Path(base_dir) if base_dir else Path(settings.repo_root) / settings.autoresearch_workspace
)
def _prepare(repo_url: str = "https://github.com/karpathy/autoresearch.git") -> str:
"""Clone and prepare an autoresearch experiment workspace."""
return prepare_experiment(workspace, repo_url)
def _run(timeout: int = 0) -> str:
"""Run a single training experiment with wall-clock timeout."""
t = timeout or settings.autoresearch_time_budget
result = run_experiment(workspace, timeout=t, metric_name=settings.autoresearch_metric)
if result["success"] and result["metric"] is not None:
return (
f"{settings.autoresearch_metric}: {result['metric']:.4f} ({result['duration_s']}s)"
)
return result.get("error") or "Experiment failed"
def _evaluate(current: float, baseline: float) -> str:
"""Compare current metric against baseline."""
return evaluate_result(current, baseline, metric_name=settings.autoresearch_metric)
toolkit.register(_prepare, name="prepare_experiment")
toolkit.register(_run, name="run_experiment")
toolkit.register(_evaluate, name="evaluate_result")
# Also give Lab access to file + shell tools for editing train.py
shell_tools = ShellTools()
toolkit.register(shell_tools.run_shell_command, name="shell")
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
file_tools = FileTools(base_dir=base_path)
toolkit.register(file_tools.read_file, name="read_file")
toolkit.register(file_tools.save_file, name="write_file")
toolkit.register(file_tools.list_files, name="list_files")
return toolkit
# Mapping of agent IDs to their toolkits
AGENT_TOOLKITS: dict[str, Callable[[], Toolkit]] = {
"echo": create_research_tools,
@@ -534,6 +579,7 @@ AGENT_TOOLKITS: dict[str, Callable[[], Toolkit]] = {
"seer": create_data_tools,
"forge": create_code_tools,
"quill": create_writing_tools,
"lab": create_experiment_tools,
"pixel": lambda base_dir=None: _create_stub_toolkit("pixel"),
"lyra": lambda base_dir=None: _create_stub_toolkit("lyra"),
"reel": lambda base_dir=None: _create_stub_toolkit("reel"),
@@ -553,9 +599,7 @@ def _create_stub_toolkit(name: str):
return toolkit
def get_tools_for_agent(
agent_id: str, base_dir: str | Path | None = None
) -> Toolkit | None:
def get_tools_for_agent(agent_id: str, base_dir: str | Path | None = None) -> Toolkit | None:
"""Get the appropriate toolkit for an agent.
Args:
@@ -643,6 +687,21 @@ def get_all_available_tools() -> dict[str, dict]:
"description": "Local AI coding assistant using Ollama (qwen2.5:14b or deepseek-coder)",
"available_in": ["forge", "orchestrator"],
},
"prepare_experiment": {
"name": "Prepare Experiment",
"description": "Clone autoresearch repo and run data preparation for ML experiments",
"available_in": ["lab", "orchestrator"],
},
"run_experiment": {
"name": "Run Experiment",
"description": "Execute a time-boxed ML training experiment and capture metrics",
"available_in": ["lab", "orchestrator"],
},
"evaluate_result": {
"name": "Evaluate Result",
"description": "Compare experiment metric against baseline to assess improvement",
"available_in": ["lab", "orchestrator"],
},
}
# ── Git tools ─────────────────────────────────────────────────────────────

View File

@@ -20,7 +20,9 @@ _VALID_AGENTS: dict[str, str] = {
}
def delegate_task(agent_name: str, task_description: str, priority: str = "normal") -> dict[str, Any]:
def delegate_task(
agent_name: str, task_description: str, priority: str = "normal"
) -> dict[str, Any]:
"""Record a delegation intent to another agent.
Args:
@@ -44,7 +46,9 @@ def delegate_task(agent_name: str, task_description: str, priority: str = "norma
if priority not in valid_priorities:
priority = "normal"
logger.info("Delegation intent: %s%s (priority=%s)", agent_name, task_description[:80], priority)
logger.info(
"Delegation intent: %s%s (priority=%s)", agent_name, task_description[:80], priority
)
return {
"success": True,

View File

@@ -65,9 +65,7 @@ def _get_ollama_model() -> str:
models = response.json().get("models", [])
# Check if configured model is available
for model in models:
if model.get("name", "").startswith(
settings.ollama_model.split(":")[0]
):
if model.get("name", "").startswith(settings.ollama_model.split(":")[0]):
return settings.ollama_model
# Fallback: return configured model
@@ -139,9 +137,7 @@ def get_memory_status() -> dict[str, Any]:
if tier1_exists:
lines = memory_md.read_text().splitlines()
tier1_info["line_count"] = len(lines)
tier1_info["sections"] = [
ln.lstrip("# ").strip() for ln in lines if ln.startswith("## ")
]
tier1_info["sections"] = [ln.lstrip("# ").strip() for ln in lines if ln.startswith("## ")]
# Vault — scan all subdirs under memory/
vault_root = repo_root / "memory"
@@ -233,13 +229,15 @@ def get_agent_roster() -> dict[str, Any]:
roster = []
for persona in _PERSONAS:
roster.append({
"id": persona["agent_id"],
"name": persona["name"],
"status": "available",
"capabilities": ", ".join(persona.get("tools", [])),
"role": persona.get("role", ""),
})
roster.append(
{
"id": persona["agent_id"],
"name": persona["name"],
"status": "available",
"capabilities": ", ".join(persona.get("tools", [])),
"role": persona.get("role", ""),
}
)
return {
"agents": roster,

View File

@@ -56,15 +56,13 @@ class RateLimitMiddleware(BaseHTTPMiddleware):
# Clean up old requests
self.requests[client_ip] = [
t for t in self.requests[client_ip]
if now - t < self.window
t for t in self.requests[client_ip] if now - t < self.window
]
if len(self.requests[client_ip]) >= self.limit:
logger.warning("Rate limit exceeded for %s", client_ip)
return JSONResponse(
status_code=429,
content={"error": "Rate limit exceeded. Try again later."}
status_code=429, content={"error": "Rate limit exceeded. Try again later."}
)
self.requests[client_ip].append(now)

View File

@@ -33,6 +33,7 @@ def start(
return
import uvicorn
from timmy_serve.app import create_timmy_serve_app
serve_app = create_timmy_serve_app()

View File

@@ -23,9 +23,7 @@ class AgentMessage:
to_agent: str = ""
content: str = ""
message_type: str = "text" # text | command | response | error
timestamp: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat()
)
timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
replied: bool = False
@@ -56,7 +54,10 @@ class InterAgentMessenger:
self._all_messages.append(msg)
logger.info(
"Message %s%s: %s (%s)",
from_agent, to_agent, content[:50], message_type,
from_agent,
to_agent,
content[:50],
message_type,
)
return msg

View File

@@ -26,6 +26,7 @@ class VoiceTTS:
def _init_engine(self) -> None:
try:
import pyttsx3
self._engine = pyttsx3.init()
self._engine.setProperty("rate", self._rate)
self._engine.setProperty("volume", self._volume)

Some files were not shown because too many files have changed in this diff Show More