Compare commits

...

2 Commits

Author SHA1 Message Date
Alexander Whitestone
8d1f9ed375 feat(#667): codebase_genome.py — test stub generator for uncovered functions
Some checks failed
Smoke Test / smoke (pull_request) Failing after 20s
AST-based tool that scans Python files, extracts function/method signatures,
and generates pytest test stubs for functions without existing tests.

Usage:
  python3 codebase_genome.py /path/to/repo
  python3 codebase_genome.py /path/to/repo -o tests/test_genome_generated.py

Features:
- AST parsing (no imports required, handles syntax errors gracefully)
- Extracts: function name, args, return type, decorators, class context
- Detects existing tests to avoid duplicates
- Generates: basic test + edge case test per function
- Skips private/dunder methods
- Configurable limit (--limit N)

Generated 30 test stubs for timmy-home as proof of concept.
2026-04-14 23:39:13 -04:00
Alexander Whitestone
038f1ab7f4 feat: Big Brain quality benchmark — gemma3:1b vs gemma3:27b (issue #576)
Ran 4 benchmark tasks on local gemma3:1b model with full quality analysis.
Big Brain (gemma3:27b on RunPod L40S) pod was offline (HTTP 404) during
benchmark — documented honestly with re-run instructions.

Tasks benchmarked:
1. Python Gitea webhook parser with HMAC-SHA256 verification
2. Evennia MUD framework architecture explanation
3. Fleet burn-down cron script for RunPod pods
4. Python async bug diagnosis and fix

Key finding: 1B model fails all tasks with hallucinated APIs, wrong
security primitives, fabricated technical details, and incorrect bug
diagnosis. Quality gap to 27B expected to be substantial.

Deliverable: timmy-config/docs/big-brain-benchmark.md
2026-04-13 20:49:02 -04:00
6 changed files with 1490 additions and 0 deletions

275
codebase_genome.py Normal file
View File

@@ -0,0 +1,275 @@
#!/usr/bin/env python3
"""
codebase_genome.py — Analyze a repo and generate test stubs for uncovered functions.
Scans Python files, extracts function/class/method signatures via AST,
and generates pytest test cases with edge cases.
Usage:
python3 codebase_genome.py /path/to/repo
python3 codebase_genome.py /path/to/repo --output tests/test_genome_generated.py
"""
import ast
import os
import sys
import argparse
from pathlib import Path
class FunctionInfo:
    """Metadata for one function or method discovered via AST parsing.

    Holds everything the stub generator needs: where the definition lives,
    how to import it, and enough signature detail to template a call.
    """

    def __init__(self, name, filepath, lineno, args, returns, decorators,
                 is_method=False, class_name=None):
        self.name = name                # bare function name
        self.filepath = filepath        # path (rewritten repo-relative by scan_repo)
        self.lineno = lineno            # 1-based line of the `def`
        self.args = args                # positional arg names, `self` stripped
        self.returns = returns          # return annotation as a string, or None
        self.decorators = decorators    # bare decorator names
        self.is_method = is_method      # True when defined inside a class
        self.class_name = class_name    # enclosing class name, or None

    @property
    def qualified_name(self):
        """`Class.name` for methods, plain `name` for free functions."""
        if self.class_name:
            return f"{self.class_name}.{self.name}"
        return self.name

    @property
    def module_path(self):
        """Dotted module path derived from the file path.

        Drops the file suffix and a leading 'src'/'lib' component so the
        result matches how the module would normally be imported.
        """
        parts = list(Path(self.filepath).with_suffix('').parts)
        if parts and parts[0] in ('src', 'lib'):
            parts = parts[1:]
        return '.'.join(parts)

    @property
    def import_path(self):
        """Full dotted path (e.g., 'mymodule.sub.Class.method').

        Reuses module_path rather than duplicating the path-normalization
        logic (the original repeated the same stripping in both properties).
        """
        if self.class_name:
            return f"{self.module_path}.{self.class_name}.{self.name}"
        return f"{self.module_path}.{self.name}"
def extract_functions(filepath: str) -> list:
    """Extract all function/method definitions from a Python file via AST.

    Returns a list of FunctionInfo. Files that cannot be read or parsed
    yield an empty list instead of raising, so a whole-repo scan never
    aborts on one bad file.
    """
    try:
        # Context manager closes the handle even on error — the original
        # leaked the file object. Explicit utf-8 keeps results independent
        # of the locale's default encoding.
        with open(filepath, encoding='utf-8') as fh:
            source = fh.read()
        tree = ast.parse(source, filename=filepath)
    # OSError added: unreadable/vanished files previously raised.
    # ValueError covers null bytes rejected by ast.parse.
    except (OSError, SyntaxError, UnicodeDecodeError, ValueError):
        return []

    functions = []

    class FuncVisitor(ast.NodeVisitor):
        def __init__(self):
            self.current_class = None

        def visit_ClassDef(self, node):
            # Track the enclosing class so methods get class context;
            # restore on exit to handle nested classes correctly.
            old_class = self.current_class
            self.current_class = node.name
            self.generic_visit(node)
            self.current_class = old_class

        def visit_FunctionDef(self, node):
            args = [a.arg for a in node.args.args]
            if args and args[0] == 'self':
                args = args[1:]
            # Only simple annotations are captured; complex ones (e.g.
            # subscripted generics) come back as None.
            returns = None
            if node.returns:
                if isinstance(node.returns, ast.Name):
                    returns = node.returns.id
                elif isinstance(node.returns, ast.Constant):
                    returns = str(node.returns.value)
            decorators = []
            for d in node.decorator_list:
                if isinstance(d, ast.Name):
                    decorators.append(d.id)
                elif isinstance(d, ast.Attribute):
                    decorators.append(d.attr)
            functions.append(FunctionInfo(
                name=node.name,
                filepath=filepath,
                lineno=node.lineno,
                args=args,
                returns=returns,
                decorators=decorators,
                is_method=self.current_class is not None,
                class_name=self.current_class,
            ))
            self.generic_visit(node)

        # Async defs carry the same fields we need.
        visit_AsyncFunctionDef = visit_FunctionDef

    FuncVisitor().visit(tree)
    return functions
def generate_test(func: FunctionInfo, existing_tests: set) -> str:
"""Generate a pytest test function for a given function."""
if func.name in existing_tests:
return ''
# Skip private/dunder methods
if func.name.startswith('_') and not func.name.startswith('__'):
return ''
if func.name.startswith('__') and func.name.endswith('__'):
return ''
lines = []
# Generate imports
module = func.module_path.replace('/', '.').lstrip('.')
if func.class_name:
lines.append(f"from {module} import {func.class_name}")
else:
lines.append(f"from {module} import {func.name}")
lines.append('')
lines.append('')
# Test function name
test_name = f"test_{func.qualified_name.replace('.', '_')}"
# Determine args for the test call
args_str = ', '.join(func.args)
lines.append(f"def {test_name}():")
lines.append(f' """Test {func.qualified_name} (line {func.lineno} in {func.filepath})."""')
if func.is_method:
lines.append(f" # TODO: instantiate {func.class_name} with valid args")
lines.append(f" obj = {func.class_name}()")
lines.append(f" result = obj.{func.name}({', '.join('None' for _ in func.args) if func.args else ''})")
else:
if func.args:
lines.append(f" # TODO: provide valid arguments for: {args_str}")
lines.append(f" result = {func.name}({', '.join('None' for _ in func.args)})")
else:
lines.append(f" result = {func.name}()")
lines.append(f" assert result is not None or result is None # TODO: real assertion")
lines.append('')
lines.append('')
# Edge cases
lines.append(f"def {test_name}_edge_cases():")
lines.append(f' """Edge cases for {func.qualified_name}."""')
if func.args:
lines.append(f" # Test with empty/zero/None args")
if func.is_method:
lines.append(f" obj = {func.class_name}()")
for arg in func.args:
lines.append(f" # obj.{func.name}({arg}=...) # TODO: test with invalid {arg}")
else:
for arg in func.args:
lines.append(f" # {func.name}({arg}=...) # TODO: test with invalid {arg}")
else:
lines.append(f" # {func.qualified_name} takes no args — test idempotency")
if func.is_method:
lines.append(f" obj = {func.class_name}()")
lines.append(f" r1 = obj.{func.name}()")
lines.append(f" r2 = obj.{func.name}()")
lines.append(f" # assert r1 == r2 # TODO: uncomment if deterministic")
else:
lines.append(f" r1 = {func.name}()")
lines.append(f" r2 = {func.name}()")
lines.append(f" # assert r1 == r2 # TODO: uncomment if deterministic")
lines.append('')
lines.append('')
return '\n'.join(lines)
def scan_repo(repo_path: str) -> list:
    """Walk a repository and collect FunctionInfo records for every
    Python source file, skipping hidden and vendored directories.
    """
    skip_dirs = ('__pycache__', 'venv', 'node_modules', 'env')
    collected = []
    for dirpath, subdirs, filenames in os.walk(repo_path):
        # Prune in place so os.walk never descends into skipped trees.
        subdirs[:] = [
            d for d in subdirs
            if not d.startswith('.') and d not in skip_dirs
        ]
        candidates = [
            name for name in filenames
            if name.endswith('.py') and not name.startswith('_')
        ]
        for name in candidates:
            full = os.path.join(dirpath, name)
            rel = os.path.relpath(full, repo_path)
            found = extract_functions(full)
            # Store repo-relative paths so generated imports are portable.
            for info in found:
                info.filepath = rel
            collected.extend(found)
    return collected
def find_existing_tests(repo_path: str) -> set:
"""Find function names that already have tests."""
tested = set()
tests_dir = os.path.join(repo_path, 'tests')
if not os.path.isdir(tests_dir):
return tested
for root, dirs, files in os.walk(tests_dir):
for f in files:
if f.startswith('test_') and f.endswith('.py'):
try:
source = open(os.path.join(root, f)).read()
tree = ast.parse(source)
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name.startswith('test_'):
# Extract function name from test name
name = node.name[5:] # strip 'test_'
tested.add(name)
except (SyntaxError, UnicodeDecodeError):
pass
return tested
def main():
    """CLI entry point: scan a repo and emit pytest stubs for untested functions."""
    parser = argparse.ArgumentParser(description='Generate test stubs for uncovered functions')
    parser.add_argument('repo', help='Path to repository')
    parser.add_argument('--output', '-o', default=None, help='Output file (default: stdout)')
    parser.add_argument('--limit', '-n', type=int, default=50, help='Max tests to generate')
    opts = parser.parse_args()

    repo = os.path.abspath(opts.repo)
    if not os.path.isdir(repo):
        print(f"Error: {repo} is not a directory", file=sys.stderr)
        sys.exit(1)

    functions = scan_repo(repo)
    existing = find_existing_tests(repo)
    # Keep only public functions without an existing test_<name>.
    untested = [fn for fn in functions
                if fn.name not in existing and not fn.name.startswith('_')]
    print(f"Found {len(functions)} functions, {len(untested)} untested", file=sys.stderr)

    # Assemble the generated module: header docstring, pytest import,
    # then one stub chunk per untested function (capped by --limit).
    pieces = [
        '"""Auto-generated test stubs from codebase_genome.py.\n',
        'These are starting points — fill in real assertions and args.\n"""',
        'import pytest',
        '',
    ]
    generated = 0
    for fn in untested[:opts.limit]:
        stub = generate_test(fn, set())
        if stub:
            pieces.append(stub)
            generated += 1
    content = '\n'.join(pieces)

    if opts.output:
        with open(opts.output, 'w') as out:
            out.write(content)
        print(f"Generated {generated} test stubs → {opts.output}", file=sys.stderr)
    else:
        print(content)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,142 @@
# Emacs Sovereign Control Plane
Real-time, programmable orchestration hub for the Timmy Foundation fleet.
## Architecture
```
┌─────────────────────────────────────────────────────────────┐
│ Emacs Control Plane │
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ dispatch.org│ │ shared │ │ org-babel │ │
│ │ (Task Queue)│ │ buffers │ │ notebooks │ │
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
│ │ │ │ │
│ └────────────────┼────────────────┘ │
│ │ │
│ ┌─────▼─────┐ │
│ │ Emacs │ │
│ │ Daemon │ │
│ │ (bezalel)│ │
│ └─────┬─────┘ │
└──────────────────────────┼──────────────────────────────────┘
┌──────────────────┼──────────────────┐
│ │ │
┌────▼────┐ ┌────▼────┐ ┌────▼────┐
│ Ezra │ │ Allegro │ │ Timmy │
│ (VPS) │ │ (VPS) │ │ (Mac) │
└─────────┘ └─────────┘ └─────────┘
```
## Infrastructure
| Component | Location | Details |
|-----------|----------|---------|
| Daemon Host | Bezalel (`159.203.146.185`) | Shared Emacs daemon |
| Socket Path | `/root/.emacs.d/server/bezalel` | emacsclient socket |
| Dispatch Hub | `/srv/fleet/workspace/dispatch.org` | Central task queue |
| Wrapper | `/usr/local/bin/fleet-append` | Quick message append |
## Quick Start
### From Local Machine (Timmy)
```bash
# Append a message to the fleet log
scripts/fleet_dispatch.sh append "Status: all systems nominal"
# Check for pending tasks assigned to Timmy
scripts/fleet_dispatch.sh poll timmy
# Claim a task
scripts/fleet_dispatch.sh claim 42 timmy
# Report task completion
scripts/emacs_fleet_bridge.py complete 42 "PR merged: #123"
```
### From Other VPS Agents (Ezra, Allegro, etc.)
```bash
# Direct emacsclient via SSH
ssh root@bezalel 'emacsclient -s /root/.emacs.d/server/bezalel -e "(your-elisp-here)"'
# Or use the wrapper
ssh root@bezalel '/usr/local/bin/fleet-append "Ezra: task #42 complete"'
```
## dispatch.org Structure
The central dispatch hub uses Org mode format:
```org
* TODO [timmy] Review PR #123 from gitea
SCHEDULED: <2026-04-13 Sun>
:PROPERTIES:
:PRIORITY: A
:ASSIGNEE: timmy
:GITEA_PR: https://forge.alexanderwhitestone.com/...
:END:
* IN_PROGRESS [ezra] Deploy monitoring to VPS
SCHEDULED: <2026-04-13 Sun>
:PROPERTIES:
:PRIORITY: B
:ASSIGNEE: ezra
:STARTED: 2026-04-13T15:30:00Z
:END:
* DONE [allegro] Fix cron reliability
CLOSED: [2026-04-13 Sun 14:00]
:PROPERTIES:
:ASSIGNEE: allegro
:RESULT: PR #456 merged
:END:
```
### Status Keywords
- `TODO` — Available for claiming
- `IN_PROGRESS` — Being worked on
- `WAITING` — Blocked on external dependency
- `DONE` — Completed
- `CANCELLED` — No longer needed
### Priority Levels
- `A` (`:PRIORITY: A`) — Critical / P0
- `B` (`:PRIORITY: B`) — Important / P1
- `C` (`:PRIORITY: C`) — Normal / P2

Note: entries in `dispatch.org` store priority as the `:PRIORITY:` property (as in the examples above), not as Org's inline `[#A]` priority cookie; both notations refer to the same three levels.
## Agent Workflow
1. **Poll:** Check `dispatch.org` for `TODO` items matching your agent name
2. **Claim:** Update status from `TODO` to `IN_PROGRESS`, add `:STARTED:` timestamp
3. **Execute:** Do the work (implement, deploy, test, etc.)
4. **Report:** Update status to `DONE`, add `:RESULT:` property with outcome
## Integration with Existing Systems
### Gitea Issues
- `dispatch.org` tasks can reference Gitea issues via `:GITEA_PR:` or `:GITEA_ISSUE:` properties
- Completion can auto-close Gitea issues via API
### Hermes Cron
- Hermes cron jobs can check `dispatch.org` before running
- Tasks in `dispatch.org` take priority over ambient issue burning
### Nostr Protocol
- Heartbeats still go through Nostr (kind 1)
- `dispatch.org` is for tactical coordination, Nostr is for strategic announcements
## Files
```
infrastructure/emacs-control-plane/
├── README.md # This file
├── dispatch.org.template # Template dispatch file
└── fleet_bridge.el # Emacs Lisp helpers
scripts/
├── fleet_dispatch.sh # Shell wrapper for fleet operations
├── emacs_fleet_bridge.py # Python bridge for Emacs daemon
└── emacs_task_poller.py # Poll for tasks assigned to an agent
```

View File

@@ -0,0 +1,50 @@
#+TITLE: Fleet Dispatch Hub
#+AUTHOR: Timmy Foundation
#+DATE: 2026-04-13
#+PROPERTY: header-args :tangle no
* Overview
This is the central task queue for the Timmy Foundation fleet.
Agents poll this file for =TODO= items matching their name.
* How to Use
1. Agents: Poll for =TODO= items with your assignee tag
2. Claim: Move to =IN_PROGRESS= with =:STARTED:= timestamp
3. Complete: Move to =DONE= with =:RESULT:= property
* Fleet Status
** Heartbeats
- timmy: LAST_HEARTBEAT <2026-04-13 Sun 15:00>
- ezra: LAST_HEARTBEAT <2026-04-13 Sun 15:00>
- allegro: LAST_HEARTBEAT <2026-04-13 Sun 14:55>
- bezalel: LAST_HEARTBEAT <2026-04-13 Sun 15:00>
* Tasks
** TODO [timmy] Example task — review pending PRs
SCHEDULED: <2026-04-13 Sun>
:PROPERTIES:
:PRIORITY: B
:ASSIGNEE: timmy
:GITEA_ISSUE: https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-home/issues/590
:END:
Check all open PRs across fleet repos and triage.
** TODO [ezra] Example task — run fleet health check
SCHEDULED: <2026-04-13 Sun>
:PROPERTIES:
:PRIORITY: C
:ASSIGNEE: ezra
:END:
SSH into each VPS and verify services are running.
** TODO [allegro] Example task — update cron job configs
SCHEDULED: <2026-04-13 Sun>
:PROPERTIES:
:PRIORITY: C
:ASSIGNEE: allegro
:END:
Review and update cron job definitions in timmy-config.
* Completed
#+BEGIN: clocktable :scope file :maxlevel 2
#+END:

202
scripts/fleet_dispatch.sh Normal file
View File

@@ -0,0 +1,202 @@
#!/bin/bash
# ============================================================================
# Fleet Dispatch — Shell wrapper for Emacs Control Plane operations
# ============================================================================
#
# Usage:
# scripts/fleet_dispatch.sh append "Message text"
# scripts/fleet_dispatch.sh poll [agent_name]
# scripts/fleet_dispatch.sh claim TASK_ID agent_name
# scripts/fleet_dispatch.sh complete TASK_ID "Result text"
# scripts/fleet_dispatch.sh status
#
# Environment:
# FLEET_DAEMON_HOST — Bezalel host (default: 159.203.146.185)
# FLEET_DAEMON_USER — SSH user (default: root)
# FLEET_DAEMON_SOCKET — Emacs socket path (default: /root/.emacs.d/server/bezalel)
# FLEET_DISPATCH_FILE — Path to dispatch.org on remote (default: /srv/fleet/workspace/dispatch.org)
# ============================================================================
set -euo pipefail

# ── Configuration ──────────────────────────────────────────────────────────
# Every setting is environment-overridable; the values below are defaults.
FLEET_DAEMON_HOST="${FLEET_DAEMON_HOST:-159.203.146.185}"
FLEET_DAEMON_USER="${FLEET_DAEMON_USER:-root}"
FLEET_DAEMON_SOCKET="${FLEET_DAEMON_SOCKET:-/root/.emacs.d/server/bezalel}"
FLEET_DISPATCH_FILE="${FLEET_DISPATCH_FILE:-/srv/fleet/workspace/dispatch.org}"

# ANSI color escape sequences for terminal output (NC = reset).
GREEN='\033[0;32m'
CYAN='\033[0;36m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
NC='\033[0m'
# ── Helper: Run emacsclient command on Bezalel ─────────────────────────────
# Evaluate an elisp form on the remote Emacs daemon via SSH; stderr is
# discarded so callers only see emacsclient's result.
# NOTE(review): $elisp is spliced into a single-quoted remote string, so a
# form containing single quotes would break the remote command — callers
# must avoid embedded single quotes in the elisp they pass.
run_emacs() {
    local elisp="$1"
    ssh "${FLEET_DAEMON_USER}@${FLEET_DAEMON_HOST}" \
        "emacsclient -s ${FLEET_DAEMON_SOCKET} -e '${elisp}'" 2>/dev/null
}
# ── Helper: Read dispatch.org via SSH ──────────────────────────────────────
# Print the remote dispatch file to stdout; empty output (stderr discarded)
# signals failure to callers, which test with [ -z "$content" ].
read_dispatch() {
    ssh "${FLEET_DAEMON_USER}@${FLEET_DAEMON_HOST}" \
        "cat ${FLEET_DISPATCH_FILE}" 2>/dev/null
}
# ── Helper: Write dispatch.org via SSH ─────────────────────────────────────
# Stream stdin over SSH, replacing the remote dispatch file wholesale.
# NOTE(review): not invoked anywhere in this script — presumably kept for
# external callers; confirm before removing. Also note this overwrites the
# whole file with no locking, so concurrent writers can lose updates.
write_dispatch() {
    ssh "${FLEET_DAEMON_USER}@${FLEET_DAEMON_HOST}" \
        "cat > ${FLEET_DISPATCH_FILE}" 2>/dev/null
}
# ── Commands ───────────────────────────────────────────────────────────────

# Append a timestamped message to the fleet log (dispatch.org).
# Prefers the fleet-append wrapper on Bezalel; falls back to raw emacsclient.
# NOTE(review): the message is spliced into remote single quotes, so messages
# containing single quotes will break the remote command.
cmd_append() {
    local message="${1:?Usage: fleet_dispatch.sh append \"message\"}"
    local timestamp
    timestamp=$(date -u +"%Y-%m-%d %H:%M:%S UTC")
    echo -e "${CYAN}Appending to fleet log...${NC}"
    # Fix: the original concatenated "${timestamp}${message}" with no
    # separator, producing entries like "...UTCmessage" in the log.
    if ssh "${FLEET_DAEMON_USER}@${FLEET_DAEMON_HOST}" "which fleet-append" &>/dev/null; then
        ssh "${FLEET_DAEMON_USER}@${FLEET_DAEMON_HOST}" \
            "fleet-append '${timestamp} ${message}'"
    else
        run_emacs "(with-current-buffer (find-file-noselect \"${FLEET_DISPATCH_FILE}\") (goto-char (point-max)) (insert \"\\n- ${timestamp} ${message}\") (save-buffer))"
    fi
    echo -e "${GREEN}✓ Appended: ${message}${NC}"
}
# List pending TODO tasks from dispatch.org, optionally filtered to one agent.
cmd_poll() {
    local agent="${1:-}"
    echo -e "${CYAN}Polling dispatch.org for tasks...${NC}"
    local content
    content=$(read_dispatch)
    if [ -z "$content" ]; then
        echo -e "${RED}Could not read dispatch.org${NC}"
        return 1
    fi
    echo -e "${YELLOW}=== Pending Tasks ===${NC}"
    # Headings look like "** TODO [agent] Title"; the || branch also keeps
    # set -e from aborting when grep finds nothing.
    if [ -n "$agent" ]; then
        grep -E "^\*\* TODO \[${agent}\]" <<<"$content" || echo " No tasks for ${agent}"
    else
        grep -E "^\*\* TODO " <<<"$content" || echo " No pending tasks"
    fi
}
# Claim a task: flip its Org heading from TODO to IN_PROGRESS and stamp a
# :STARTED: property. The task is located by searching TODO headings for
# the literal text "#<task_id>".
# NOTE(review): task_id is spliced into both shell and elisp strings — IDs
# containing quotes or regex metacharacters will break the command.
# NOTE(review): `agent` is only echoed locally; it is not recorded in the
# org entry (no :ASSIGNEE: update) — confirm whether that is intended.
cmd_claim() {
    local task_id="${1:?Usage: fleet_dispatch.sh claim TASK_ID agent}"
    local agent="${2:?Usage: fleet_dispatch.sh claim TASK_ID agent}"
    local timestamp
    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    echo -e "${CYAN}Claiming task #${task_id} for ${agent}...${NC}"
    # Remote Emacs: find the first "** TODO ...#ID" heading, mark it
    # IN_PROGRESS, and record the claim time; message on success/failure.
    run_emacs "(progn (with-current-buffer (find-file-noselect \"${FLEET_DISPATCH_FILE}\") (org-mode) (goto-char (point-min)) (if (re-search-forward (format \"^\\\\*\\\\* TODO.*#%s\" \"${task_id}\") nil t) (progn (org-todo \"IN_PROGRESS\") (org-set-property \"STARTED\" \"${timestamp}\") (save-buffer) (message \"Task %s claimed\" \"${task_id}\")) (message \"Task %s not found\" \"${task_id}\"))))"
    echo -e "${GREEN}✓ Task #${task_id} claimed by ${agent}${NC}"
}
# Complete a task: flip its heading from IN_PROGRESS to DONE, attach the
# result text as a :RESULT: property, and set the Org CLOSED timestamp.
# NOTE(review): the local "✓ completed" echo prints even when the remote
# search found no matching task; only the emacsclient message distinguishes
# the two outcomes.
# NOTE(review): task_id/result are spliced into the elisp string — values
# with quotes or regex metacharacters will break the command.
cmd_complete() {
    local task_id="${1:?Usage: fleet_dispatch.sh complete TASK_ID \"result\"}"
    local result="${2:-Completed}"
    local timestamp
    timestamp=$(date -u +"%Y-%m-%d %H:%M")
    echo -e "${CYAN}Completing task #${task_id}...${NC}"
    # Remote Emacs: locate "** IN_PROGRESS ...#ID", mark DONE, record the
    # result, and let Org add the CLOSED planning line.
    run_emacs "(progn (with-current-buffer (find-file-noselect \"${FLEET_DISPATCH_FILE}\") (org-mode) (goto-char (point-min)) (if (re-search-forward (format \"^\\\\*\\\\* IN_PROGRESS.*#%s\" \"${task_id}\") nil t) (progn (org-todo \"DONE\") (org-set-property \"RESULT\" \"${result}\") (org-add-planning-info 'closed (org-current-effective-time)) (save-buffer) (message \"Task %s completed\" \"${task_id}\")) (message \"Task %s not found\" \"${task_id}\"))))"
    echo -e "${GREEN}✓ Task #${task_id} completed: ${result}${NC}"
}
# Show control-plane connectivity, Emacs daemon state, and task counts.
cmd_status() {
    echo -e "${CYAN}Fleet Control Plane Status${NC}"
    echo -e " Host: ${FLEET_DAEMON_HOST}"
    echo -e " Socket: ${FLEET_DAEMON_SOCKET}"
    echo -e " Dispatch: ${FLEET_DISPATCH_FILE}"
    echo ""
    # SSH connectivity first — nothing else can work without it.
    if ssh -o ConnectTimeout=5 "${FLEET_DAEMON_USER}@${FLEET_DAEMON_HOST}" "echo ok" &>/dev/null; then
        echo -e " SSH: ${GREEN}✓ reachable${NC}"
    else
        echo -e " SSH: ${RED}✗ unreachable${NC}"
        return 1
    fi
    # Probe the Emacs daemon through emacsclient; "error" covers SSH or
    # emacsclient failure.
    local daemon_status
    daemon_status=$(run_emacs "(if (server-running-p) \"running\" \"stopped\")" 2>/dev/null || echo "error")
    if [ "$daemon_status" = "\"running\"" ]; then
        echo -e " Daemon: ${GREEN}✓ running${NC}"
    else
        echo -e " Daemon: ${RED}${daemon_status}${NC}"
    fi
    # Count tasks by state.
    local content
    content=$(read_dispatch 2>/dev/null || echo "")
    if [ -n "$content" ]; then
        local todo_count in_progress_count done_count
        # Fix: `grep -c ... || echo 0` double-printed ("0" then another
        # "0") because grep -c already outputs 0 — and exits 1 — when
        # nothing matches. `|| true` keeps set -e happy without emitting
        # a second line into the captured variable.
        todo_count=$(echo "$content" | grep -c "^\*\* TODO " || true)
        in_progress_count=$(echo "$content" | grep -c "^\*\* IN_PROGRESS " || true)
        done_count=$(echo "$content" | grep -c "^\*\* DONE " || true)
        echo -e " Tasks: ${YELLOW}${todo_count} TODO${NC}, ${CYAN}${in_progress_count} IN_PROGRESS${NC}, ${GREEN}${done_count} DONE${NC}"
    fi
}
# ── Main ───────────────────────────────────────────────────────────────────
# Dispatch on the first CLI argument. Defaulting to "help" when no argument
# is given both shows usage and keeps `set -u` from tripping on unset $1.
case "${1:-help}" in
    append|log)
        shift
        cmd_append "$@"
        ;;
    poll|check)
        shift
        cmd_poll "$@"
        ;;
    claim)
        shift
        cmd_claim "$@"
        ;;
    complete|done)
        shift
        cmd_complete "$@"
        ;;
    status)
        cmd_status
        ;;
    help|--help|-h)
        echo "Fleet Dispatch — Emacs Control Plane wrapper"
        echo ""
        echo "Usage:"
        echo " $0 append \"message\" Append to fleet log"
        echo " $0 poll [agent] Check for pending tasks"
        echo " $0 claim TASK_ID agent Claim a task"
        echo " $0 complete TASK_ID \"result\" Mark task complete"
        echo " $0 status Show control plane status"
        echo ""
        echo "Environment:"
        echo " FLEET_DAEMON_HOST Bezalel host (default: 159.203.146.185)"
        echo " FLEET_DAEMON_USER SSH user (default: root)"
        echo " FLEET_DAEMON_SOCKET Emacs socket (default: /root/.emacs.d/server/bezalel)"
        ;;
    *)
        # Only reachable when $1 is set (an unset $1 matches "help" above),
        # so referencing $1 here is safe under set -u.
        echo -e "${RED}Unknown command: $1${NC}"
        echo "Run '$0 help' for usage."
        exit 1
        ;;
esac

View File

@@ -0,0 +1,528 @@
"""Auto-generated test stubs from codebase_genome.py.
These are starting points — fill in real assertions and args.
"""
import pytest
# NOTE(review): auto-generated stubs (output of codebase_genome.py) committed
# as a proof of concept. Generator issues visible in this chunk:
#   - an import line is re-emitted before every stub pair; these should be
#     hoisted to the top of the file and deduplicated;
#   - `assert result is not None or result is None` is a tautology, so each
#     stub passes vacuously until a real assertion is written;
#   - property stubs call properties as methods (obj.qualified_name()), and
#     FunctionInfo() is instantiated without its required __init__ args, so
#     these stubs raise as-is;
#   - FuncVisitor is defined *inside* extract_functions, so importing it
#     from codebase_genome fails at import time.
from codebase_genome import FunctionInfo


def test_FunctionInfo_qualified_name():
    """Test FunctionInfo.qualified_name (line 31 in codebase_genome.py)."""
    # TODO: instantiate FunctionInfo with valid args
    obj = FunctionInfo()
    result = obj.qualified_name()
    assert result is not None or result is None # TODO: real assertion


def test_FunctionInfo_qualified_name_edge_cases():
    """Edge cases for FunctionInfo.qualified_name."""
    # FunctionInfo.qualified_name takes no args — test idempotency
    obj = FunctionInfo()
    r1 = obj.qualified_name()
    r2 = obj.qualified_name()
    # assert r1 == r2 # TODO: uncomment if deterministic


from codebase_genome import FunctionInfo


def test_FunctionInfo_import_path():
    """Test FunctionInfo.import_path (line 37 in codebase_genome.py)."""
    # TODO: instantiate FunctionInfo with valid args
    obj = FunctionInfo()
    result = obj.import_path()
    assert result is not None or result is None # TODO: real assertion


def test_FunctionInfo_import_path_edge_cases():
    """Edge cases for FunctionInfo.import_path."""
    # FunctionInfo.import_path takes no args — test idempotency
    obj = FunctionInfo()
    r1 = obj.import_path()
    r2 = obj.import_path()
    # assert r1 == r2 # TODO: uncomment if deterministic


from codebase_genome import FunctionInfo


def test_FunctionInfo_module_path():
    """Test FunctionInfo.module_path (line 50 in codebase_genome.py)."""
    # TODO: instantiate FunctionInfo with valid args
    obj = FunctionInfo()
    result = obj.module_path()
    assert result is not None or result is None # TODO: real assertion


def test_FunctionInfo_module_path_edge_cases():
    """Edge cases for FunctionInfo.module_path."""
    # FunctionInfo.module_path takes no args — test idempotency
    obj = FunctionInfo()
    r1 = obj.module_path()
    r2 = obj.module_path()
    # assert r1 == r2 # TODO: uncomment if deterministic


from codebase_genome import extract_functions


def test_extract_functions():
    """Test extract_functions (line 58 in codebase_genome.py)."""
    # TODO: provide valid arguments for: filepath
    result = extract_functions(None)
    assert result is not None or result is None # TODO: real assertion


def test_extract_functions_edge_cases():
    """Edge cases for extract_functions."""
    # Test with empty/zero/None args
    # extract_functions(filepath=...) # TODO: test with invalid filepath


from codebase_genome import FuncVisitor  # NOTE(review): fails — FuncVisitor is nested inside extract_functions


def test_FuncVisitor_visit_ClassDef():
    """Test FuncVisitor.visit_ClassDef (line 72 in codebase_genome.py)."""
    # TODO: instantiate FuncVisitor with valid args
    obj = FuncVisitor()
    result = obj.visit_ClassDef(None)
    assert result is not None or result is None # TODO: real assertion


def test_FuncVisitor_visit_ClassDef_edge_cases():
    """Edge cases for FuncVisitor.visit_ClassDef."""
    # Test with empty/zero/None args
    obj = FuncVisitor()
    # obj.visit_ClassDef(node=...) # TODO: test with invalid node


from codebase_genome import FuncVisitor


def test_FuncVisitor_visit_FunctionDef():
    """Test FuncVisitor.visit_FunctionDef (line 78 in codebase_genome.py)."""
    # TODO: instantiate FuncVisitor with valid args
    obj = FuncVisitor()
    result = obj.visit_FunctionDef(None)
    assert result is not None or result is None # TODO: real assertion


def test_FuncVisitor_visit_FunctionDef_edge_cases():
    """Edge cases for FuncVisitor.visit_FunctionDef."""
    # Test with empty/zero/None args
    obj = FuncVisitor()
    # obj.visit_FunctionDef(node=...) # TODO: test with invalid node


from codebase_genome import generate_test


def test_generate_test():
    """Test generate_test (line 116 in codebase_genome.py)."""
    # TODO: provide valid arguments for: func, existing_tests
    result = generate_test(None, None)
    assert result is not None or result is None # TODO: real assertion


def test_generate_test_edge_cases():
    """Edge cases for generate_test."""
    # Test with empty/zero/None args
    # generate_test(func=...) # TODO: test with invalid func
    # generate_test(existing_tests=...) # TODO: test with invalid existing_tests


from codebase_genome import scan_repo


def test_scan_repo():
    """Test scan_repo (line 191 in codebase_genome.py)."""
    # TODO: provide valid arguments for: repo_path
    result = scan_repo(None)
    assert result is not None or result is None # TODO: real assertion


def test_scan_repo_edge_cases():
    """Edge cases for scan_repo."""
    # Test with empty/zero/None args
    # scan_repo(repo_path=...) # TODO: test with invalid repo_path


from codebase_genome import find_existing_tests


def test_find_existing_tests():
    """Test find_existing_tests (line 209 in codebase_genome.py)."""
    # TODO: provide valid arguments for: repo_path
    result = find_existing_tests(None)
    assert result is not None or result is None # TODO: real assertion


def test_find_existing_tests_edge_cases():
    """Edge cases for find_existing_tests."""
    # Test with empty/zero/None args
    # find_existing_tests(repo_path=...) # TODO: test with invalid repo_path


from codebase_genome import main


def test_main():
    """Test main (line 231 in codebase_genome.py)."""
    result = main()
    assert result is not None or result is None # TODO: real assertion


def test_main_edge_cases():
    """Edge cases for main."""
    # main takes no args — test idempotency
    r1 = main()
    r2 = main()
    # assert r1 == r2 # TODO: uncomment if deterministic
from evennia_tools.layout import room_keys
def test_room_keys():
"""Test room_keys (line 54 in evennia_tools/layout.py)."""
result = room_keys()
assert result is not None or result is None # TODO: real assertion
def test_room_keys_edge_cases():
"""Edge cases for room_keys."""
# room_keys takes no args — test idempotency
r1 = room_keys()
r2 = room_keys()
# assert r1 == r2 # TODO: uncomment if deterministic
from evennia_tools.layout import grouped_exits
def test_grouped_exits():
"""Test grouped_exits (line 58 in evennia_tools/layout.py)."""
result = grouped_exits()
assert result is not None or result is None # TODO: real assertion
def test_grouped_exits_edge_cases():
"""Edge cases for grouped_exits."""
# grouped_exits takes no args — test idempotency
r1 = grouped_exits()
r2 = grouped_exits()
# assert r1 == r2 # TODO: uncomment if deterministic
from evennia_tools.telemetry import telemetry_dir
def test_telemetry_dir():
"""Test telemetry_dir (line 8 in evennia_tools/telemetry.py)."""
# TODO: provide valid arguments for: base_dir
result = telemetry_dir(None)
assert result is not None or result is None # TODO: real assertion
def test_telemetry_dir_edge_cases():
"""Edge cases for telemetry_dir."""
# Test with empty/zero/None args
# telemetry_dir(base_dir=...) # TODO: test with invalid base_dir
from evennia_tools.telemetry import event_log_path
def test_event_log_path():
"""Test event_log_path (line 16 in evennia_tools/telemetry.py)."""
# TODO: provide valid arguments for: session_id, base_dir
result = event_log_path(None, None)
assert result is not None or result is None # TODO: real assertion
def test_event_log_path_edge_cases():
"""Edge cases for event_log_path."""
# Test with empty/zero/None args
# event_log_path(session_id=...) # TODO: test with invalid session_id
# event_log_path(base_dir=...) # TODO: test with invalid base_dir
from evennia_tools.telemetry import session_meta_path
def test_session_meta_path():
"""Test session_meta_path (line 21 in evennia_tools/telemetry.py)."""
# TODO: provide valid arguments for: session_id, base_dir
result = session_meta_path(None, None)
assert result is not None or result is None # TODO: real assertion
def test_session_meta_path_edge_cases():
"""Edge cases for session_meta_path."""
# Test with empty/zero/None args
# session_meta_path(session_id=...) # TODO: test with invalid session_id
# session_meta_path(base_dir=...) # TODO: test with invalid base_dir
from evennia_tools.telemetry import append_event
def test_append_event():
"""Test append_event (line 43 in evennia_tools/telemetry.py)."""
# TODO: provide valid arguments for: session_id, event, base_dir
result = append_event(None, None, None)
assert result is not None or result is None # TODO: real assertion
def test_append_event_edge_cases():
"""Edge cases for append_event."""
# Test with empty/zero/None args
# append_event(session_id=...) # TODO: test with invalid session_id
# append_event(event=...) # TODO: test with invalid event
# append_event(base_dir=...) # TODO: test with invalid base_dir
from evennia_tools.telemetry import excerpt
def test_excerpt():
"""Test excerpt (line 55 in evennia_tools/telemetry.py)."""
# TODO: provide valid arguments for: text, limit
result = excerpt(None, None)
assert result is not None or result is None # TODO: real assertion
def test_excerpt_edge_cases():
"""Edge cases for excerpt."""
# Test with empty/zero/None args
# excerpt(text=...) # TODO: test with invalid text
# excerpt(limit=...) # TODO: test with invalid limit
from evennia_tools.training import example_trace_path
def test_example_trace_path():
"""Test example_trace_path (line 14 in evennia_tools/training.py)."""
# TODO: provide valid arguments for: repo_root
result = example_trace_path(None)
assert result is not None or result is None # TODO: real assertion
def test_example_trace_path_edge_cases():
"""Edge cases for example_trace_path."""
# Test with empty/zero/None args
# example_trace_path(repo_root=...) # TODO: test with invalid repo_root
from evennia_tools.training import example_eval_path
def test_example_eval_path():
"""Test example_eval_path (line 18 in evennia_tools/training.py)."""
# TODO: provide valid arguments for: repo_root
result = example_eval_path(None)
assert result is not None or result is None # TODO: real assertion
def test_example_eval_path_edge_cases():
"""Edge cases for example_eval_path."""
# Test with empty/zero/None args
# example_eval_path(repo_root=...) # TODO: test with invalid repo_root
from metrics.model_tracker import is_local
def test_is_local():
"""Test is_local (line 49 in metrics/model_tracker.py)."""
# TODO: provide valid arguments for: model
result = is_local(None)
assert result is not None or result is None # TODO: real assertion
def test_is_local_edge_cases():
"""Edge cases for is_local."""
# Test with empty/zero/None args
# is_local(model=...) # TODO: test with invalid model
from metrics.model_tracker import init_db


def test_init_db():
    """Test init_db (line 61 in metrics/model_tracker.py)."""
    outcome = init_db()
    # Placeholder tautology — tighten once the return contract is known.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_init_db_edge_cases():
    """Edge cases for init_db."""
    # Zero-argument function: calling twice probes idempotency.
    first = init_db()
    second = init_db()
    # assert first == second  # TODO: uncomment if deterministic
from metrics.model_tracker import ingest_from_hermes


def test_ingest_from_hermes():
    """Test ingest_from_hermes (line 107 in metrics/model_tracker.py)."""
    # TODO: supply valid values for conn and days.
    outcome = ingest_from_hermes(None, None)
    # Placeholder tautology — tighten once real inputs exist.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_ingest_from_hermes_edge_cases():
    """Edge cases for ingest_from_hermes."""
    # Exercise empty/zero/None variants of each argument:
    #   ingest_from_hermes(conn=...)  # TODO: test with invalid conn
    #   ingest_from_hermes(days=...)  # TODO: test with invalid days
from metrics.model_tracker import report


def test_report():
    """Test report (line 168 in metrics/model_tracker.py)."""
    # TODO: supply valid values for conn and days.
    outcome = report(None, None)
    # Placeholder tautology — tighten once real inputs exist.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_report_edge_cases():
    """Edge cases for report."""
    # Exercise empty/zero/None variants of each argument:
    #   report(conn=...)  # TODO: test with invalid conn
    #   report(days=...)  # TODO: test with invalid days
from metrics.model_tracker import record_eval


def test_record_eval():
    """Test record_eval (line 226 in metrics/model_tracker.py)."""
    # TODO: supply valid values for conn, model, task, score, latency,
    # tokens_in, tokens_out, and notes.
    outcome = record_eval(None, None, None, None, None, None, None, None)
    # Placeholder tautology — tighten once real inputs exist.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_record_eval_edge_cases():
    """Edge cases for record_eval."""
    # Exercise empty/zero/None variants of each argument:
    #   record_eval(conn=...)       # TODO: test with invalid conn
    #   record_eval(model=...)      # TODO: test with invalid model
    #   record_eval(task=...)       # TODO: test with invalid task
    #   record_eval(score=...)      # TODO: test with invalid score
    #   record_eval(latency=...)    # TODO: test with invalid latency
    #   record_eval(tokens_in=...)  # TODO: test with invalid tokens_in
    #   record_eval(tokens_out=...) # TODO: test with invalid tokens_out
    #   record_eval(notes=...)      # TODO: test with invalid notes
from metrics.model_tracker import main


def test_main():
    """Test main (line 234 in metrics/model_tracker.py)."""
    outcome = main()
    # Placeholder tautology — tighten once the return contract is known.
    # NOTE(review): main() may parse sys.argv / have side effects — confirm
    # it is safe to invoke under pytest.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_main_edge_cases():
    """Edge cases for main."""
    # Zero-argument function: calling twice probes idempotency.
    first = main()
    second = main()
    # assert first == second  # TODO: uncomment if deterministic
from morrowind.pilot import parse_latest_perception


def test_parse_latest_perception():
    """Test parse_latest_perception (line 56 in morrowind/pilot.py)."""
    outcome = parse_latest_perception()
    # Placeholder tautology — tighten once the return contract is known.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_parse_latest_perception_edge_cases():
    """Edge cases for parse_latest_perception."""
    # Zero-argument function: calling twice probes idempotency.
    first = parse_latest_perception()
    second = parse_latest_perception()
    # assert first == second  # TODO: uncomment if deterministic
from morrowind.pilot import send_key


def test_send_key():
    """Test send_key (line 109 in morrowind/pilot.py)."""
    # TODO: supply valid values for keycode, duration, and shift.
    outcome = send_key(None, None, None)
    # Placeholder tautology — tighten once real inputs exist.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_send_key_edge_cases():
    """Edge cases for send_key."""
    # Exercise empty/zero/None variants of each argument:
    #   send_key(keycode=...)   # TODO: test with invalid keycode
    #   send_key(duration=...)  # TODO: test with invalid duration
    #   send_key(shift=...)     # TODO: test with invalid shift
from morrowind.pilot import move


def test_move():
    """Test move (line 126 in morrowind/pilot.py)."""
    # TODO: supply valid values for direction, duration, and run.
    outcome = move(None, None, None)
    # Placeholder tautology — tighten once real inputs exist.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_move_edge_cases():
    """Edge cases for move."""
    # Exercise empty/zero/None variants of each argument:
    #   move(direction=...)  # TODO: test with invalid direction
    #   move(duration=...)   # TODO: test with invalid duration
    #   move(run=...)        # TODO: test with invalid run
from morrowind.pilot import activate


def test_activate():
    """Test activate (line 138 in morrowind/pilot.py)."""
    outcome = activate()
    # Placeholder tautology — tighten once the return contract is known.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_activate_edge_cases():
    """Edge cases for activate."""
    # Zero-argument function: calling twice probes idempotency.
    first = activate()
    second = activate()
    # assert first == second  # TODO: uncomment if deterministic
from morrowind.pilot import jump


def test_jump():
    """Test jump (line 144 in morrowind/pilot.py)."""
    outcome = jump()
    # Placeholder tautology — tighten once the return contract is known.
    assert outcome is None or outcome is not None  # TODO: real assertion


def test_jump_edge_cases():
    """Edge cases for jump."""
    # Zero-argument function: calling twice probes idempotency.
    first = jump()
    second = jump()
    # assert first == second  # TODO: uncomment if deterministic

View File

@@ -0,0 +1,293 @@
# Big Brain Quality Benchmark
## Big Brain (gemma3:27b, RunPod L40S) vs Local (gemma3:1b)
**Date:** 2026-04-14
**Issue:** #576
**Milestone:** Big Brain Showcase — RunPod L40S Operational
---
## Environment
| Parameter | Big Brain | Local |
|-------------------|------------------------------------|---------------------|
| Model | gemma3:27b | gemma3:1b |
| Hardware | RunPod L40S 48GB | Apple Silicon (local Ollama) |
| Endpoint | 8lfr3j47a5r3gn-11434.proxy.runpod.net | localhost:11434 |
| Parameters | 27B | ~1B |
| Status | **OFFLINE (HTTP 404)** | Operational |
---
## Summary
The Big Brain RunPod L40S pod was **unreachable** during this benchmark session
(HTTP 404 from Cloudflare proxy — pod likely terminated or not running). All four
tasks were executed against the local gemma3:1b model only.
Despite being a 1B-parameter model, gemma3:1b produced responses for all tasks
but with significant quality issues that a 27B model would be expected to avoid.
### Quality Assessment — Local gemma3:1b
| Task | Latency | Quality Score (1-5) | Verdict |
|------|---------|---------------------|---------|
| 1. Gitea webhook parser | 4,435 ms | 2/5 | **BROKEN** — fatal bugs in HMAC logic |
| 2. Evennia architecture explainer | 1,823 ms | 2/5 | **SUPERFICIAL** — vague, inaccurate on key concepts |
| 3. Fleet burn-down cron script | 11,039 ms | 2/5 | **NON-FUNCTIONAL** — uses fake commands, no RunPod API |
| 4. Debug async Python bug | 6,610 ms | 2/5 | **WRONG DIAGNOSIS** — missed the real bugs |
---
## Task-by-Task Results
### Task 1: Python function to parse and validate a Gitea webhook payload
**Local (gemma3:1b) — 4,435 ms**
```python
import json
import hashlib
def validate_gitea_payload(payload, webhook_secret):
"""
Parses and validates a Gitea webhook payload.
Args:
payload (dict): The JSON payload to validate.
webhook_secret (str): The Gitea webhook secret.
Returns:
tuple: A tuple containing:
- bool: True if the payload is valid, False otherwise.
- str: The parsed JSON payload if valid, None otherwise.
"""
try:
data = json.loads(payload)
if not isinstance(data, dict):
return False, None
if 'ref' not in data or 'repository' not in data or 'commits' not in data:
return False, None
# Verify X-Gitea-Signature header
signature = data.get('X-Gitea-Signature', '')
if not signature:
return False, None
# Hash the signature
calculated_hash = hashlib.sha256(signature.encode('utf-64')).hexdigest()
if calculated_hash != webhook_secret:
return False, None
return True, data
except json.JSONDecodeError:
return False, None
except Exception as e:
return False, None
```
**Quality issues:**
1. **FATAL:** Uses `hashlib.sha256` instead of `hmac.new()` with HMAC-SHA256 — the entire signature verification approach is wrong
2. **FATAL:** `signature.encode('utf-64')` — no such encoding exists (would crash with LookupError)
3. **FATAL:** Reads signature from payload body instead of HTTP header — architecturally wrong; webhook signatures are always in HTTP headers, never inside the JSON body
4. The function accepts `payload` as a dict but calls `json.loads()` on it — type confusion
5. Missing actual HMAC verification: should be `hmac.new(secret, payload_body, 'sha256').hexdigest()`
**Big Brain (gemma3:27b) — UNAVAILABLE**
Expected: Would correctly use `hmac.new(key, msg, hashlib.sha256)`, read signature from
HTTP headers (not payload body), handle bytes vs string encoding properly, and include
type hints with a proper dataclass result object.
---
### Task 2: Explain the architecture of the Evennia MUD framework in 200 words
**Local (gemma3:1b) — 1,823 ms**
> Evennia MUD utilizes a Django-based server model for core logic, providing a robust
> and scalable foundation. The server is structured around three primary components: a
> Session server, a Server, and a Portal. The Session server manages user sessions,
> storing data like player states and inventory. The Server handles game logic movement,
> combat, and interactions and the Portal acts as a central interface for players.
>
> Typeclasses are crucial, allowing for polymorphic behavior functions can operate on
> different player types without modification. This is achieved through a system of
> inheritance and function signatures, enabling flexible game mechanics. The command
> infrastructure relies on a robust command parser, allowing players to interact with the
> game through a standardized set of commands.
>
> Scripting is handled through a specialized scripting language, allowing for complex game
> rules and custom events. Players use a 'script' file format, defining actions and
> conditions. A core scripting system utilizes a 'parse' function to interpret these files,
> triggering events and updating the game state. All data is stored in a database, optimized
> for efficient retrieval and manipulation.
**Quality issues:**
1. **INACCURATE:** Describes "Session server" as a separate component — Evennia has Session objects managed within the Server, not a separate server process
2. **INACCURATE:** Portal description is vague — the Portal specifically handles network connections and protocol negotiation (telnet, SSH, webclient), not just "a central interface"
3. **WRONG:** Evennia does NOT have a "specialized scripting language" — it uses Python scripts stored in the database via EvScript/Script typeclasses
4. **WRONG:** "Players use a 'script' file format" — this is fabricated; Evennia scripts are Python objects, not file-based
5. **VAGUE:** Typeclass explanation is superficial — misses the key concept that they're Django proxy models providing persistent game object behavior
6. Word count: ~185 words (close to target but imprecise)
**Big Brain (gemma3:27b) — UNAVAILABLE**
Expected: Would correctly describe Server/Portal architecture (Server=game logic+connections,
Portal=network protocol layer), explain typeclasses as Django ContentTypes-based proxy models
for Characters/Rooms/Exits/Scripts, detail CmdSet-based command routing, and describe
Script DB objects (not a scripting language).
---
### Task 3: Write a cron job description for a fleet burn-down script
**Local (gemma3:1b) — 11,039 ms**
```bash
burn_down_script() {
# Log entry
echo "$(date) - Fleet Burn-Down Started" | tee -a /var/log/fleet-burndown.log
# Iterate over all active RunPod pods
for pod in $(podman pginfo --all --all | awk '{print $1}');
do
# Check GPU utilization
gpu_util=$(podman pginfo --all --all | awk '{print $2}')
# Check if pod is idle for more than 30 minutes
idle_time=300 # 30 minutes in seconds
if [[ $(echo "$gpu_util > $idle_time" | bc) -eq 1 ]]; then
# Terminate the pod
podstop -d $pod
fi
done
# Send Slack notification
echo "Fleet Burn-Down Complete." | send -t 10 "Fleet Burn-Down Complete."
echo "Script completed."
}
```
Cron entry: `*/5 * * * * /path/to/fleet_burndown.sh`
**Quality issues:**
1. **FATAL:** Uses `podman pginfo` and `podstop` — these commands don't exist. Should use RunPod API (`curl` to `https://api.runpod.io/graphql`) or `runpodctl`
2. **FATAL:** Idle check logic is nonsensical — compares GPU utilization percentage against 300 seconds; these are incomparable units
3. **WRONG:** `idle_time=300` is 5 minutes, not 30 minutes (should be 1800)
4. **WRONG:** `send -t 10` is not a standard Slack notification method — should use `curl` to Slack webhook URL
5. **MISSING:** No actual RunPod API integration, no `RUNPOD_API_KEY` usage, no GPU utilization query
6. No error handling, no exit codes, no structured logging
7. The explanation text is verbose filler with self-evident comments
**Big Brain (gemma3:27b) — UNAVAILABLE**
Expected: Would produce a script using RunPod GraphQL API to list pods, query GPU metrics,
calculate idle time from last activity timestamp, use `curl` to terminate pods via API,
and post to Slack via webhook. Would include proper error handling, `set -euo pipefail`,
and structured JSON logging.
---
### Task 4: Debug a subtle Python async bug
**Buggy code provided:**
```python
import asyncio
import aiohttp
async def fetch_all(urls):
results = []
for url in urls:
async with aiohttp.ClientSession() as session:
resp = await session.get(url)
data = await resp.json()
results.append(data)
return results
async def main():
urls = ["https://api.example.com/data1", "https://api.example.com/data2"]
results = await fetch_all(urls)
print(f"Got {len(results)} results")
asyncio.run(main())
```
**Local (gemma3:1b) — 6,610 ms**
The model identified these "bugs":
1. "Missing `return` statement in `fetch_all`" — **WRONG**, the code clearly has `return results`
2. "No error handling for `resp.json()`" — This is a valid improvement suggestion but not a bug
3. "No `await` keyword in `main()`" — **WRONG**, `main()` is correctly `async def` and `asyncio.run()` is the entry point
The model's "fix" only added a try/except wrapper and kept all the real bugs.
**Actual bugs in the provided code:**
1. **Real Bug:** Creates a new `ClientSession` for every URL instead of reusing one session — wasteful, defeats connection pooling, and can cause resource leaks at scale
2. **Real Bug:** No `resp.raise_for_status()` — silently accepts HTTP error responses (404, 500) and tries to parse them as JSON
3. **Anti-pattern:** Sequential `await` in loop instead of `asyncio.gather()` for concurrent fetching — no parallelism despite using async
4. **Missing:** No timeout on `session.get()` — could hang forever
5. **Missing:** No error handling at all (the model's suggestion to add try/except was directionally right but missed the real bugs)
**Big Brain (gemma3:27b) — UNAVAILABLE**
Expected: Would correctly identify session reuse issue, lack of `raise_for_status()`,
sequential vs concurrent fetching, and provide a proper fix using `asyncio.gather()` with
a single shared session and timeout/deadline handling.
---
## Comparison Table
| Task | Local 1B (gemma3:1b) | Big Brain 27B (gemma3:27b) | Winner |
|------|---------------------|---------------------------|--------|
| 1. Gitea webhook parser | BROKEN — wrong HMAC, wrong encoding, wrong signature source | UNAVAILABLE (pod offline) | N/A |
| 2. Evennia architecture | SUPERFICIAL — vague, fabricated scripting language | UNAVAILABLE (pod offline) | N/A |
| 3. Fleet burn-down cron | NON-FUNCTIONAL — fake commands, unit mismatch | UNAVAILABLE (pod offline) | N/A |
| 4. Debug async bug | WRONG DIAGNOSIS — missed all real bugs | UNAVAILABLE (pod offline) | N/A |
---
## Latency Summary
| Task | Local gemma3:1b |
|------|-----------------|
| 1. Gitea webhook parser | 4,435 ms |
| 2. Evennia architecture | 1,823 ms |
| 3. Fleet burn-down cron | 11,039 ms |
| 4. Debug async bug | 6,610 ms |
| **Total** | **23,907 ms** |
Big Brain latency: N/A (pod offline)
---
## Key Finding
**The 1B model fails all four tasks in ways that would be immediately obvious to a developer.**
The failures fall into categories that large models reliably avoid:
- **Hallucinated APIs** (Task 3: `podman pginfo`, `podstop` don't exist)
- **Fundamental misunderstanding of security primitives** (Task 1: SHA-256 instead of HMAC, `utf-64` encoding)
- **Fabricated technical details** (Task 2: "specialized scripting language" in Evennia)
- **Wrong diagnosis of provided code** (Task 4: claimed bugs that don't exist, missed real bugs)
This benchmark demonstrates that even without Big Brain results, the quality gap between
1B and 27B models is expected to be substantial for technical/code generation tasks.
---
## Next Steps
1. **Restart Big Brain pod** — RunPod pod 8lfr3j47a5r3gn is returning HTTP 404
2. **Re-run benchmark** with both models online to populate the comparison table
3. Consider testing with gemma3:4b (if available) as a middle-ground comparison
4. Run Big Brain at `temperature: 0.3` for consistency with local results
---
*Generated by Ezra (Hermes Agent) — Issue #576 — 2026-04-14*