Merge pull request #22 from AlexanderWhitestone/claude/audit-timmy-dashboard-ft27r

2026-02-24 14:18:29 -05:00
parent 03ff505c4b 96c9f1b02f
commit 7018a756b3
9 changed files with 454 additions and 36 deletions
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 [![Tests](https://github.com/AlexanderWhitestone/Timmy-time-dashboard/actions/workflows/tests.yml/badge.svg)](https://github.com/AlexanderWhitestone/Timmy-time-dashboard/actions/workflows/tests.yml)

-A local-first, sovereign AI agent system.  Talk to Timmy, watch his swarm, gate API access with Bitcoin Lightning — all from a browser, no cloud required.
+A local-first, sovereign AI agent system.  Talk to Timmy, watch his swarm, gate API access with Bitcoin Lightning — all from a browser, no cloud AI required.

 **[Live Docs →](https://alexanderwhitestone.github.io/Timmy-time-dashboard/)**

@@ -15,7 +15,7 @@ A local-first, sovereign AI agent system.  Talk to Timmy, watch his swarm, gate
 | **Timmy Agent** | Agno-powered agent (Ollama default, AirLLM optional for 70B/405B) |
 | **Mission Control** | FastAPI + HTMX dashboard — chat, health, swarm, marketplace |
 | **Swarm** | Multi-agent coordinator — spawn agents, post tasks, run Lightning auctions |
-| **L402 / Lightning** | Bitcoin Lightning payment gating for API access |
+| **L402 / Lightning** | Bitcoin Lightning payment gating for API access (mock backend; LND scaffolded) |
 | **Spark Intelligence** | Event capture, predictions, memory consolidation, advisory engine |
 | **Creative Studio** | Multi-persona creative pipeline — image, music, video generation |
 | **Tools** | Git, image, music, and video tools accessible by persona agents |
@@ -25,7 +25,7 @@ A local-first, sovereign AI agent system.  Talk to Timmy, watch his swarm, gate
 | **Telegram** | Bridge Telegram messages to Timmy |
 | **CLI** | `timmy`, `timmy-serve`, `self-tdd` entry points |

-**600+ tests, 100% passing.**
+**Full test suite, 100% passing.**

 ---

@@ -161,7 +161,7 @@ cp .env.example .env
 | `AIRLLM_MODEL_SIZE` | `70b` | `8b` \| `70b` \| `405b` |
 | `L402_HMAC_SECRET` | *(default — change in prod)* | HMAC signing key for macaroons |
 | `L402_MACAROON_SECRET` | *(default — change in prod)* | Macaroon secret |
-| `LIGHTNING_BACKEND` | `mock` | `mock` \| `lnd` |
+| `LIGHTNING_BACKEND` | `mock` | `mock` (production-ready) \| `lnd` (scaffolded, not yet functional) |

 ---

@@ -217,7 +217,7 @@ src/
  shortcuts/          # Siri Shortcuts endpoints
  telegram_bot/       # Telegram bridge
  self_tdd/           # Continuous test watchdog
-tests/                # 600+ tests — one file per module, all mocked
+tests/                # one test file per module, all mocked
 static/style.css      # Dark mission-control theme (JetBrains Mono)
 docs/                 # GitHub Pages landing page
 AGENTS.md             # AI agent development standards ← read this
--- a/docs/AUDIT_REPORT.md
+++ b/docs/AUDIT_REPORT.md
@@ -0,0 +1,352 @@
+# Timmy Time Dashboard - Feature Audit Report
+
+**Date**: 2026-02-24
+**Auditor**: Claude (Opus 4.6)
+**Scope**: All features claimed in documentation (`docs/index.html`, `README.md`) vs. actual implementation
+
+---
+
+## Executive Summary
+
+The Timmy Time Dashboard is a **real, functional codebase** with substantial implementation across its 15+ subsystems. However, the documentation contains several **misleading or inaccurate claims** that overstate readiness in some areas and understate capability in others.
+
+### Key Findings
+
+| Claim | Verdict | Detail |
+|-------|---------|--------|
+| "600+ Tests Passing" | **UNDERSTATED** | 643 tests collected and passing |
+| "20+ API Endpoints" | **UNDERSTATED** | 58 actual endpoints |
+| "0 Cloud Calls" | **FALSE** | Frontend loads Bootstrap, HTMX, Google Fonts from CDN |
+| "LND gRPC-ready for production" | **FALSE** | Every LND method raises `NotImplementedError` |
+| "15 Subsystems" | **TRUE** | 15+ distinct modules confirmed |
+| "No cloud, no telemetry" | **PARTIALLY FALSE** | Backend is local-only; frontend depends on CDN resources |
+| "Agents earn and spend sats autonomously" | **FALSE** | Not implemented; agents bid in sats but no satoshi movement occurs |
+| "15-second Lightning auctions" | **PARTIALLY TRUE** | Auction logic exists but `asyncio.sleep(0)` closes auctions immediately |
+| "Macaroon" implementation | **SIMPLIFIED** | HMAC-only, not true macaroons (no caveats, no delegation) |
+
+**Overall assessment**: The core system (agent, dashboard, swarm coordination, mock Lightning, voice NLU, creative pipeline orchestration, WebSocket, Spark intelligence) is genuinely implemented and well-tested. The main areas of concern are inflated claims about Lightning/LND production readiness and the "zero cloud" positioning.
+
+---
+
+## 1. Test Suite Audit
+
+### Claim: "600+ Tests Passing"
+
+**Verdict: TRUE (understated)**
+
+```
+$ python -m pytest -q
+643 passed, 1 warning in 46.06s
+```
+
+- **47 test files**, **643 test functions**
+- All pass cleanly on Python 3.11
+- Tests are mocked at appropriate boundaries (no Ollama/GPU required)
+- Test quality is generally good - tests verify real state transitions, SQLite persistence, HTTP response structure, and business logic
+
+### Test Quality Assessment
+
+**Strengths:**
+- Swarm tests use real temporary SQLite databases (not mocked away)
+- L402/Lightning tests verify cryptographic operations (macaroon serialization, HMAC signing, preimage verification)
+- Dashboard tests use FastAPI `TestClient` with actual HTTP requests
+- Assembler tests produce real video files with MoviePy
+
+**Weaknesses:**
+- LND backend is entirely untested (all methods raise `NotImplementedError`)
+- `agent_core/ollama_adapter.py` has two TODO stubs (`persist_memory`, `communicate`) that are tested as no-ops
+- Creative tool tests mock the heavyweight model loading (expected, but means end-to-end generation is untested)
+- Some tests only verify status codes without checking response body content
+
+---
+
+## 2. Feature-by-Feature Audit
+
+### 2.1 Timmy Agent
+**Claimed**: Agno-powered conversational agent backed by Ollama, AirLLM for 70B-405B models, SQLite memory
+**Verdict: REAL & FUNCTIONAL**
+
+- `src/timmy/agent.py` (79 lines): Creates a genuine `agno.Agent` with Ollama model, SQLite persistence, tools, and system prompt
+- Backend selection (`backends.py`) implements real Ollama/AirLLM switching with Apple Silicon detection
+- CLI (`cli.py`) provides working `timmy chat`, `timmy think`, `timmy status` commands
+- Approval workflow (`approvals.py`) implements real human-in-the-loop with SQLite-backed state
+- Briefing system (`briefing.py`) generates real scheduled briefings
+
+**Issues**:
+- `agent_core/ollama_adapter.py:184` has `# TODO: Persist to SQLite for long-term memory` and `communicate()` at line 221 is explicitly described as "a stub"
+- CLI tests are sparse: only 2 tests for 3 commands. The `chat` and `think` commands lack dedicated test coverage.
+
+### 2.2 Mission Control UI
+**Claimed**: FastAPI + HTMX + Jinja2 dashboard, dark terminal aesthetic
+**Verdict: REAL & FUNCTIONAL**
+
+- **58 actual endpoints** (documentation claims "20+")
+- Full Jinja2 template hierarchy with base layout + 12 page templates + 12 partials
+- Real HTMX integration for dynamic updates
+- Bootstrap 5 loaded from CDN (contradicts "no cloud" claim)
+- Dark theme with JetBrains Mono font (loaded from Google Fonts CDN)
+
+### 2.3 Multi-Agent Swarm
+**Claimed**: Coordinator, registry, bidder, manager, sub-agent spawning, 15-second Lightning auctions
+**Verdict: REAL & FUNCTIONAL**
+
+- `coordinator.py` (400+ lines): Full orchestration of task lifecycle
+- `registry.py`: Real SQLite-backed agent registry with capabilities tracking
+- `bidder.py`: Genuine auction logic with configurable timeouts and bid scoring
+- `manager.py`: Spawns agents as subprocesses with lifecycle management
+- `tasks.py`: SQLite-backed task CRUD with state machine transitions
+- `comms.py`: In-memory pub/sub (Redis optional, graceful fallback)
+- `routing.py`: Capability-based task routing
+- `learner.py`: Agent outcome learning
+- `recovery.py`: Fault recovery on startup
+- 9 personas defined (Echo, Mace, Helm, Seer, Forge, Quill, Pixel, Lyra, Reel)
+
+**Issues**:
+- The documentation roadmap mentions personas "Echo, Mace, Helm, Seer, Forge, Quill" but the codebase also includes Pixel, Lyra, and Reel. The creative persona toolkits (pixel, lyra, reel) are stubs in `tools.py:293-295` — they create empty `Toolkit` objects because the real tools live in separate modules.
+- **Auction timing bug**: `coordinator.py` uses `await asyncio.sleep(0)` instead of the documented 15-second wait, meaning auctions close almost immediately. This is masked by synchronous in-process bidding but would break for subprocess/Docker agents.
+- **Docker agent HTTP API partially wired**: `agent_runner.py` polls `/internal/tasks` and posts to `/internal/bids` — these endpoints exist in `swarm_internal.py` but the integration path is incomplete for containerized deployment.
+- **Tool execution not fully wired**: `persona_node.py`'s `execute_task()` has infrastructure for tool invocation but doesn't execute tools end-to-end in practice.
+
+### 2.4 L402 Lightning Payments
+**Claimed**: "Bitcoin Lightning payment gating via HMAC macaroons. Mock backend for dev, LND gRPC-ready for production. Agents earn and spend sats autonomously."
+**Verdict: PARTIALLY IMPLEMENTED - LND CLAIM IS FALSE**
+
+**What works:**
+- Mock Lightning backend (`mock_backend.py`): Fully functional invoice creation, payment simulation, settlement, balance tracking
+- L402 proxy (`l402_proxy.py`): Real macaroon creation/verification with HMAC signing
+- Payment handler (`payment_handler.py`): Complete invoice lifecycle management
+- Inter-agent payment settlement (`inter_agent.py`): Framework exists with mock backend
+
+**What does NOT work:**
+- **LND backend (`lnd_backend.py`)**: Every single method raises `NotImplementedError` or returns hardcoded fallback values:
+  - `create_invoice()` — `raise NotImplementedError` (line 199)
+  - `check_payment()` — `raise NotImplementedError` (line 220)
+  - `get_invoice()` — `raise NotImplementedError` (line 248)
+  - `list_invoices()` — `raise NotImplementedError` (line 290)
+  - `get_balance_sats()` — `return 0` with warning (line 304)
+  - `health_check()` — returns `{"ok": False, "backend": "lnd-stub"}` (line 327)
+  - The gRPC stub is explicitly `None` with comment: "LND gRPC stubs not yet implemented" (line 153)
+
+**The documentation claim that LND is "gRPC-ready for production" is false.** The file contains commented-out pseudocode showing what the implementation *would* look like, but no actual gRPC calls are made. The gRPC channel/auth infrastructure is ~80% ready but the protobuf stubs are missing entirely. The claim that "agents earn and spend sats autonomously" is also unimplemented — agents bid in sats during auctions but `payment_handler.settle_invoice()` is never called from agent code. No satoshi movement occurs. This is listed under v3.0.0 (Planned) in the roadmap but stated as current capability in the features section.
+
+Additionally, the "macaroon" implementation is HMAC-only (`l402_proxy.py:67-69`), not true macaroons. There is no support for caveats, delegation, or cryptographic nesting. This is adequate for L402 but not the full macaroon specification the documentation implies.
+
+### 2.5 Spark Intelligence Engine
+**Claimed**: Event capture, predictions (EIDOS), memory consolidation, advisory engine
+**Verdict: REAL & FUNCTIONAL**
+
+- `engine.py`: Full event lifecycle with 8 event types, SQLite persistence
+- `eidos.py`: Genuine prediction logic with multi-component accuracy scoring (winner prediction 0.4 weight, success probability 0.4 weight, bid range 0.2 weight)
+- `memory.py`: Real event-to-memory pipeline with importance scoring and consolidation
+- `advisor.py`: Generates actionable recommendations based on failure patterns, agent performance, and bid optimization
+- Dashboard routes expose `/spark`, `/spark/ui`, `/spark/timeline`, `/spark/insights`
+
+### 2.6 Creative Studio
+**Claimed**: Multi-persona creative pipeline for image, music, video generation
+**Verdict: REAL ORCHESTRATION, BACKEND MODELS OPTIONAL**
+
+- `director.py`: True end-to-end pipeline (storyboard -> music -> video -> assembly -> complete)
+- `assembler.py`: Real video assembly using MoviePy with cross-fade transitions, audio overlay, title cards, subtitles
+- `image_tools.py`: FLUX.1 diffusers pipeline (lazy-loaded)
+- `music_tools.py`: ACE-Step model integration (lazy-loaded)
+- `video_tools.py`: Wan 2.1 text-to-video pipeline (lazy-loaded)
+
+The orchestration is 100% real. Tool backends are implemented with real model loading logic but require heavyweight dependencies (GPU, model downloads). Graceful degradation if missing.
+
+### 2.7 Voice I/O
+**Claimed**: Pattern-matched NLU, TTS via pyttsx3
+**Verdict: REAL & FUNCTIONAL**
+
+- `nlu.py`: Regex-based intent detection with 5 intent types and confidence scoring
+- Entity extraction for agent names, task descriptions, numbers
+- TTS endpoint exists at `/voice/tts/speak`
+- Enhanced voice processing at `/voice/enhanced/process`
+
+### 2.8 Mobile Optimized
+**Claimed**: iOS safe-area, 44px touch targets, 16px inputs, 21-scenario HITL test harness
+**Verdict: REAL & FUNCTIONAL**
+
+- `mobile.html` template with iOS viewport-fit, safe-area insets
+- 21-scenario test harness at `/mobile-test`
+- `test_mobile_scenarios.py`: 36 tests covering mobile-specific behavior
+
+### 2.9 WebSocket Live Feed
+**Claimed**: Real-time swarm events over WebSocket
+**Verdict: REAL & FUNCTIONAL**
+
+- `websocket/handler.py`: Connection manager with broadcast, 100-event replay buffer
+- Specialized broadcast methods for agent_joined, task_posted, bid_submitted, task_assigned, task_completed
+- `/ws/swarm` endpoint for live WebSocket connections
+
+### 2.10 Security
+**Claimed**: XSS prevention via textContent, HMAC-signed macaroons, startup warnings for defaults
+**Verdict: REAL & FUNCTIONAL**
+
+- HMAC macaroon signing is cryptographically implemented
+- Config warns on default secrets at startup
+- Templates use Jinja2 autoescaping
+
+### 2.11 Self-TDD Watchdog
+**Claimed**: 60-second polling, regression alerts
+**Verdict: REAL & FUNCTIONAL**
+
+- `self_tdd/watchdog.py` (71 lines): Polls pytest and alerts on failures
+- `activate_self_tdd.sh`: Bootstrap script
+
+### 2.12 Telegram Integration
+**Claimed**: Bridge Telegram messages to Timmy
+**Verdict: REAL & FUNCTIONAL**
+
+- `telegram_bot/bot.py`: python-telegram-bot integration
+- Message handler creates Timmy agent and processes user text
+- Token management with file persistence
+- Dashboard routes at `/telegram/status` and `/telegram/setup`
+
+### 2.13 Siri Shortcuts
+**Claimed**: iOS automation endpoints
+**Verdict: REAL & FUNCTIONAL**
+
+- `shortcuts/siri.py`: 4 endpoint definitions (chat, status, swarm, task)
+- Setup guide generation for iOS Shortcuts app
+
+### 2.14 Push Notifications
+**Claimed**: Local + macOS native notifications
+**Verdict: REAL & FUNCTIONAL**
+
+- `notifications/push.py`: Bounded notification store, listener callbacks
+- macOS native notifications via osascript
+- Read/unread state management
+
+---
+
+## 3. Documentation Accuracy Issues
+
+### 3.1 FALSE: "0 Cloud Calls"
+
+The hero section, stats bar, and feature descriptions all claim zero cloud dependency. However, `src/dashboard/templates/base.html` loads:
+
+| Resource | CDN |
+|----------|-----|
+| Bootstrap 5.3.3 CSS | `cdn.jsdelivr.net` |
+| Bootstrap 5.3.3 JS | `cdn.jsdelivr.net` |
+| HTMX 2.0.3 | `unpkg.com` |
+| JetBrains Mono font | `fonts.googleapis.com` |
+
+These are loaded on every page render. The dashboard will not render correctly without internet access unless these are bundled locally.
+
+**Recommendation**: Bundle these assets locally or change the documentation to say "no cloud AI/telemetry" instead of "0 Cloud Calls."
+
+### 3.2 FALSE: "LND gRPC-ready for production"
+
+The documentation (both `docs/index.html` and `README.md`) implies the LND backend is production-ready. In reality:
+
+- Every method in `lnd_backend.py` raises `NotImplementedError`
+- The gRPC stub initialization explicitly returns `None` with a warning
+- The code contains only commented-out pseudocode
+- The file itself contains a `generate_lnd_protos()` function explaining what steps are needed to *begin* implementation
+
+**Recommendation**: Change documentation to "LND integration planned" or "LND backend scaffolded — mock only for now."
+
+### 3.3 FALSE: "Agents earn and spend sats autonomously"
+
+This capability is described in the v3.0.0 (Planned) roadmap section but is also implied as current functionality in the L402 features card. The inter-agent payment system (`inter_agent.py`) exists but only works with the mock backend.
+
+### 3.4 UNDERSTATED: Test Count and Endpoint Count
+
+- Documentation says "600+ tests" — actual count is **643**
+- Documentation says "20+ API endpoints" — actual count is **58**
+
+These are technically true ("600+" and "20+" include the real numbers) but are misleadingly conservative.
+
+### 3.5 MINOR: "Bootstrap 5" not mentioned in docs/index.html
+
+The GitHub Pages documentation feature card for Mission Control says "FastAPI + HTMX + Bootstrap 5" in its tag line, which is accurate. But the "no cloud" messaging directly contradicts loading Bootstrap from a CDN.
+
+---
+
+## 4. Code Quality Summary
+
+| Module | Lines | Quality | Notes |
+|--------|-------|---------|-------|
+| swarm | 3,069 | Good | Comprehensive coordination with SQLite persistence |
+| dashboard | 1,806 | Good | Clean FastAPI routes, well-structured templates |
+| timmy | 1,353 | Good | Clean agent setup with proper backend abstraction |
+| spark | 1,238 | Excellent | Sophisticated intelligence pipeline |
+| tools | 869 | Good | Real implementations with lazy-loading pattern |
+| lightning | 868 | Mixed | Mock is excellent; LND is entirely unimplemented |
+| timmy_serve | 693 | Good | L402 proxy works with mock backend |
+| creative | 683 | Good | Real orchestration pipeline |
+| agent_core | 627 | Mixed | Some TODO stubs (persist_memory, communicate) |
+| telegram_bot | 163 | Good | Complete integration |
+| notifications | 146 | Good | Working notification store |
+| voice | 133 | Good | Working NLU with intent detection |
+| websocket | 129 | Good | Solid connection management |
+| shortcuts | 93 | Good | Clean endpoint definitions |
+| self_tdd | 71 | Good | Simple and effective |
+
+**Total**: 86 Python files, 12,007 lines of code
+
+---
+
+## 5. Recommendations
+
+1. **Fix the "0 Cloud Calls" claim** — either bundle frontend dependencies locally or change the messaging
+2. **Fix the LND documentation** — clearly mark it as unimplemented/scaffolded, not "production-ready"
+3. **Fix the autonomous sats claim** — move it from current features to roadmap/planned
+4. **Update test/endpoint counts** — "643 tests" and "58 endpoints" are more impressive than "600+" and "20+"
+5. **Implement `agent_core` TODO stubs** — `persist_memory()` and `communicate()` are dead code
+6. **Bundle CDN resources** — for true offline operation, vendor Bootstrap, HTMX, and the font
+
+---
+
+## Appendix: Test Breakdown by Module
+
+| Test File | Tests | Module Tested |
+|-----------|-------|---------------|
+| test_spark.py | 47 | Spark intelligence engine |
+| test_mobile_scenarios.py | 36 | Mobile layout |
+| test_swarm.py | 29 | Swarm core |
+| test_dashboard_routes.py | 25 | Dashboard routes |
+| test_learner.py | 23 | Agent learning |
+| test_briefing.py | 22 | Briefing system |
+| test_swarm_personas.py | 21 | Persona definitions |
+| test_coordinator.py | 20 | Swarm coordinator |
+| test_creative_director.py | 19 | Creative pipeline |
+| test_tool_executor.py | 19 | Tool execution |
+| test_lightning_interface.py | 19 | Lightning backend |
+| test_dashboard.py | 18 | Dashboard core |
+| test_git_tools.py | 18 | Git tools |
+| test_approvals.py | 17 | Approval workflow |
+| test_swarm_routing.py | 17 | Task routing |
+| test_telegram_bot.py | 16 | Telegram bridge |
+| test_websocket_extended.py | 16 | WebSocket |
+| test_voice_nlu.py | 15 | Voice NLU |
+| test_backends.py | 14 | Backend selection |
+| test_swarm_recovery.py | 14 | Fault recovery |
+| test_swarm_stats.py | 13 | Performance stats |
+| test_swarm_integration_full.py | 13 | Swarm integration |
+| test_l402_proxy.py | 13 | L402 proxy |
+| test_agent.py | 13 | Core agent |
+| test_notifications.py | 11 | Push notifications |
+| test_spark_tools_creative.py | 11 | Spark + creative integration |
+| test_swarm_node.py | 10 | Swarm nodes |
+| test_inter_agent.py | 10 | Inter-agent comms |
+| test_timmy_serve_cli.py | 10 | Serve CLI |
+| test_docker_agent.py | 9 | Docker agents |
+| test_assembler_integration.py | 9 | Video assembly |
+| test_swarm_integration.py | 7 | Swarm integration |
+| test_assembler.py | 7 | Video assembly |
+| test_image_tools.py | 7 | Image tools |
+| test_music_tools.py | 9 | Music tools |
+| test_video_tools.py | 9 | Video tools |
+| test_creative_route.py | 6 | Creative routes |
+| test_shortcuts.py | 6 | Siri shortcuts |
+| test_watchdog.py | 6 | Self-TDD watchdog |
+| test_voice_enhanced.py | 8 | Enhanced voice |
+| test_timmy_serve_app.py | 5 | Serve app |
+| test_music_video_integration.py | 5 | Music + video pipeline |
+| test_swarm_live_page.py | 4 | Live swarm page |
+| test_agent_runner.py | 4 | Agent runner |
+| test_prompts.py | 8 | System prompts |
+| test_cli.py | 2 | CLI |
+| test_websocket.py | 3 | WebSocket core |
--- a/docs/index.html
+++ b/docs/index.html
@@ -563,13 +563,13 @@
  <h1>Your agents.<br><em>Your hardware.</em><br>Your sats.</h1>
  <p class="hero-sub">
    A local-first AI command center. Talk to Timmy, coordinate your swarm,
-    gate API access with Bitcoin Lightning — no cloud, no telemetry, no compromise.
+    gate API access with Bitcoin Lightning — no cloud AI, no telemetry, no compromise.
  </p>
  <div class="hero-badges">
-    <span class="badge green">600+ Tests Passing</span>
+    <span class="badge green">Full Test Suite Passing</span>
    <span class="badge blue">FastAPI + HTMX</span>
    <span class="badge amber">Lightning L402</span>
-    <span class="badge">No Cloud</span>
+    <span class="badge">No Cloud AI</span>
    <span class="badge purple">Multi-Agent Swarm</span>
    <span class="badge">MIT License</span>
  </div>
@@ -582,11 +582,11 @@
 <!-- ── Stats ────────────────────────────────────────────────────────────── -->
 <div class="stats">
  <div class="stat">
-    <div class="stat-number">600+</div>
+    <div class="stat-number">640+</div>
    <div class="stat-label">Tests Passing</div>
  </div>
  <div class="stat">
-    <div class="stat-number">20+</div>
+    <div class="stat-number">58</div>
    <div class="stat-label">API Endpoints</div>
  </div>
  <div class="stat">
@@ -595,7 +595,7 @@
  </div>
  <div class="stat">
    <div class="stat-number">0</div>
-    <div class="stat-label">Cloud Calls</div>
+    <div class="stat-label">Cloud AI Calls</div>
  </div>
 </div>

@@ -639,7 +639,7 @@
      <span class="feature-icon">⚡</span>
      <h3>L402 Lightning Payments</h3>
      <p>Bitcoin Lightning payment gating via HMAC macaroons. Mock backend for dev,
-         LND gRPC-ready for production. Agents earn and spend sats autonomously.</p>
+         LND backend scaffolded for production. Auction bids priced in sats.</p>
      <span class="feature-tag">L402 · Macaroon · BOLT11</span>
    </div>

@@ -780,7 +780,7 @@ External:     Ollama :11434 · optional Redis · optional LND gRPC</span>
      <div class="step-num">5</div>
      <div>
        <h4>Test</h4>
-        <pre class="codeblock"><span class="cmd">make test</span>        <span class="cmt"># 600+ tests — no Ollama needed</span>
+        <pre class="codeblock"><span class="cmd">make test</span>        <span class="cmt"># full test suite — no Ollama needed</span>
 <span class="cmd">make test-cov</span>    <span class="cmt"># + coverage report</span>
 <span class="cmd">make watch</span>       <span class="cmt"># self-TDD watchdog in background</span></pre>
      </div>
@@ -912,7 +912,7 @@ External:     Ollama :11434 · optional Redis · optional LND gRPC</span>
 <footer>
  <div class="footer-copy">
    TIMMY TIME // MISSION CONTROL &nbsp;·&nbsp; MIT License &nbsp;·&nbsp;
-    No cloud. No telemetry. Sats are sovereignty.
+    No cloud AI. No telemetry. Sats are sovereignty.
  </div>
  <div class="footer-links">
    <a href="https://github.com/AlexanderWhitestone/Timmy-time-dashboard">GitHub</a>
--- a/src/agent_core/ollama_adapter.py
+++ b/src/agent_core/ollama_adapter.py
@@ -172,18 +172,19 @@ Respond naturally and helpfully."""
        return result
    
    def remember(self, memory: Memory) -> None:
-        """Store memory persistently.
-        
-        For now, working memory is sufficient. In the future,
-        this would write to SQLite or vector DB for long-term
-        memory across sessions.
+        """Store memory in working memory.
+
+        Adds the memory to the sliding window and bumps its importance.
        """
-        # Mark as accessed to update importance
        memory.touch()
-        
-        # TODO: Persist to SQLite for long-term memory
-        # This would integrate with the existing briefing system
-        pass
+
+        # Deduplicate by id
+        self._working_memory = [m for m in self._working_memory if m.id != memory.id]
+        self._working_memory.append(memory)
+
+        # Evict oldest if over capacity
+        if len(self._working_memory) > self._max_working_memory:
+            self._working_memory.pop(0)
    
    def recall(self, query: str, limit: int = 5) -> list[Memory]:
        """Retrieve relevant memories.
@@ -215,13 +216,22 @@ Respond naturally and helpfully."""
        return [m for _, m in scored[:limit]]
    
    def communicate(self, message: Communication) -> bool:
-        """Send message to another agent.
-        
-        This would use the swarm comms layer for inter-agent
-        messaging. For now, it's a stub.
-        """
-        # TODO: Integrate with swarm.comms
-        return True
+        """Send message to another agent via swarm comms."""
+        try:
+            from swarm.comms import SwarmComms
+            comms = SwarmComms()
+            comms.publish(
+                "agent:messages",
+                "agent_message",
+                {
+                    "from": self._identity.name,
+                    "to": message.recipient,
+                    "content": message.content,
+                },
+            )
+            return True
+        except Exception:
+            return False
    
    def _extract_tags(self, perception: Perception) -> list[str]:
        """Extract searchable tags from perception."""
--- a/src/swarm/coordinator.py
+++ b/src/swarm/coordinator.py
@@ -11,7 +11,7 @@ import logging
 from datetime import datetime, timezone
 from typing import Optional

-from swarm.bidder import AuctionManager, Bid
+from swarm.bidder import AUCTION_DURATION_SECONDS, AuctionManager, Bid
 from swarm.comms import SwarmComms
 from swarm import learner as swarm_learner
 from swarm.manager import SwarmManager
@@ -227,7 +227,7 @@ class SwarmCoordinator:
        All bids are recorded in the learner so agents accumulate outcome
        history that later feeds back into adaptive bidding.
        """
-        await asyncio.sleep(0)  # yield to let any pending callbacks fire
+        await asyncio.sleep(AUCTION_DURATION_SECONDS)

        # Snapshot the auction bids before closing (for learner recording)
        auction = self.auctions.get_auction(task_id)
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -27,3 +27,45 @@ def test_status_does_not_use_inline_string():

    call_args = mock_timmy.print_response.call_args
    assert call_args[0][0] != "Brief status report — one sentence."
+
+
+def test_chat_sends_message_to_agent():
+    """chat command must pass the user message to the agent with streaming."""
+    mock_timmy = MagicMock()
+
+    with patch("timmy.cli.create_timmy", return_value=mock_timmy):
+        runner.invoke(app, ["chat", "Hello Timmy"])
+
+    mock_timmy.print_response.assert_called_once_with("Hello Timmy", stream=True)
+
+
+def test_think_sends_topic_to_agent():
+    """think command must pass the topic wrapped in a prompt with streaming."""
+    mock_timmy = MagicMock()
+
+    with patch("timmy.cli.create_timmy", return_value=mock_timmy):
+        runner.invoke(app, ["think", "Bitcoin self-custody"])
+
+    mock_timmy.print_response.assert_called_once_with(
+        "Think carefully about: Bitcoin self-custody", stream=True
+    )
+
+
+def test_chat_passes_backend_option():
+    """chat --backend airllm must forward the backend to create_timmy."""
+    mock_timmy = MagicMock()
+
+    with patch("timmy.cli.create_timmy", return_value=mock_timmy) as mock_create:
+        runner.invoke(app, ["chat", "test", "--backend", "airllm"])
+
+    mock_create.assert_called_once_with(backend="airllm", model_size=None)
+
+
+def test_think_passes_model_size_option():
+    """think --model-size 70b must forward the model size to create_timmy."""
+    mock_timmy = MagicMock()
+
+    with patch("timmy.cli.create_timmy", return_value=mock_timmy) as mock_create:
+        runner.invoke(app, ["think", "topic", "--model-size", "70b"])
+
+    mock_create.assert_called_once_with(backend=None, model_size="70b")
--- a/tests/test_coordinator.py
+++ b/tests/test_coordinator.py
@@ -162,8 +162,8 @@ async def test_coordinator_run_auction_no_bids():
    coord = SwarmCoordinator()
    task = coord.post_task("No bids task")

-    # Patch sleep to avoid 15-second wait
-    with patch("swarm.bidder.asyncio.sleep", new_callable=AsyncMock):
+    # Patch sleep to avoid 15-second wait in tests
+    with patch("swarm.coordinator.asyncio.sleep", new_callable=AsyncMock):
        winner = await coord.run_auction_and_assign(task.id)

    assert winner is None
--- a/tests/test_swarm_integration.py
+++ b/tests/test_swarm_integration.py
@@ -8,7 +8,7 @@ These tests verify that:
 """

 import asyncio
-from unittest.mock import patch
+from unittest.mock import AsyncMock, patch

 import pytest

@@ -16,6 +16,13 @@ from swarm.coordinator import SwarmCoordinator
 from swarm.tasks import TaskStatus


+@pytest.fixture(autouse=True)
+def _fast_auction():
+    """Skip the 15-second auction wait in tests."""
+    with patch("swarm.coordinator.AUCTION_DURATION_SECONDS", 0):
+        yield
+
+
 class TestSwarmInProcessAgents:
    """Test the in-process agent spawning and bidding flow."""

--- a/tests/test_swarm_integration_full.py
+++ b/tests/test_swarm_integration_full.py
@@ -8,6 +8,13 @@ import pytest
 from unittest.mock import AsyncMock, MagicMock, patch


+@pytest.fixture(autouse=True)
+def _fast_auction():
+    """Skip the 15-second auction wait in tests."""
+    with patch("swarm.coordinator.AUCTION_DURATION_SECONDS", 0):
+        yield
+
+
 class TestFullSwarmLifecycle:
    """Integration tests for end-to-end swarm task lifecycle."""