From 1ab26d30ad433016d10205c688c6be6dd363cfb8 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 15:51:15 +0000 Subject: [PATCH 01/32] feat: integrate Spark Intelligence into Timmy swarm system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a self-evolving cognitive layer inspired by vibeship-spark-intelligence, adapted for Timmy's agent architecture. Spark captures swarm events, runs EIDOS prediction-evaluation loops, consolidates memories, and generates advisory recommendations — all backed by SQLite consistent with existing patterns. New modules: - spark/memory.py — event capture with importance scoring + memory consolidation - spark/eidos.py — EIDOS cognitive loop (predict → observe → evaluate → learn) - spark/advisor.py — ranked advisory generation from accumulated intelligence - spark/engine.py — top-level API wiring all subsystems together Dashboard: - /spark/ui — full Spark Intelligence dashboard (3-column: status/advisories, predictions/memories, event timeline) with HTMX auto-refresh - /spark — JSON API for programmatic access - SPARK link added to navigation header Integration: - Coordinator hooks emit Spark events on task post, bid, assign, complete, fail - EIDOS predictions generated when tasks are posted, evaluated on completion - Memory consolidation triggers when agents accumulate enough outcomes - SPARK_ENABLED config toggle (default: true) Tests: 47 new tests covering all Spark subsystems + dashboard routes. Full suite: 538 tests passing. 
https://claude.ai/code/session_01KJm6jQkNi3aA3yoQJn636c --- pyproject.toml | 1 + src/config.py | 6 + src/dashboard/app.py | 7 + src/dashboard/routes/spark.py | 147 +++++ src/dashboard/templates/base.html | 1 + .../templates/partials/spark_insights.html | 32 + .../templates/partials/spark_timeline.html | 19 + src/dashboard/templates/spark.html | 556 ++++++++++++++++++ src/spark/__init__.py | 0 src/spark/advisor.py | 278 +++++++++ src/spark/eidos.py | 304 ++++++++++ src/spark/engine.py | 288 +++++++++ src/spark/memory.py | 301 ++++++++++ src/swarm/coordinator.py | 53 +- tests/test_spark.py | 431 ++++++++++++++ 15 files changed, 2420 insertions(+), 4 deletions(-) create mode 100644 src/dashboard/routes/spark.py create mode 100644 src/dashboard/templates/partials/spark_insights.html create mode 100644 src/dashboard/templates/partials/spark_timeline.html create mode 100644 src/dashboard/templates/spark.html create mode 100644 src/spark/__init__.py create mode 100644 src/spark/advisor.py create mode 100644 src/spark/eidos.py create mode 100644 src/spark/engine.py create mode 100644 src/spark/memory.py create mode 100644 tests/test_spark.py diff --git a/pyproject.toml b/pyproject.toml index 1364e6a..6b5344b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ include = [ "src/notifications", "src/shortcuts", "src/telegram_bot", + "src/spark", ] [tool.pytest.ini_options] diff --git a/src/config.py b/src/config.py index 506e643..4197bd1 100644 --- a/src/config.py +++ b/src/config.py @@ -28,6 +28,12 @@ class Settings(BaseSettings): # 8b ~16 GB | 70b ~140 GB | 405b ~810 GB airllm_model_size: Literal["8b", "70b", "405b"] = "70b" + # ── Spark Intelligence ──────────────────────────────────────────────── + # Enable/disable the Spark cognitive layer. + # When enabled, Spark captures swarm events, runs EIDOS predictions, + # consolidates memories, and generates advisory recommendations. 
+ spark_enabled: bool = True + model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", diff --git a/src/dashboard/app.py b/src/dashboard/app.py index 78e7be2..729c7cd 100644 --- a/src/dashboard/app.py +++ b/src/dashboard/app.py @@ -23,6 +23,7 @@ from dashboard.routes.briefing import router as briefing_router from dashboard.routes.telegram import router as telegram_router from dashboard.routes.swarm_internal import router as swarm_internal_router from dashboard.routes.tools import router as tools_router +from dashboard.routes.spark import router as spark_router logging.basicConfig( level=logging.INFO, @@ -97,6 +98,11 @@ async def lifespan(app: FastAPI): except Exception as exc: logger.error("Failed to spawn persona agents: %s", exc) + # Initialise Spark Intelligence engine + from spark.engine import spark_engine + if spark_engine.enabled: + logger.info("Spark Intelligence active — event capture enabled") + # Auto-start Telegram bot if a token is configured from telegram_bot.bot import telegram_bot await telegram_bot.start() @@ -136,6 +142,7 @@ app.include_router(briefing_router) app.include_router(telegram_router) app.include_router(swarm_internal_router) app.include_router(tools_router) +app.include_router(spark_router) @app.get("/", response_class=HTMLResponse) diff --git a/src/dashboard/routes/spark.py b/src/dashboard/routes/spark.py new file mode 100644 index 0000000..f998050 --- /dev/null +++ b/src/dashboard/routes/spark.py @@ -0,0 +1,147 @@ +"""Spark Intelligence dashboard routes. 
+ +GET /spark — JSON status (API) +GET /spark/ui — HTML Spark Intelligence dashboard +GET /spark/timeline — HTMX partial: recent event timeline +GET /spark/insights — HTMX partial: advisories and insights +GET /spark/predictions — HTMX partial: EIDOS predictions +""" + +import json +import logging +from pathlib import Path + +from fastapi import APIRouter, Request +from fastapi.responses import HTMLResponse +from fastapi.templating import Jinja2Templates + +from spark.engine import spark_engine + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/spark", tags=["spark"]) +templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates")) + + +@router.get("/ui", response_class=HTMLResponse) +async def spark_ui(request: Request): + """Render the Spark Intelligence dashboard page.""" + status = spark_engine.status() + advisories = spark_engine.get_advisories() + timeline = spark_engine.get_timeline(limit=20) + predictions = spark_engine.get_predictions(limit=10) + memories = spark_engine.get_memories(limit=10) + + # Parse event data JSON for template display + timeline_enriched = [] + for ev in timeline: + entry = { + "id": ev.id, + "event_type": ev.event_type, + "agent_id": ev.agent_id, + "task_id": ev.task_id, + "description": ev.description, + "importance": ev.importance, + "created_at": ev.created_at, + } + try: + entry["data"] = json.loads(ev.data) + except (json.JSONDecodeError, TypeError): + entry["data"] = {} + timeline_enriched.append(entry) + + # Enrich predictions for display + predictions_enriched = [] + for p in predictions: + entry = { + "id": p.id, + "task_id": p.task_id, + "prediction_type": p.prediction_type, + "accuracy": p.accuracy, + "created_at": p.created_at, + "evaluated_at": p.evaluated_at, + } + try: + entry["predicted"] = json.loads(p.predicted_value) + except (json.JSONDecodeError, TypeError): + entry["predicted"] = {} + try: + entry["actual"] = json.loads(p.actual_value) if p.actual_value else None + 
except (json.JSONDecodeError, TypeError): + entry["actual"] = None + predictions_enriched.append(entry) + + return templates.TemplateResponse( + request, + "spark.html", + { + "status": status, + "advisories": advisories, + "timeline": timeline_enriched, + "predictions": predictions_enriched, + "memories": memories, + }, + ) + + +@router.get("", response_class=HTMLResponse) +async def spark_status_json(): + """Return Spark Intelligence status as JSON.""" + from fastapi.responses import JSONResponse + status = spark_engine.status() + advisories = spark_engine.get_advisories() + return JSONResponse({ + "status": status, + "advisories": [ + { + "category": a.category, + "priority": a.priority, + "title": a.title, + "detail": a.detail, + "suggested_action": a.suggested_action, + "subject": a.subject, + "evidence_count": a.evidence_count, + } + for a in advisories + ], + }) + + +@router.get("/timeline", response_class=HTMLResponse) +async def spark_timeline(request: Request): + """HTMX partial: recent event timeline.""" + timeline = spark_engine.get_timeline(limit=20) + timeline_enriched = [] + for ev in timeline: + entry = { + "id": ev.id, + "event_type": ev.event_type, + "agent_id": ev.agent_id, + "task_id": ev.task_id, + "description": ev.description, + "importance": ev.importance, + "created_at": ev.created_at, + } + try: + entry["data"] = json.loads(ev.data) + except (json.JSONDecodeError, TypeError): + entry["data"] = {} + timeline_enriched.append(entry) + + return templates.TemplateResponse( + request, + "partials/spark_timeline.html", + {"timeline": timeline_enriched}, + ) + + +@router.get("/insights", response_class=HTMLResponse) +async def spark_insights(request: Request): + """HTMX partial: advisories and consolidated memories.""" + advisories = spark_engine.get_advisories() + memories = spark_engine.get_memories(limit=10) + return templates.TemplateResponse( + request, + "partials/spark_insights.html", + {"advisories": advisories, "memories": memories}, + ) 
diff --git a/src/dashboard/templates/base.html b/src/dashboard/templates/base.html index 4d92db3..1fc5c2a 100644 --- a/src/dashboard/templates/base.html +++ b/src/dashboard/templates/base.html @@ -23,6 +23,7 @@
BRIEFING SWARM + SPARK MARKET TOOLS MOBILE diff --git a/src/dashboard/templates/partials/spark_insights.html b/src/dashboard/templates/partials/spark_insights.html new file mode 100644 index 0000000..108b5d1 --- /dev/null +++ b/src/dashboard/templates/partials/spark_insights.html @@ -0,0 +1,32 @@ +{% if advisories %} + {% for adv in advisories %} +
+
+ {{ adv.category | replace("_", " ") | upper }} + {{ "%.0f"|format(adv.priority * 100) }}% +
+
{{ adv.title }}
+
{{ adv.detail }}
+
{{ adv.suggested_action }}
+
+ {% endfor %} +{% else %} +
No advisories yet. Run more tasks to build intelligence.
+{% endif %} + +{% if memories %} +
+
CONSOLIDATED MEMORIES
+ {% for mem in memories %} +
+
+ {{ mem.memory_type | upper }} + {{ "%.0f"|format(mem.confidence * 100) }}% conf +
+
{{ mem.content }}
+
+ {{ mem.source_events }} events • {{ mem.created_at[:10] }} +
+
+ {% endfor %} +{% endif %} diff --git a/src/dashboard/templates/partials/spark_timeline.html b/src/dashboard/templates/partials/spark_timeline.html new file mode 100644 index 0000000..ead0178 --- /dev/null +++ b/src/dashboard/templates/partials/spark_timeline.html @@ -0,0 +1,19 @@ +{% if timeline %} + {% for ev in timeline %} +
+
+ {{ ev.event_type | replace("_", " ") | upper }} + + {% if ev.importance >= 0.8 %}●●●{% elif ev.importance >= 0.5 %}●●{% else %}●{% endif %} + +
+
{{ ev.description }}
+ {% if ev.task_id %} +
task: {{ ev.task_id[:8] }}{% if ev.agent_id %} • agent: {{ ev.agent_id[:8] }}{% endif %}
+ {% endif %} +
{{ ev.created_at[:19] }}
+
+ {% endfor %} +{% else %} +
No events captured yet.
+{% endif %} diff --git a/src/dashboard/templates/spark.html b/src/dashboard/templates/spark.html new file mode 100644 index 0000000..d6464d5 --- /dev/null +++ b/src/dashboard/templates/spark.html @@ -0,0 +1,556 @@ +{% extends "base.html" %} + +{% block title %}Timmy Time — Spark Intelligence{% endblock %} + +{% block content %} +
+ + +
+
SPARK INTELLIGENCE
+
+ Self-evolving cognitive layer — + {{ status.events_captured }} events captured, + {{ status.memories_stored }} memories, + {{ status.predictions.evaluated }} predictions evaluated +
+
+ +
+ + +
+ + +
+
// EIDOS LOOP
+
+
+
+ PREDICTIONS + {{ status.predictions.total_predictions }} +
+
+ EVALUATED + {{ status.predictions.evaluated }} +
+
+ PENDING + {{ status.predictions.pending }} +
+
+ ACCURACY + + {{ "%.0f"|format(status.predictions.avg_accuracy * 100) }}% + +
+
+
+
+ + +
+
// EVENT PIPELINE
+
+ {% for event_type, count in status.event_types.items() %} +
+ {{ event_type | replace("_", " ") | upper }} + {{ count }} +
+ {% endfor %} +
+
+ + +
+
+ // ADVISORIES + {{ advisories | length }} +
+
+ {% if advisories %} + {% for adv in advisories %} +
+
+ {{ adv.category | replace("_", " ") | upper }} + {{ "%.0f"|format(adv.priority * 100) }}% +
+
{{ adv.title }}
+
{{ adv.detail }}
+
{{ adv.suggested_action }}
+
+ {% endfor %} + {% else %} +
No advisories yet. Run more tasks to build intelligence.
+ {% endif %} +
+
+
+ + +
+ + +
+
// EIDOS PREDICTIONS
+
+ {% if predictions %} + {% for pred in predictions %} +
+
+ {{ pred.task_id[:8] }}... + {% if pred.accuracy is not none %} + + {{ "%.0f"|format(pred.accuracy * 100) }}% + + {% else %} + PENDING + {% endif %} +
+
+ {% if pred.predicted %} +
+ Winner: + {{ (pred.predicted.likely_winner or "?")[:8] }} +
+
+ Success: + {{ "%.0f"|format((pred.predicted.success_probability or 0) * 100) }}% +
+
+ Bid range: + {{ pred.predicted.estimated_bid_range | join("–") }} sats +
+ {% endif %} + {% if pred.actual %} +
+ Actual: + {% if pred.actual.succeeded %}completed{% else %}failed{% endif %} + by {{ (pred.actual.winner or "?")[:8] }} + {% if pred.actual.winning_bid %} at {{ pred.actual.winning_bid }} sats{% endif %} +
+ {% endif %} +
+
{{ pred.created_at[:19] }}
+
+ {% endfor %} + {% else %} +
No predictions yet. Post tasks to activate the EIDOS loop.
+ {% endif %} +
+
+ + +
+
// MEMORIES
+
+ {% if memories %} + {% for mem in memories %} +
+
+ {{ mem.memory_type | upper }} + {{ "%.0f"|format(mem.confidence * 100) }}% conf +
+
{{ mem.content }}
+
+ {{ mem.source_events }} events • {{ mem.created_at[:10] }} +
+
+ {% endfor %} + {% else %} +
Memories will form as patterns emerge.
+ {% endif %} +
+
+
+ + +
+ +
+
+ // EVENT TIMELINE + {{ status.events_captured }} total +
+
+ {% if timeline %} + {% for ev in timeline %} +
+
+ {{ ev.event_type | replace("_", " ") | upper }} + + {% if ev.importance >= 0.8 %}●●●{% elif ev.importance >= 0.5 %}●●{% else %}●{% endif %} + +
+
{{ ev.description }}
+ {% if ev.task_id %} +
task: {{ ev.task_id[:8] }}{% if ev.agent_id %} • agent: {{ ev.agent_id[:8] }}{% endif %}
+ {% endif %} +
{{ ev.created_at[:19] }}
+
+ {% endfor %} + {% else %} +
No events captured yet.
+ {% endif %} +
+
+
+ +
+
+ + +{% endblock %} diff --git a/src/spark/__init__.py b/src/spark/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/spark/advisor.py b/src/spark/advisor.py new file mode 100644 index 0000000..a0bc465 --- /dev/null +++ b/src/spark/advisor.py @@ -0,0 +1,278 @@ +"""Spark advisor — generates ranked recommendations from accumulated intelligence. + +The advisor examines Spark's event history, consolidated memories, and EIDOS +prediction accuracy to produce actionable recommendations for the swarm. + +Categories +---------- +- agent_performance — "Agent X excels at Y, consider routing more Y tasks" +- bid_optimization — "Bids on Z tasks are consistently high, room to save" +- failure_prevention — "Agent A has failed 3 recent tasks, investigate" +- system_health — "No events in 30 min, swarm may be idle" +""" + +import json +import logging +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Optional + +from spark import memory as spark_memory +from spark import eidos as spark_eidos + +logger = logging.getLogger(__name__) + +# Minimum events before the advisor starts generating recommendations +_MIN_EVENTS = 3 + + +@dataclass +class Advisory: + """A single ranked recommendation.""" + category: str # agent_performance, bid_optimization, etc. + priority: float # 0.0–1.0 (higher = more urgent) + title: str # Short headline + detail: str # Longer explanation + suggested_action: str # What to do about it + subject: Optional[str] = None # agent_id or None for system-level + evidence_count: int = 0 # Number of supporting events + + +def generate_advisories() -> list[Advisory]: + """Analyse Spark data and produce ranked recommendations. + + Returns advisories sorted by priority (highest first). 
+ """ + advisories: list[Advisory] = [] + + event_count = spark_memory.count_events() + if event_count < _MIN_EVENTS: + advisories.append(Advisory( + category="system_health", + priority=0.3, + title="Insufficient data", + detail=f"Only {event_count} events captured. " + f"Spark needs at least {_MIN_EVENTS} events to generate insights.", + suggested_action="Run more swarm tasks to build intelligence.", + evidence_count=event_count, + )) + return advisories + + advisories.extend(_check_failure_patterns()) + advisories.extend(_check_agent_performance()) + advisories.extend(_check_bid_patterns()) + advisories.extend(_check_prediction_accuracy()) + advisories.extend(_check_system_activity()) + + advisories.sort(key=lambda a: a.priority, reverse=True) + return advisories + + +def _check_failure_patterns() -> list[Advisory]: + """Detect agents with recent failure streaks.""" + results: list[Advisory] = [] + failures = spark_memory.get_events(event_type="task_failed", limit=50) + + # Group failures by agent + agent_failures: dict[str, int] = {} + for ev in failures: + aid = ev.agent_id + if aid: + agent_failures[aid] = agent_failures.get(aid, 0) + 1 + + for aid, count in agent_failures.items(): + if count >= 2: + results.append(Advisory( + category="failure_prevention", + priority=min(1.0, 0.5 + count * 0.15), + title=f"Agent {aid[:8]} has {count} failures", + detail=f"Agent {aid[:8]}... has failed {count} recent tasks. " + f"This pattern may indicate a capability mismatch or " + f"configuration issue.", + suggested_action=f"Review task types assigned to {aid[:8]}... 
" + f"and consider adjusting routing preferences.", + subject=aid, + evidence_count=count, + )) + + return results + + +def _check_agent_performance() -> list[Advisory]: + """Identify top-performing and underperforming agents.""" + results: list[Advisory] = [] + completions = spark_memory.get_events(event_type="task_completed", limit=100) + failures = spark_memory.get_events(event_type="task_failed", limit=100) + + # Build success/failure counts per agent + agent_success: dict[str, int] = {} + agent_fail: dict[str, int] = {} + + for ev in completions: + aid = ev.agent_id + if aid: + agent_success[aid] = agent_success.get(aid, 0) + 1 + + for ev in failures: + aid = ev.agent_id + if aid: + agent_fail[aid] = agent_fail.get(aid, 0) + 1 + + all_agents = set(agent_success) | set(agent_fail) + for aid in all_agents: + wins = agent_success.get(aid, 0) + fails = agent_fail.get(aid, 0) + total = wins + fails + if total < 2: + continue + + rate = wins / total + if rate >= 0.8 and total >= 3: + results.append(Advisory( + category="agent_performance", + priority=0.6, + title=f"Agent {aid[:8]} excels ({rate:.0%} success)", + detail=f"Agent {aid[:8]}... has completed {wins}/{total} tasks " + f"successfully. Consider routing more tasks to this agent.", + suggested_action="Increase task routing weight for this agent.", + subject=aid, + evidence_count=total, + )) + elif rate <= 0.3 and total >= 3: + results.append(Advisory( + category="agent_performance", + priority=0.75, + title=f"Agent {aid[:8]} struggling ({rate:.0%} success)", + detail=f"Agent {aid[:8]}... has only succeeded on {wins}/{total} tasks. 
" + f"May need different task types or capability updates.", + suggested_action="Review this agent's capabilities and assigned task types.", + subject=aid, + evidence_count=total, + )) + + return results + + +def _check_bid_patterns() -> list[Advisory]: + """Detect bid optimization opportunities.""" + results: list[Advisory] = [] + bids = spark_memory.get_events(event_type="bid_submitted", limit=100) + + if len(bids) < 5: + return results + + # Extract bid amounts + bid_amounts: list[int] = [] + for ev in bids: + try: + data = json.loads(ev.data) + sats = data.get("bid_sats", 0) + if sats > 0: + bid_amounts.append(sats) + except (json.JSONDecodeError, TypeError): + continue + + if not bid_amounts: + return results + + avg_bid = sum(bid_amounts) / len(bid_amounts) + max_bid = max(bid_amounts) + min_bid = min(bid_amounts) + spread = max_bid - min_bid + + if spread > avg_bid * 1.5: + results.append(Advisory( + category="bid_optimization", + priority=0.5, + title=f"Wide bid spread ({min_bid}–{max_bid} sats)", + detail=f"Bids range from {min_bid} to {max_bid} sats " + f"(avg {avg_bid:.0f}). Large spread may indicate " + f"inefficient auction dynamics.", + suggested_action="Review agent bid strategies for consistency.", + evidence_count=len(bid_amounts), + )) + + if avg_bid > 70: + results.append(Advisory( + category="bid_optimization", + priority=0.45, + title=f"High average bid ({avg_bid:.0f} sats)", + detail=f"The swarm average bid is {avg_bid:.0f} sats across " + f"{len(bid_amounts)} bids. 
This may be above optimal.", + suggested_action="Consider adjusting base bid rates for persona agents.", + evidence_count=len(bid_amounts), + )) + + return results + + +def _check_prediction_accuracy() -> list[Advisory]: + """Report on EIDOS prediction accuracy.""" + results: list[Advisory] = [] + stats = spark_eidos.get_accuracy_stats() + + if stats["evaluated"] < 3: + return results + + avg = stats["avg_accuracy"] + if avg < 0.4: + results.append(Advisory( + category="system_health", + priority=0.65, + title=f"Low prediction accuracy ({avg:.0%})", + detail=f"EIDOS predictions have averaged {avg:.0%} accuracy " + f"over {stats['evaluated']} evaluations. The learning " + f"model needs more data or the swarm behaviour is changing.", + suggested_action="Continue running tasks; accuracy should improve " + "as the model accumulates more training data.", + evidence_count=stats["evaluated"], + )) + elif avg >= 0.75: + results.append(Advisory( + category="system_health", + priority=0.3, + title=f"Strong prediction accuracy ({avg:.0%})", + detail=f"EIDOS predictions are performing well at {avg:.0%} " + f"average accuracy over {stats['evaluated']} evaluations.", + suggested_action="No action needed. Spark intelligence is learning effectively.", + evidence_count=stats["evaluated"], + )) + + return results + + +def _check_system_activity() -> list[Advisory]: + """Check for system idle patterns.""" + results: list[Advisory] = [] + recent = spark_memory.get_events(limit=5) + + if not recent: + results.append(Advisory( + category="system_health", + priority=0.4, + title="No swarm activity detected", + detail="Spark has not captured any events. 
" + "The swarm may be idle or Spark event capture is not active.", + suggested_action="Post a task to the swarm to activate the pipeline.", + )) + return results + + # Check event type distribution + types = [e.event_type for e in spark_memory.get_events(limit=100)] + type_counts = {} + for t in types: + type_counts[t] = type_counts.get(t, 0) + 1 + + if "task_completed" not in type_counts and "task_failed" not in type_counts: + if type_counts.get("task_posted", 0) > 3: + results.append(Advisory( + category="system_health", + priority=0.6, + title="Tasks posted but none completing", + detail=f"{type_counts.get('task_posted', 0)} tasks posted " + f"but no completions or failures recorded.", + suggested_action="Check agent availability and auction configuration.", + evidence_count=type_counts.get("task_posted", 0), + )) + + return results diff --git a/src/spark/eidos.py b/src/spark/eidos.py new file mode 100644 index 0000000..0377d40 --- /dev/null +++ b/src/spark/eidos.py @@ -0,0 +1,304 @@ +"""EIDOS cognitive loop — prediction, evaluation, and learning. + +Implements the core Spark learning cycle: +1. PREDICT — Before a task is assigned, predict the outcome +2. OBSERVE — Watch what actually happens +3. EVALUATE — Compare prediction vs reality +4. LEARN — Update internal models based on accuracy + +All predictions and evaluations are stored in SQLite for +transparency and audit. The loop runs passively, recording +predictions when tasks are posted and evaluating them when +tasks complete. 
+""" + +import json +import logging +import sqlite3 +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + +DB_PATH = Path("data/spark.db") + + +@dataclass +class Prediction: + """A prediction made by the EIDOS loop.""" + id: str + task_id: str + prediction_type: str # outcome, best_agent, bid_range + predicted_value: str # JSON-encoded prediction + actual_value: Optional[str] # JSON-encoded actual (filled on evaluation) + accuracy: Optional[float] # 0.0–1.0 (filled on evaluation) + created_at: str + evaluated_at: Optional[str] + + +def _get_conn() -> sqlite3.Connection: + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(DB_PATH)) + conn.row_factory = sqlite3.Row + conn.execute( + """ + CREATE TABLE IF NOT EXISTS spark_predictions ( + id TEXT PRIMARY KEY, + task_id TEXT NOT NULL, + prediction_type TEXT NOT NULL, + predicted_value TEXT NOT NULL, + actual_value TEXT, + accuracy REAL, + created_at TEXT NOT NULL, + evaluated_at TEXT + ) + """ + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_pred_task ON spark_predictions(task_id)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_pred_type ON spark_predictions(prediction_type)" + ) + conn.commit() + return conn + + +# ── Prediction phase ──────────────────────────────────────────────────────── + +def predict_task_outcome( + task_id: str, + task_description: str, + candidate_agents: list[str], + agent_history: Optional[dict] = None, +) -> dict: + """Predict the outcome of a task before it's assigned. 
+ + Returns a prediction dict with: + - likely_winner: agent_id most likely to win the auction + - success_probability: 0.0–1.0 chance the task succeeds + - estimated_bid_range: (low, high) sats range + """ + # Default prediction when no history exists + prediction = { + "likely_winner": candidate_agents[0] if candidate_agents else None, + "success_probability": 0.7, + "estimated_bid_range": [20, 80], + "reasoning": "baseline prediction (no history)", + } + + if agent_history: + # Adjust based on historical success rates + best_agent = None + best_rate = 0.0 + for aid, metrics in agent_history.items(): + if aid not in candidate_agents: + continue + rate = metrics.get("success_rate", 0.0) + if rate > best_rate: + best_rate = rate + best_agent = aid + + if best_agent: + prediction["likely_winner"] = best_agent + prediction["success_probability"] = round( + min(1.0, 0.5 + best_rate * 0.4), 2 + ) + prediction["reasoning"] = ( + f"agent {best_agent[:8]} has {best_rate:.0%} success rate" + ) + + # Adjust bid range from history + all_bids = [] + for metrics in agent_history.values(): + avg = metrics.get("avg_winning_bid", 0) + if avg > 0: + all_bids.append(avg) + if all_bids: + prediction["estimated_bid_range"] = [ + max(1, int(min(all_bids) * 0.8)), + int(max(all_bids) * 1.2), + ] + + # Store prediction + pred_id = str(uuid.uuid4()) + now = datetime.now(timezone.utc).isoformat() + conn = _get_conn() + conn.execute( + """ + INSERT INTO spark_predictions + (id, task_id, prediction_type, predicted_value, created_at) + VALUES (?, ?, ?, ?, ?) 
+ """, + (pred_id, task_id, "outcome", json.dumps(prediction), now), + ) + conn.commit() + conn.close() + + prediction["prediction_id"] = pred_id + return prediction + + +# ── Evaluation phase ──────────────────────────────────────────────────────── + +def evaluate_prediction( + task_id: str, + actual_winner: Optional[str], + task_succeeded: bool, + winning_bid: Optional[int] = None, +) -> Optional[dict]: + """Evaluate a stored prediction against actual outcomes. + + Returns the evaluation result or None if no prediction exists. + """ + conn = _get_conn() + row = conn.execute( + """ + SELECT * FROM spark_predictions + WHERE task_id = ? AND prediction_type = 'outcome' AND evaluated_at IS NULL + ORDER BY created_at DESC LIMIT 1 + """, + (task_id,), + ).fetchone() + + if not row: + conn.close() + return None + + predicted = json.loads(row["predicted_value"]) + actual = { + "winner": actual_winner, + "succeeded": task_succeeded, + "winning_bid": winning_bid, + } + + # Calculate accuracy + accuracy = _compute_accuracy(predicted, actual) + now = datetime.now(timezone.utc).isoformat() + + conn.execute( + """ + UPDATE spark_predictions + SET actual_value = ?, accuracy = ?, evaluated_at = ? + WHERE id = ? + """, + (json.dumps(actual), accuracy, now, row["id"]), + ) + conn.commit() + conn.close() + + return { + "prediction_id": row["id"], + "predicted": predicted, + "actual": actual, + "accuracy": accuracy, + } + + +def _compute_accuracy(predicted: dict, actual: dict) -> float: + """Score prediction accuracy from 0.0–1.0. 
+ + Components: + - Winner prediction: 0.4 weight (correct = 1.0, wrong = 0.0) + - Success prediction: 0.4 weight (how close) + - Bid range: 0.2 weight (was actual bid in predicted range) + """ + score = 0.0 + weights = 0.0 + + # Winner accuracy + pred_winner = predicted.get("likely_winner") + actual_winner = actual.get("winner") + if pred_winner and actual_winner: + score += 0.4 * (1.0 if pred_winner == actual_winner else 0.0) + weights += 0.4 + + # Success probability accuracy + pred_success = predicted.get("success_probability", 0.5) + actual_success = 1.0 if actual.get("succeeded") else 0.0 + success_error = abs(pred_success - actual_success) + score += 0.4 * (1.0 - success_error) + weights += 0.4 + + # Bid range accuracy + bid_range = predicted.get("estimated_bid_range", [20, 80]) + actual_bid = actual.get("winning_bid") + if actual_bid is not None and len(bid_range) == 2: + low, high = bid_range + if low <= actual_bid <= high: + score += 0.2 + else: + # Partial credit: how far outside the range + distance = min(abs(actual_bid - low), abs(actual_bid - high)) + range_size = max(1, high - low) + score += 0.2 * max(0, 1.0 - distance / range_size) + weights += 0.2 + + return round(score / max(weights, 0.01), 2) + + +# ── Query helpers ────────────────────────────────────────────────────────── + +def get_predictions( + task_id: Optional[str] = None, + evaluated_only: bool = False, + limit: int = 50, +) -> list[Prediction]: + """Query stored predictions.""" + conn = _get_conn() + query = "SELECT * FROM spark_predictions WHERE 1=1" + params: list = [] + + if task_id: + query += " AND task_id = ?" + params.append(task_id) + if evaluated_only: + query += " AND evaluated_at IS NOT NULL" + + query += " ORDER BY created_at DESC LIMIT ?" 
+ params.append(limit) + + rows = conn.execute(query, params).fetchall() + conn.close() + return [ + Prediction( + id=r["id"], + task_id=r["task_id"], + prediction_type=r["prediction_type"], + predicted_value=r["predicted_value"], + actual_value=r["actual_value"], + accuracy=r["accuracy"], + created_at=r["created_at"], + evaluated_at=r["evaluated_at"], + ) + for r in rows + ] + + +def get_accuracy_stats() -> dict: + """Return aggregate accuracy statistics for the EIDOS loop.""" + conn = _get_conn() + row = conn.execute( + """ + SELECT + COUNT(*) AS total_predictions, + COUNT(evaluated_at) AS evaluated, + AVG(CASE WHEN accuracy IS NOT NULL THEN accuracy END) AS avg_accuracy, + MIN(CASE WHEN accuracy IS NOT NULL THEN accuracy END) AS min_accuracy, + MAX(CASE WHEN accuracy IS NOT NULL THEN accuracy END) AS max_accuracy + FROM spark_predictions + """ + ).fetchone() + conn.close() + + return { + "total_predictions": row["total_predictions"] or 0, + "evaluated": row["evaluated"] or 0, + "pending": (row["total_predictions"] or 0) - (row["evaluated"] or 0), + "avg_accuracy": round(row["avg_accuracy"] or 0.0, 2), + "min_accuracy": round(row["min_accuracy"] or 0.0, 2), + "max_accuracy": round(row["max_accuracy"] or 0.0, 2), + } diff --git a/src/spark/engine.py b/src/spark/engine.py new file mode 100644 index 0000000..15bd5b3 --- /dev/null +++ b/src/spark/engine.py @@ -0,0 +1,288 @@ +"""Spark Intelligence engine — the top-level API for Spark integration. + +The engine is the single entry point used by the swarm coordinator and +dashboard routes. It wires together memory capture, EIDOS predictions, +memory consolidation, and the advisory system. 
+ +Usage +----- + from spark.engine import spark_engine + + # Capture a swarm event + spark_engine.on_task_posted(task_id, description) + spark_engine.on_bid_submitted(task_id, agent_id, bid_sats) + spark_engine.on_task_completed(task_id, agent_id, result) + spark_engine.on_task_failed(task_id, agent_id, reason) + + # Query Spark intelligence + spark_engine.status() + spark_engine.get_advisories() + spark_engine.get_timeline() +""" + +import json +import logging +from typing import Optional + +from spark import advisor as spark_advisor +from spark import eidos as spark_eidos +from spark import memory as spark_memory +from spark.advisor import Advisory +from spark.memory import SparkEvent, SparkMemory + +logger = logging.getLogger(__name__) + + +class SparkEngine: + """Top-level Spark Intelligence controller.""" + + def __init__(self, enabled: bool = True) -> None: + self._enabled = enabled + if enabled: + logger.info("Spark Intelligence engine initialised") + + @property + def enabled(self) -> bool: + return self._enabled + + # ── Event capture (called by coordinator) ──────────────────────────────── + + def on_task_posted( + self, + task_id: str, + description: str, + candidate_agents: Optional[list[str]] = None, + ) -> Optional[str]: + """Capture a task-posted event and generate a prediction.""" + if not self._enabled: + return None + + event_id = spark_memory.record_event( + event_type="task_posted", + description=description, + task_id=task_id, + data=json.dumps({"candidates": candidate_agents or []}), + ) + + # Generate EIDOS prediction + if candidate_agents: + spark_eidos.predict_task_outcome( + task_id=task_id, + task_description=description, + candidate_agents=candidate_agents, + ) + + logger.debug("Spark: captured task_posted %s", task_id[:8]) + return event_id + + def on_bid_submitted( + self, task_id: str, agent_id: str, bid_sats: int, + ) -> Optional[str]: + """Capture a bid event.""" + if not self._enabled: + return None + + event_id = 
spark_memory.record_event( + event_type="bid_submitted", + description=f"Agent {agent_id[:8]} bid {bid_sats} sats", + agent_id=agent_id, + task_id=task_id, + data=json.dumps({"bid_sats": bid_sats}), + ) + + logger.debug("Spark: captured bid %s→%s (%d sats)", + agent_id[:8], task_id[:8], bid_sats) + return event_id + + def on_task_assigned( + self, task_id: str, agent_id: str, + ) -> Optional[str]: + """Capture a task-assigned event.""" + if not self._enabled: + return None + + event_id = spark_memory.record_event( + event_type="task_assigned", + description=f"Task assigned to {agent_id[:8]}", + agent_id=agent_id, + task_id=task_id, + ) + + logger.debug("Spark: captured assignment %s→%s", + task_id[:8], agent_id[:8]) + return event_id + + def on_task_completed( + self, + task_id: str, + agent_id: str, + result: str, + winning_bid: Optional[int] = None, + ) -> Optional[str]: + """Capture a task-completed event and evaluate EIDOS prediction.""" + if not self._enabled: + return None + + event_id = spark_memory.record_event( + event_type="task_completed", + description=f"Task completed by {agent_id[:8]}", + agent_id=agent_id, + task_id=task_id, + data=json.dumps({ + "result_length": len(result), + "winning_bid": winning_bid, + }), + ) + + # Evaluate EIDOS prediction + evaluation = spark_eidos.evaluate_prediction( + task_id=task_id, + actual_winner=agent_id, + task_succeeded=True, + winning_bid=winning_bid, + ) + if evaluation: + accuracy = evaluation["accuracy"] + spark_memory.record_event( + event_type="prediction_result", + description=f"Prediction accuracy: {accuracy:.0%}", + task_id=task_id, + data=json.dumps(evaluation, default=str), + importance=0.7, + ) + + # Consolidate memory if enough events for this agent + self._maybe_consolidate(agent_id) + + logger.debug("Spark: captured completion %s by %s", + task_id[:8], agent_id[:8]) + return event_id + + def on_task_failed( + self, + task_id: str, + agent_id: str, + reason: str, + ) -> Optional[str]: + """Capture a 
task-failed event and evaluate EIDOS prediction.""" + if not self._enabled: + return None + + event_id = spark_memory.record_event( + event_type="task_failed", + description=f"Task failed by {agent_id[:8]}: {reason[:80]}", + agent_id=agent_id, + task_id=task_id, + data=json.dumps({"reason": reason}), + ) + + # Evaluate EIDOS prediction + spark_eidos.evaluate_prediction( + task_id=task_id, + actual_winner=agent_id, + task_succeeded=False, + ) + + # Failures always worth consolidating + self._maybe_consolidate(agent_id) + + logger.debug("Spark: captured failure %s by %s", + task_id[:8], agent_id[:8]) + return event_id + + def on_agent_joined(self, agent_id: str, name: str) -> Optional[str]: + """Capture an agent-joined event.""" + if not self._enabled: + return None + + return spark_memory.record_event( + event_type="agent_joined", + description=f"Agent {name} ({agent_id[:8]}) joined the swarm", + agent_id=agent_id, + ) + + # ── Memory consolidation ──────────────────────────────────────────────── + + def _maybe_consolidate(self, agent_id: str) -> None: + """Consolidate events into memories when enough data exists.""" + agent_events = spark_memory.get_events(agent_id=agent_id, limit=50) + if len(agent_events) < 5: + return + + completions = [e for e in agent_events if e.event_type == "task_completed"] + failures = [e for e in agent_events if e.event_type == "task_failed"] + total = len(completions) + len(failures) + + if total < 3: + return + + success_rate = len(completions) / total if total else 0 + + if success_rate >= 0.8: + spark_memory.store_memory( + memory_type="pattern", + subject=agent_id, + content=f"Agent {agent_id[:8]} has a strong track record: " + f"{len(completions)}/{total} tasks completed successfully.", + confidence=min(0.95, 0.6 + total * 0.05), + source_events=total, + ) + elif success_rate <= 0.3: + spark_memory.store_memory( + memory_type="anomaly", + subject=agent_id, + content=f"Agent {agent_id[:8]} is struggling: only " + 
f"{len(completions)}/{total} tasks completed.", + confidence=min(0.95, 0.6 + total * 0.05), + source_events=total, + ) + + # ── Query API ──────────────────────────────────────────────────────────── + + def status(self) -> dict: + """Return a summary of Spark Intelligence state.""" + eidos_stats = spark_eidos.get_accuracy_stats() + return { + "enabled": self._enabled, + "events_captured": spark_memory.count_events(), + "memories_stored": spark_memory.count_memories(), + "predictions": eidos_stats, + "event_types": { + "task_posted": spark_memory.count_events("task_posted"), + "bid_submitted": spark_memory.count_events("bid_submitted"), + "task_assigned": spark_memory.count_events("task_assigned"), + "task_completed": spark_memory.count_events("task_completed"), + "task_failed": spark_memory.count_events("task_failed"), + "agent_joined": spark_memory.count_events("agent_joined"), + }, + } + + def get_advisories(self) -> list[Advisory]: + """Generate current advisories based on accumulated intelligence.""" + if not self._enabled: + return [] + return spark_advisor.generate_advisories() + + def get_timeline(self, limit: int = 50) -> list[SparkEvent]: + """Return recent events as a timeline.""" + return spark_memory.get_events(limit=limit) + + def get_memories(self, limit: int = 50) -> list[SparkMemory]: + """Return consolidated memories.""" + return spark_memory.get_memories(limit=limit) + + def get_predictions(self, limit: int = 20) -> list: + """Return recent EIDOS predictions.""" + return spark_eidos.get_predictions(limit=limit) + + +# Module-level singleton — respects SPARK_ENABLED config +def _create_engine() -> SparkEngine: + try: + from config import settings + return SparkEngine(enabled=settings.spark_enabled) + except Exception: + return SparkEngine(enabled=True) + + +spark_engine = _create_engine() diff --git a/src/spark/memory.py b/src/spark/memory.py new file mode 100644 index 0000000..238d4f3 --- /dev/null +++ b/src/spark/memory.py @@ -0,0 +1,301 @@ 
+"""Spark memory — SQLite-backed event capture and memory consolidation. + +Captures swarm events (tasks posted, bids, assignments, completions, +failures) and distills them into higher-level memories with importance +scoring. This is the persistence layer for Spark Intelligence. + +Tables +------ +spark_events — raw event log (every swarm event) +spark_memories — consolidated insights extracted from event patterns +""" + +import sqlite3 +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +DB_PATH = Path("data/spark.db") + +# Importance thresholds +IMPORTANCE_LOW = 0.3 +IMPORTANCE_MEDIUM = 0.6 +IMPORTANCE_HIGH = 0.8 + + +@dataclass +class SparkEvent: + """A single captured swarm event.""" + id: str + event_type: str # task_posted, bid, assignment, completion, failure + agent_id: Optional[str] + task_id: Optional[str] + description: str + data: str # JSON payload + importance: float # 0.0–1.0 + created_at: str + + +@dataclass +class SparkMemory: + """A consolidated memory distilled from event patterns.""" + id: str + memory_type: str # pattern, insight, anomaly + subject: str # agent_id or "system" + content: str # Human-readable insight + confidence: float # 0.0–1.0 + source_events: int # How many events contributed + created_at: str + expires_at: Optional[str] + + +def _get_conn() -> sqlite3.Connection: + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(DB_PATH)) + conn.row_factory = sqlite3.Row + conn.execute( + """ + CREATE TABLE IF NOT EXISTS spark_events ( + id TEXT PRIMARY KEY, + event_type TEXT NOT NULL, + agent_id TEXT, + task_id TEXT, + description TEXT NOT NULL DEFAULT '', + data TEXT NOT NULL DEFAULT '{}', + importance REAL NOT NULL DEFAULT 0.5, + created_at TEXT NOT NULL + ) + """ + ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS spark_memories ( + id TEXT PRIMARY KEY, + memory_type TEXT NOT NULL, + subject TEXT NOT NULL 
DEFAULT 'system', + content TEXT NOT NULL, + confidence REAL NOT NULL DEFAULT 0.5, + source_events INTEGER NOT NULL DEFAULT 0, + created_at TEXT NOT NULL, + expires_at TEXT + ) + """ + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_events_type ON spark_events(event_type)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_events_agent ON spark_events(agent_id)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_events_task ON spark_events(task_id)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_memories_subject ON spark_memories(subject)" + ) + conn.commit() + return conn + + +# ── Importance scoring ────────────────────────────────────────────────────── + +def score_importance(event_type: str, data: dict) -> float: + """Compute importance score for an event (0.0–1.0). + + High-importance events: failures, large bids, first-time patterns. + Low-importance events: routine bids, repeated successful completions. + """ + base_scores = { + "task_posted": 0.4, + "bid_submitted": 0.2, + "task_assigned": 0.5, + "task_completed": 0.6, + "task_failed": 0.9, + "agent_joined": 0.5, + "prediction_result": 0.7, + } + score = base_scores.get(event_type, 0.5) + + # Boost for failures (always important to learn from) + if event_type == "task_failed": + score = min(1.0, score + 0.1) + + # Boost for high-value bids + bid_sats = data.get("bid_sats", 0) + if bid_sats and bid_sats > 80: + score = min(1.0, score + 0.15) + + return round(score, 2) + + +# ── Event recording ───────────────────────────────────────────────────────── + +def record_event( + event_type: str, + description: str, + agent_id: Optional[str] = None, + task_id: Optional[str] = None, + data: str = "{}", + importance: Optional[float] = None, +) -> str: + """Record a swarm event. 
Returns the event id.""" + import json + event_id = str(uuid.uuid4()) + now = datetime.now(timezone.utc).isoformat() + + if importance is None: + try: + parsed = json.loads(data) if isinstance(data, str) else data + except (json.JSONDecodeError, TypeError): + parsed = {} + importance = score_importance(event_type, parsed) + + conn = _get_conn() + conn.execute( + """ + INSERT INTO spark_events + (id, event_type, agent_id, task_id, description, data, importance, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + (event_id, event_type, agent_id, task_id, description, data, importance, now), + ) + conn.commit() + conn.close() + return event_id + + +def get_events( + event_type: Optional[str] = None, + agent_id: Optional[str] = None, + task_id: Optional[str] = None, + limit: int = 100, + min_importance: float = 0.0, +) -> list[SparkEvent]: + """Query events with optional filters.""" + conn = _get_conn() + query = "SELECT * FROM spark_events WHERE importance >= ?" + params: list = [min_importance] + + if event_type: + query += " AND event_type = ?" + params.append(event_type) + if agent_id: + query += " AND agent_id = ?" + params.append(agent_id) + if task_id: + query += " AND task_id = ?" + params.append(task_id) + + query += " ORDER BY created_at DESC LIMIT ?" 
+ params.append(limit) + + rows = conn.execute(query, params).fetchall() + conn.close() + return [ + SparkEvent( + id=r["id"], + event_type=r["event_type"], + agent_id=r["agent_id"], + task_id=r["task_id"], + description=r["description"], + data=r["data"], + importance=r["importance"], + created_at=r["created_at"], + ) + for r in rows + ] + + +def count_events(event_type: Optional[str] = None) -> int: + """Count events, optionally filtered by type.""" + conn = _get_conn() + if event_type: + row = conn.execute( + "SELECT COUNT(*) FROM spark_events WHERE event_type = ?", + (event_type,), + ).fetchone() + else: + row = conn.execute("SELECT COUNT(*) FROM spark_events").fetchone() + conn.close() + return row[0] + + +# ── Memory consolidation ─────────────────────────────────────────────────── + +def store_memory( + memory_type: str, + subject: str, + content: str, + confidence: float = 0.5, + source_events: int = 0, + expires_at: Optional[str] = None, +) -> str: + """Store a consolidated memory. Returns the memory id.""" + mem_id = str(uuid.uuid4()) + now = datetime.now(timezone.utc).isoformat() + conn = _get_conn() + conn.execute( + """ + INSERT INTO spark_memories + (id, memory_type, subject, content, confidence, source_events, created_at, expires_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + (mem_id, memory_type, subject, content, confidence, source_events, now, expires_at), + ) + conn.commit() + conn.close() + return mem_id + + +def get_memories( + memory_type: Optional[str] = None, + subject: Optional[str] = None, + min_confidence: float = 0.0, + limit: int = 50, +) -> list[SparkMemory]: + """Query memories with optional filters.""" + conn = _get_conn() + query = "SELECT * FROM spark_memories WHERE confidence >= ?" + params: list = [min_confidence] + + if memory_type: + query += " AND memory_type = ?" + params.append(memory_type) + if subject: + query += " AND subject = ?" + params.append(subject) + + query += " ORDER BY created_at DESC LIMIT ?" 
+ params.append(limit) + + rows = conn.execute(query, params).fetchall() + conn.close() + return [ + SparkMemory( + id=r["id"], + memory_type=r["memory_type"], + subject=r["subject"], + content=r["content"], + confidence=r["confidence"], + source_events=r["source_events"], + created_at=r["created_at"], + expires_at=r["expires_at"], + ) + for r in rows + ] + + +def count_memories(memory_type: Optional[str] = None) -> int: + """Count memories, optionally filtered by type.""" + conn = _get_conn() + if memory_type: + row = conn.execute( + "SELECT COUNT(*) FROM spark_memories WHERE memory_type = ?", + (memory_type,), + ).fetchone() + else: + row = conn.execute("SELECT COUNT(*) FROM spark_memories").fetchone() + conn.close() + return row[0] diff --git a/src/swarm/coordinator.py b/src/swarm/coordinator.py index 1107bdb..c6d68b2 100644 --- a/src/swarm/coordinator.py +++ b/src/swarm/coordinator.py @@ -29,6 +29,15 @@ from swarm.tasks import ( update_task, ) +# Spark Intelligence integration — lazy import to avoid circular deps +def _get_spark(): + """Lazily import the Spark engine singleton.""" + try: + from spark.engine import spark_engine + return spark_engine + except Exception: + return None + logger = logging.getLogger(__name__) @@ -100,6 +109,10 @@ class SwarmCoordinator: ) # Broadcast bid via WebSocket self._broadcast(self._broadcast_bid, task_id, aid, bid_sats) + # Spark: capture bid event + spark = _get_spark() + if spark: + spark.on_bid_submitted(task_id, aid, bid_sats) self.comms.subscribe("swarm:tasks", _bid_and_register) @@ -109,15 +122,20 @@ class SwarmCoordinator: capabilities=meta["capabilities"], agent_id=aid, ) - + # Register capability manifest with routing engine swarm_routing.routing_engine.register_persona(persona_id, aid) - + self._in_process_nodes.append(node) logger.info("Spawned persona %s (%s)", node.name, aid) - + # Broadcast agent join via WebSocket self._broadcast(self._broadcast_agent_joined, aid, node.name) + + # Spark: capture agent join + 
spark = _get_spark() + if spark: + spark.on_agent_joined(aid, node.name) return { "agent_id": aid, @@ -193,6 +211,11 @@ class SwarmCoordinator: logger.info("Task posted: %s (%s)", task.id, description[:50]) # Broadcast task posted via WebSocket self._broadcast(self._broadcast_task_posted, task.id, description) + # Spark: capture task-posted event with candidate agents + spark = _get_spark() + if spark: + candidates = [a.id for a in registry.list_agents()] + spark.on_task_posted(task.id, description, candidates) return task async def run_auction_and_assign(self, task_id: str) -> Optional[Bid]: @@ -259,6 +282,10 @@ class SwarmCoordinator: ) # Broadcast task assigned via WebSocket self._broadcast(self._broadcast_task_assigned, task_id, winner.agent_id) + # Spark: capture assignment + spark = _get_spark() + if spark: + spark.on_task_assigned(task_id, winner.agent_id) else: update_task(task_id, status=TaskStatus.FAILED) logger.warning("Task %s: no bids received, marked as failed", task_id) @@ -286,6 +313,10 @@ class SwarmCoordinator: self._broadcast_task_completed, task_id, task.assigned_agent, result ) + # Spark: capture completion + spark = _get_spark() + if spark: + spark.on_task_completed(task_id, task.assigned_agent, result) return updated def fail_task(self, task_id: str, reason: str = "") -> Optional[Task]: @@ -304,6 +335,10 @@ class SwarmCoordinator: registry.update_status(task.assigned_agent, "idle") # Record failure in learner swarm_learner.record_task_result(task_id, task.assigned_agent, succeeded=False) + # Spark: capture failure + spark = _get_spark() + if spark: + spark.on_task_failed(task_id, task.assigned_agent, reason) return updated def get_task(self, task_id: str) -> Optional[Task]: @@ -377,7 +412,7 @@ class SwarmCoordinator: """Return a summary of the swarm state.""" agents = registry.list_agents() tasks = list_tasks() - return { + status = { "agents": len(agents), "agents_idle": sum(1 for a in agents if a.status == "idle"), "agents_busy": sum(1 for 
a in agents if a.status == "busy"), @@ -388,6 +423,16 @@ class SwarmCoordinator: "active_auctions": len(self.auctions.active_auctions), "routing_manifests": len(swarm_routing.routing_engine._manifests), } + # Include Spark Intelligence summary if available + spark = _get_spark() + if spark and spark.enabled: + spark_status = spark.status() + status["spark"] = { + "events_captured": spark_status["events_captured"], + "memories_stored": spark_status["memories_stored"], + "prediction_accuracy": spark_status["predictions"]["avg_accuracy"], + } + return status def get_routing_decisions(self, task_id: Optional[str] = None, limit: int = 100) -> list: """Get routing decision history for audit. diff --git a/tests/test_spark.py b/tests/test_spark.py new file mode 100644 index 0000000..ce046af --- /dev/null +++ b/tests/test_spark.py @@ -0,0 +1,431 @@ +"""Tests for the Spark Intelligence integration. + +Covers: +- spark.memory: event capture, memory consolidation, importance scoring +- spark.eidos: predictions, evaluations, accuracy stats +- spark.advisor: advisory generation from patterns +- spark.engine: top-level engine wiring all subsystems +- dashboard.routes.spark: HTTP endpoints +""" + +import json +from pathlib import Path + +import pytest + + +# ── Fixtures ──────────────────────────────────────────────────────────────── + +@pytest.fixture(autouse=True) +def tmp_spark_db(tmp_path, monkeypatch): + """Redirect all Spark SQLite writes to a temp directory.""" + db_path = tmp_path / "spark.db" + monkeypatch.setattr("spark.memory.DB_PATH", db_path) + monkeypatch.setattr("spark.eidos.DB_PATH", db_path) + yield db_path + + +# ── spark.memory ──────────────────────────────────────────────────────────── + + +class TestImportanceScoring: + def test_failure_scores_high(self): + from spark.memory import score_importance + score = score_importance("task_failed", {}) + assert score >= 0.9 + + def test_bid_scores_low(self): + from spark.memory import score_importance + score = 
score_importance("bid_submitted", {}) + assert score <= 0.3 + + def test_high_bid_boosts_score(self): + from spark.memory import score_importance + low = score_importance("bid_submitted", {"bid_sats": 10}) + high = score_importance("bid_submitted", {"bid_sats": 100}) + assert high > low + + def test_unknown_event_default(self): + from spark.memory import score_importance + score = score_importance("unknown_type", {}) + assert score == 0.5 + + +class TestEventRecording: + def test_record_and_query(self): + from spark.memory import record_event, get_events + eid = record_event("task_posted", "Test task", task_id="t1") + assert eid + events = get_events(task_id="t1") + assert len(events) == 1 + assert events[0].event_type == "task_posted" + assert events[0].description == "Test task" + + def test_record_with_agent(self): + from spark.memory import record_event, get_events + record_event("bid_submitted", "Agent bid", agent_id="a1", task_id="t2", + data='{"bid_sats": 50}') + events = get_events(agent_id="a1") + assert len(events) == 1 + assert events[0].agent_id == "a1" + + def test_filter_by_event_type(self): + from spark.memory import record_event, get_events + record_event("task_posted", "posted", task_id="t3") + record_event("task_completed", "completed", task_id="t3") + posted = get_events(event_type="task_posted") + assert len(posted) == 1 + + def test_filter_by_min_importance(self): + from spark.memory import record_event, get_events + record_event("bid_submitted", "low", importance=0.1) + record_event("task_failed", "high", importance=0.9) + high_events = get_events(min_importance=0.5) + assert len(high_events) == 1 + assert high_events[0].event_type == "task_failed" + + def test_count_events(self): + from spark.memory import record_event, count_events + record_event("task_posted", "a") + record_event("task_posted", "b") + record_event("task_completed", "c") + assert count_events() == 3 + assert count_events("task_posted") == 2 + + def test_limit_results(self): 
+ from spark.memory import record_event, get_events + for i in range(10): + record_event("bid_submitted", f"bid {i}") + events = get_events(limit=3) + assert len(events) == 3 + + +class TestMemoryConsolidation: + def test_store_and_query_memory(self): + from spark.memory import store_memory, get_memories + mid = store_memory("pattern", "agent-x", "Strong performer", confidence=0.8) + assert mid + memories = get_memories(subject="agent-x") + assert len(memories) == 1 + assert memories[0].content == "Strong performer" + + def test_filter_by_type(self): + from spark.memory import store_memory, get_memories + store_memory("pattern", "system", "Good pattern") + store_memory("anomaly", "system", "Bad anomaly") + patterns = get_memories(memory_type="pattern") + assert len(patterns) == 1 + assert patterns[0].memory_type == "pattern" + + def test_filter_by_confidence(self): + from spark.memory import store_memory, get_memories + store_memory("pattern", "a", "Low conf", confidence=0.2) + store_memory("pattern", "b", "High conf", confidence=0.9) + high = get_memories(min_confidence=0.5) + assert len(high) == 1 + assert high[0].content == "High conf" + + def test_count_memories(self): + from spark.memory import store_memory, count_memories + store_memory("pattern", "a", "X") + store_memory("anomaly", "b", "Y") + assert count_memories() == 2 + assert count_memories("pattern") == 1 + + +# ── spark.eidos ───────────────────────────────────────────────────────────── + + +class TestPredictions: + def test_predict_stores_prediction(self): + from spark.eidos import predict_task_outcome, get_predictions + result = predict_task_outcome("t1", "Fix the bug", ["agent-a", "agent-b"]) + assert "prediction_id" in result + assert result["likely_winner"] == "agent-a" + preds = get_predictions(task_id="t1") + assert len(preds) == 1 + + def test_predict_with_history(self): + from spark.eidos import predict_task_outcome + history = { + "agent-a": {"success_rate": 0.3, "avg_winning_bid": 40}, + 
"agent-b": {"success_rate": 0.9, "avg_winning_bid": 30}, + } + result = predict_task_outcome( + "t2", "Research topic", ["agent-a", "agent-b"], + agent_history=history, + ) + assert result["likely_winner"] == "agent-b" + assert result["success_probability"] > 0.5 + + def test_predict_empty_candidates(self): + from spark.eidos import predict_task_outcome + result = predict_task_outcome("t3", "No agents", []) + assert result["likely_winner"] is None + + +class TestEvaluation: + def test_evaluate_correct_prediction(self): + from spark.eidos import predict_task_outcome, evaluate_prediction + predict_task_outcome("t4", "Task", ["agent-a"]) + result = evaluate_prediction("t4", "agent-a", task_succeeded=True, winning_bid=30) + assert result is not None + assert result["accuracy"] > 0.0 + + def test_evaluate_wrong_prediction(self): + from spark.eidos import predict_task_outcome, evaluate_prediction + predict_task_outcome("t5", "Task", ["agent-a"]) + result = evaluate_prediction("t5", "agent-b", task_succeeded=False) + assert result is not None + # Wrong winner + failed = lower accuracy + assert result["accuracy"] < 1.0 + + def test_evaluate_no_prediction_returns_none(self): + from spark.eidos import evaluate_prediction + result = evaluate_prediction("no-task", "agent-a", task_succeeded=True) + assert result is None + + def test_double_evaluation_returns_none(self): + from spark.eidos import predict_task_outcome, evaluate_prediction + predict_task_outcome("t6", "Task", ["agent-a"]) + evaluate_prediction("t6", "agent-a", task_succeeded=True) + # Second evaluation should return None (already evaluated) + result = evaluate_prediction("t6", "agent-a", task_succeeded=True) + assert result is None + + +class TestAccuracyStats: + def test_empty_stats(self): + from spark.eidos import get_accuracy_stats + stats = get_accuracy_stats() + assert stats["total_predictions"] == 0 + assert stats["evaluated"] == 0 + assert stats["avg_accuracy"] == 0.0 + + def 
test_stats_after_evaluations(self): + from spark.eidos import predict_task_outcome, evaluate_prediction, get_accuracy_stats + for i in range(3): + predict_task_outcome(f"task-{i}", "Description", ["agent-a"]) + evaluate_prediction(f"task-{i}", "agent-a", task_succeeded=True, winning_bid=30) + stats = get_accuracy_stats() + assert stats["total_predictions"] == 3 + assert stats["evaluated"] == 3 + assert stats["pending"] == 0 + assert stats["avg_accuracy"] > 0.0 + + +class TestComputeAccuracy: + def test_perfect_prediction(self): + from spark.eidos import _compute_accuracy + predicted = { + "likely_winner": "agent-a", + "success_probability": 1.0, + "estimated_bid_range": [20, 40], + } + actual = {"winner": "agent-a", "succeeded": True, "winning_bid": 30} + acc = _compute_accuracy(predicted, actual) + assert acc == pytest.approx(1.0, abs=0.01) + + def test_all_wrong(self): + from spark.eidos import _compute_accuracy + predicted = { + "likely_winner": "agent-a", + "success_probability": 1.0, + "estimated_bid_range": [10, 20], + } + actual = {"winner": "agent-b", "succeeded": False, "winning_bid": 100} + acc = _compute_accuracy(predicted, actual) + assert acc < 0.5 + + def test_partial_credit(self): + from spark.eidos import _compute_accuracy + predicted = { + "likely_winner": "agent-a", + "success_probability": 0.5, + "estimated_bid_range": [20, 40], + } + actual = {"winner": "agent-b", "succeeded": True, "winning_bid": 30} + acc = _compute_accuracy(predicted, actual) + # Wrong winner but right success and in bid range → partial + assert 0.2 < acc < 0.8 + + +# ── spark.advisor ─────────────────────────────────────────────────────────── + + +class TestAdvisor: + def test_insufficient_data(self): + from spark.advisor import generate_advisories + advisories = generate_advisories() + assert len(advisories) >= 1 + assert advisories[0].category == "system_health" + assert "Insufficient" in advisories[0].title + + def test_failure_detection(self): + from spark.memory import 
record_event + from spark.advisor import generate_advisories + # Record enough events to pass the minimum threshold + for i in range(5): + record_event("task_failed", f"Failed task {i}", + agent_id="agent-bad", task_id=f"t-{i}") + advisories = generate_advisories() + failure_advisories = [a for a in advisories if a.category == "failure_prevention"] + assert len(failure_advisories) >= 1 + assert "agent-ba" in failure_advisories[0].title + + def test_advisories_sorted_by_priority(self): + from spark.memory import record_event + from spark.advisor import generate_advisories + for i in range(4): + record_event("task_posted", f"posted {i}", task_id=f"p-{i}") + record_event("task_completed", f"done {i}", + agent_id="agent-good", task_id=f"p-{i}") + advisories = generate_advisories() + if len(advisories) >= 2: + assert advisories[0].priority >= advisories[-1].priority + + def test_no_activity_advisory(self): + from spark.advisor import _check_system_activity + advisories = _check_system_activity() + assert len(advisories) >= 1 + assert "No swarm activity" in advisories[0].title + + +# ── spark.engine ──────────────────────────────────────────────────────────── + + +class TestSparkEngine: + def test_engine_enabled(self): + from spark.engine import SparkEngine + engine = SparkEngine(enabled=True) + assert engine.enabled + + def test_engine_disabled(self): + from spark.engine import SparkEngine + engine = SparkEngine(enabled=False) + result = engine.on_task_posted("t1", "Ignored task") + assert result is None + + def test_on_task_posted(self): + from spark.engine import SparkEngine + from spark.memory import get_events + engine = SparkEngine(enabled=True) + eid = engine.on_task_posted("t1", "Test task", ["agent-a"]) + assert eid is not None + events = get_events(task_id="t1") + assert len(events) == 1 + + def test_on_bid_submitted(self): + from spark.engine import SparkEngine + from spark.memory import get_events + engine = SparkEngine(enabled=True) + eid = 
engine.on_bid_submitted("t1", "agent-a", 50) + assert eid is not None + events = get_events(event_type="bid_submitted") + assert len(events) == 1 + + def test_on_task_assigned(self): + from spark.engine import SparkEngine + from spark.memory import get_events + engine = SparkEngine(enabled=True) + eid = engine.on_task_assigned("t1", "agent-a") + assert eid is not None + events = get_events(event_type="task_assigned") + assert len(events) == 1 + + def test_on_task_completed_evaluates_prediction(self): + from spark.engine import SparkEngine + from spark.eidos import get_predictions + engine = SparkEngine(enabled=True) + engine.on_task_posted("t1", "Fix bug", ["agent-a"]) + eid = engine.on_task_completed("t1", "agent-a", "Fixed it") + assert eid is not None + preds = get_predictions(task_id="t1") + # Should have prediction(s) evaluated + assert len(preds) >= 1 + + def test_on_task_failed(self): + from spark.engine import SparkEngine + from spark.memory import get_events + engine = SparkEngine(enabled=True) + engine.on_task_posted("t1", "Deploy server", ["agent-a"]) + eid = engine.on_task_failed("t1", "agent-a", "Connection timeout") + assert eid is not None + events = get_events(event_type="task_failed") + assert len(events) == 1 + + def test_on_agent_joined(self): + from spark.engine import SparkEngine + from spark.memory import get_events + engine = SparkEngine(enabled=True) + eid = engine.on_agent_joined("agent-a", "Echo") + assert eid is not None + events = get_events(event_type="agent_joined") + assert len(events) == 1 + + def test_status(self): + from spark.engine import SparkEngine + engine = SparkEngine(enabled=True) + engine.on_task_posted("t1", "Test", ["agent-a"]) + engine.on_bid_submitted("t1", "agent-a", 30) + status = engine.status() + assert status["enabled"] is True + assert status["events_captured"] >= 2 + assert "predictions" in status + assert "event_types" in status + + def test_get_advisories(self): + from spark.engine import SparkEngine + engine 
= SparkEngine(enabled=True) + advisories = engine.get_advisories() + assert isinstance(advisories, list) + + def test_get_advisories_disabled(self): + from spark.engine import SparkEngine + engine = SparkEngine(enabled=False) + advisories = engine.get_advisories() + assert advisories == [] + + def test_get_timeline(self): + from spark.engine import SparkEngine + engine = SparkEngine(enabled=True) + engine.on_task_posted("t1", "Task 1") + engine.on_task_posted("t2", "Task 2") + timeline = engine.get_timeline(limit=10) + assert len(timeline) == 2 + + def test_memory_consolidation(self): + from spark.engine import SparkEngine + from spark.memory import get_memories + engine = SparkEngine(enabled=True) + # Generate enough completions to trigger consolidation (>=5 events, >=3 outcomes) + for i in range(6): + engine.on_task_completed(f"t-{i}", "agent-star", f"Result {i}") + memories = get_memories(subject="agent-star") + # Should have at least one consolidated memory about strong performance + assert len(memories) >= 1 + + +# ── Dashboard routes ──────────────────────────────────────────────────────── + + +class TestSparkRoutes: + def test_spark_json(self, client): + resp = client.get("/spark") + assert resp.status_code == 200 + data = resp.json() + assert "status" in data + assert "advisories" in data + + def test_spark_ui(self, client): + resp = client.get("/spark/ui") + assert resp.status_code == 200 + assert "SPARK INTELLIGENCE" in resp.text + + def test_spark_timeline(self, client): + resp = client.get("/spark/timeline") + assert resp.status_code == 200 + + def test_spark_insights(self, client): + resp = client.get("/spark/insights") + assert resp.status_code == 200 From 1103da339cb5b40a6bb758fbd725e98118186125 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 16:31:47 +0000 Subject: [PATCH 02/32] feat: add full creative studio + DevOps tools (Pixel, Lyra, Reel personas) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Adds 3 new personas (Pixel, Lyra, Reel) and 5 new tool modules: - Git/DevOps tools (GitPython): clone, status, diff, log, blame, branch, add, commit, push, pull, stash — wired to Forge and Helm personas - Image generation (FLUX via diffusers): text-to-image, storyboards, variations — Pixel persona - Music generation (ACE-Step 1.5): full songs with vocals+instrumentals, instrumental tracks, vocal-only tracks — Lyra persona - Video generation (Wan 2.1 via diffusers): text-to-video, image-to-video clips — Reel persona - Creative Director pipeline: multi-step orchestration that chains storyboard → music → video → assembly into 3+ minute final videos - Video assembler (MoviePy + FFmpeg): stitch clips, overlay audio, title cards, subtitles, final export Also includes: - Spark Intelligence tool-level + creative pipeline event capture - Creative Studio dashboard page (/creative/ui) with 4 tabs - Config settings for all new models and output directories - pyproject.toml creative optional extra for GPU dependencies - 107 new tests covering all modules (624 total, all passing) https://claude.ai/code/session_01KJm6jQkNi3aA3yoQJn636c --- PLAN.md | 478 ++++++++++++++++++++++++++ pyproject.toml | 14 + src/config.py | 22 ++ src/creative/__init__.py | 1 + src/creative/assembler.py | 300 ++++++++++++++++ src/creative/director.py | 378 ++++++++++++++++++++ src/dashboard/app.py | 2 + src/dashboard/routes/creative.py | 87 +++++ src/dashboard/templates/base.html | 1 + src/dashboard/templates/creative.html | 198 +++++++++++ src/spark/engine.py | 67 ++++ src/swarm/personas.py | 58 +++- src/swarm/tool_executor.py | 40 ++- src/timmy/tools.py | 107 +++++- src/tools/__init__.py | 1 + src/tools/git_tools.py | 281 +++++++++++++++ src/tools/image_tools.py | 171 +++++++++ src/tools/music_tools.py | 210 +++++++++++ src/tools/video_tools.py | 206 +++++++++++ tests/test_assembler.py | 69 ++++ tests/test_creative_director.py | 190 ++++++++++ tests/test_creative_route.py | 61 ++++ 
tests/test_dashboard_routes.py | 4 +- tests/test_git_tools.py | 183 ++++++++++ tests/test_image_tools.py | 120 +++++++ tests/test_music_tools.py | 124 +++++++ tests/test_spark_tools_creative.py | 110 ++++++ tests/test_swarm_personas.py | 10 +- tests/test_video_tools.py | 93 +++++ 29 files changed, 3573 insertions(+), 13 deletions(-) create mode 100644 PLAN.md create mode 100644 src/creative/__init__.py create mode 100644 src/creative/assembler.py create mode 100644 src/creative/director.py create mode 100644 src/dashboard/routes/creative.py create mode 100644 src/dashboard/templates/creative.html create mode 100644 src/tools/__init__.py create mode 100644 src/tools/git_tools.py create mode 100644 src/tools/image_tools.py create mode 100644 src/tools/music_tools.py create mode 100644 src/tools/video_tools.py create mode 100644 tests/test_assembler.py create mode 100644 tests/test_creative_director.py create mode 100644 tests/test_creative_route.py create mode 100644 tests/test_git_tools.py create mode 100644 tests/test_image_tools.py create mode 100644 tests/test_music_tools.py create mode 100644 tests/test_spark_tools_creative.py create mode 100644 tests/test_video_tools.py diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 0000000..a54ab1b --- /dev/null +++ b/PLAN.md @@ -0,0 +1,478 @@ +# Plan: Full Creative & DevOps Capabilities for Timmy + +## Overview + +Add five major capability domains to Timmy's agent system, turning it into a +sovereign creative studio and full-stack DevOps operator. All tools are +open-source, self-hosted, and GPU-accelerated where needed. + +--- + +## Phase 1: Git & DevOps Tools (Forge + Helm personas) + +**Goal:** Timmy can observe local/remote repos, read code, create branches, +stage changes, commit, diff, log, and manage PRs — all through the swarm +task system with Spark event capture. 
+ +### New module: `src/tools/git_tools.py` + +Tools to add (using **GitPython** — BSD-3, `pip install GitPython`): + +| Tool | Function | Persona Access | +|---|---|---| +| `git_clone` | Clone a remote repo to local path | Forge, Helm | +| `git_status` | Show working tree status | Forge, Helm, Timmy | +| `git_diff` | Show staged/unstaged diffs | Forge, Helm, Timmy | +| `git_log` | Show recent commit history | Forge, Helm, Echo, Timmy | +| `git_branch` | List/create/switch branches | Forge, Helm | +| `git_add` | Stage files for commit | Forge, Helm | +| `git_commit` | Create a commit with message | Forge, Helm | +| `git_push` | Push to remote | Forge, Helm | +| `git_pull` | Pull from remote | Forge, Helm | +| `git_blame` | Show line-by-line authorship | Forge, Echo | +| `git_stash` | Stash/pop changes | Forge, Helm | + +### Changes to existing files + +- **`src/timmy/tools.py`** — Add `create_git_tools()` factory, wire into + `PERSONA_TOOLKITS` for Forge and Helm +- **`src/swarm/tool_executor.py`** — Enhance `_infer_tools_needed()` with + git keywords (commit, branch, push, pull, diff, clone, merge) +- **`src/config.py`** — Add `git_default_repo_dir: str = "~/repos"` setting +- **`src/spark/engine.py`** — Add `on_tool_executed()` method to capture + individual tool invocations (not just task-level events) +- **`src/swarm/personas.py`** — Add git-related keywords to Forge and Helm + preferred_keywords + +### New dependency + +```toml +# pyproject.toml +dependencies = [ + ..., + "GitPython>=3.1.40", +] +``` + +### Dashboard + +- **`/tools`** page updated to show git tools in the catalog +- Git tool usage stats visible per agent + +### Tests + +- `tests/test_git_tools.py` — test all git tool functions against tmp repos +- Mock GitPython's `Repo` class for unit tests + +--- + +## Phase 2: Image Generation (new "Pixel" persona) + +**Goal:** Generate storyboard frames and standalone images from text prompts +using FLUX.2 Klein 4B locally. 
+ +### New persona: Pixel — Visual Architect + +```python +"pixel": { + "id": "pixel", + "name": "Pixel", + "role": "Visual Architect", + "description": "Image generation, storyboard frames, and visual design.", + "capabilities": "image-generation,storyboard,design", + "rate_sats": 80, + "bid_base": 60, + "bid_jitter": 20, + "preferred_keywords": [ + "image", "picture", "photo", "draw", "illustration", + "storyboard", "frame", "visual", "design", "generate", + "portrait", "landscape", "scene", "artwork", + ], +} +``` + +### New module: `src/tools/image_tools.py` + +Tools (using **diffusers** + **FLUX.2 Klein 4B** — Apache 2.0): + +| Tool | Function | +|---|---| +| `generate_image` | Text-to-image generation (returns file path) | +| `generate_storyboard` | Generate N frames from scene descriptions | +| `image_variations` | Generate variations of an existing image | + +### Architecture + +``` +generate_image(prompt, width=1024, height=1024, steps=4) + → loads FLUX.2 Klein via diffusers FluxPipeline + → saves to data/images/{uuid}.png + → returns path + metadata +``` + +- Model loaded lazily on first use, kept in memory for subsequent calls +- Falls back to CPU generation (slower) if no GPU +- Output saved to `data/images/` with metadata JSON sidecar + +### New dependency (optional extra) + +```toml +[project.optional-dependencies] +creative = [ + "diffusers>=0.30.0", + "transformers>=4.40.0", + "accelerate>=0.30.0", + "torch>=2.2.0", + "safetensors>=0.4.0", +] +``` + +### Config + +```python +# config.py additions +flux_model_id: str = "black-forest-labs/FLUX.2-klein-4b" +image_output_dir: str = "data/images" +image_default_steps: int = 4 +``` + +### Dashboard + +- `/creative/ui` — new Creative Studio page (image gallery + generation form) +- HTMX-powered: submit prompt, poll for result, display inline +- Gallery view of all generated images with metadata + +### Tests + +- `tests/test_image_tools.py` — mock diffusers pipeline, test prompt handling, + file output, 
storyboard generation + +--- + +## Phase 3: Music Generation (new "Lyra" persona) + +**Goal:** Generate full songs with vocals, instrumentals, and lyrics using +ACE-Step 1.5 locally. + +### New persona: Lyra — Sound Weaver + +```python +"lyra": { + "id": "lyra", + "name": "Lyra", + "role": "Sound Weaver", + "description": "Music and song generation with vocals, instrumentals, and lyrics.", + "capabilities": "music-generation,vocals,composition", + "rate_sats": 90, + "bid_base": 70, + "bid_jitter": 20, + "preferred_keywords": [ + "music", "song", "sing", "vocal", "instrumental", + "melody", "beat", "track", "compose", "lyrics", + "audio", "sound", "album", "remix", + ], +} +``` + +### New module: `src/tools/music_tools.py` + +Tools (using **ACE-Step 1.5** — Apache 2.0, `pip install ace-step`): + +| Tool | Function | +|---|---| +| `generate_song` | Text/lyrics → full song (vocals + instrumentals) | +| `generate_instrumental` | Text prompt → instrumental track | +| `generate_vocals` | Lyrics + style → vocal track | +| `list_genres` | Return supported genre/style tags | + +### Architecture + +``` +generate_song(lyrics, genre="pop", duration=120, language="en") + → loads ACE-Step model (lazy, cached) + → generates audio + → saves to data/music/{uuid}.wav + → returns path + metadata (duration, genre, etc.) +``` + +- Model loaded lazily, ~4GB VRAM minimum +- Output saved to `data/music/` with metadata sidecar +- Supports 19 languages, genre tags, tempo control + +### New dependency (optional extra, extends `creative`) + +```toml +[project.optional-dependencies] +creative = [ + ..., + "ace-step>=1.5.0", +] +``` + +### Config + +```python +music_output_dir: str = "data/music" +ace_step_model: str = "ace-step/ACE-Step-v1.5" +``` + +### Dashboard + +- `/creative/ui` expanded with Music tab +- Audio player widget (HTML5 `
- View on GitHub + View on GitHub Get Started
@@ -740,7 +740,7 @@ External: Ollama :11434 · optional Redis · optional LND gRPC
1

Clone

-
git clone https://github.com/Alexspayne/Timmy-time-dashboard.git
+        
git clone https://github.com/AlexanderWhitestone/Timmy-time-dashboard.git
 cd Timmy-time-dashboard
@@ -797,7 +797,7 @@ External: Ollama :11434 · optional Redis · optional LND gRPC

The agent team

This repo is built by a multi-agent team. Each tool has a defined lane. - See AGENTS.md + See AGENTS.md for the full development standards.

@@ -840,7 +840,7 @@ External: Ollama :11434 · optional Redis · optional LND gRPC
-
Alex Payne
+
Alexander Whitestone
Human · Orchestrator
  • Vision and product decisions
  • @@ -915,10 +915,10 @@ External: Ollama :11434 · optional Redis · optional LND gRPC No cloud. No telemetry. Sats are sovereignty.
From 832478f0d01dbef670fb76bae384db71303aaad4 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 17:18:29 +0000 Subject: [PATCH 05/32] fix: serve_chat endpoint bug, stale docs, and license mismatch - Fix /serve/chat AttributeError: split Request and ChatRequest params so auth headers are read from HTTP request, not Pydantic body - Add regression tests for the serve_chat endpoint bug - Add agent_core and lightning to pyproject.toml wheel includes - Replace Apache 2.0 LICENSE with MIT to match pyproject.toml - Update test count from "228" to "600+" across README, docs, AGENTS.md - Add 5 missing subsystems to README table (Spark, Creative, Tools, Telegram, agent_core/lightning) - Update AGENTS.md project structure with 6 missing modules - Mark completed v2 roadmap items (personas, MCP tools) in AGENTS.md https://claude.ai/code/session_01GMiccXbo77GkV3TA69x6KS --- AGENTS.md | 10 +- LICENSE | 214 +++------------------------------- README.md | 16 ++- docs/index.html | 10 +- pyproject.toml | 2 + src/timmy_serve/app.py | 8 +- tests/test_timmy_serve_app.py | 97 +++++++++++++++ 7 files changed, 146 insertions(+), 211 deletions(-) create mode 100644 tests/test_timmy_serve_app.py diff --git a/AGENTS.md b/AGENTS.md index 7acbbe8..99761f5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -35,10 +35,16 @@ src/ swarm/ # Multi-agent coordinator, registry, bidder, tasks, comms docker_runner.py # Spawn agents as Docker containers timmy_serve/ # L402 Lightning proxy, payment handler, TTS, CLI + spark/ # Intelligence engine — events, predictions, advisory + creative/ # Creative director + video assembler pipeline + tools/ # Git, image, music, video tools for persona agents + lightning/ # Lightning backend abstraction (mock + LND) + agent_core/ # Substrate-agnostic agent interface voice/ # NLU intent detection (regex-based, no cloud) websocket/ # WebSocket manager (ws_manager singleton) notifications/ # Push notification store (notifier singleton) shortcuts/ # Siri Shortcuts API 
endpoints + telegram_bot/ # Telegram bridge self_tdd/ # Continuous test watchdog tests/ # One test_*.py per module, all mocked static/ # style.css + bg.svg (arcane theme) @@ -309,9 +315,9 @@ make docker-agent # add a Local agent worker **v2.0.0 — Exodus (in progress)** - [x] Persistent swarm state across restarts - [x] Docker infrastructure for agent containers -- [ ] Implement Echo, Mace, Helm, Seer, Forge, Quill persona agents (Dockerised) +- [x] Implement Echo, Mace, Helm, Seer, Forge, Quill persona agents (+ Pixel, Lyra, Reel) +- [x] MCP tool integration for Timmy - [ ] Real LND gRPC backend for `PaymentHandler` (replace mock) -- [ ] MCP tool integration for Timmy - [ ] Marketplace frontend — wire `/marketplace` route to real data **v3.0.0 — Revelation (planned)** diff --git a/LICENSE b/LICENSE index 261eeb9..16e48f0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,21 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +MIT License - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +Copyright (c) 2026 Alexander Whitestone - 1. Definitions. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 730e860..5224df9 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,16 @@ A local-first, sovereign AI agent system. 
Talk to Timmy, watch his swarm, gate | **Mission Control** | FastAPI + HTMX dashboard — chat, health, swarm, marketplace | | **Swarm** | Multi-agent coordinator — spawn agents, post tasks, run Lightning auctions | | **L402 / Lightning** | Bitcoin Lightning payment gating for API access | +| **Spark Intelligence** | Event capture, predictions, memory consolidation, advisory engine | +| **Creative Studio** | Multi-persona creative pipeline — image, music, video generation | +| **Tools** | Git, image, music, and video tools accessible by persona agents | | **Voice** | NLU intent detection + TTS (pyttsx3, no cloud) | | **WebSocket** | Real-time swarm live feed | | **Mobile** | Responsive layout with full iOS safe-area and touch support | +| **Telegram** | Bridge Telegram messages to Timmy | | **CLI** | `timmy`, `timmy-serve`, `self-tdd` entry points | -**228 tests, 100% passing.** +**600+ tests, 100% passing.** --- @@ -66,7 +70,7 @@ make dev ## Common commands ```bash -make test # run all 228 tests (no Ollama needed) +make test # run all tests (no Ollama needed) make test-cov # test + coverage report make dev # start dashboard (http://localhost:8000) make watch # self-TDD watchdog (60s poll, alerts on regressions) @@ -202,12 +206,18 @@ src/ dashboard/ # FastAPI app, routes, Jinja2 templates swarm/ # Multi-agent: coordinator, registry, bidder, tasks, comms timmy_serve/ # L402 proxy, payment handler, TTS, serve CLI + spark/ # Intelligence engine — events, predictions, advisory + creative/ # Creative director + video assembler pipeline + tools/ # Git, image, music, video tools for persona agents + lightning/ # Lightning backend abstraction (mock + LND) + agent_core/ # Substrate-agnostic agent interface voice/ # NLU intent detection websocket/ # WebSocket connection manager notifications/ # Push notification store shortcuts/ # Siri Shortcuts endpoints + telegram_bot/ # Telegram bridge self_tdd/ # Continuous test watchdog -tests/ # 228 tests — one file per module, all 
mocked +tests/ # 600+ tests — one file per module, all mocked static/style.css # Dark mission-control theme (JetBrains Mono) docs/ # GitHub Pages landing page AGENTS.md # AI agent development standards ← read this diff --git a/docs/index.html b/docs/index.html index 4ae7d7a..0c7494a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -566,7 +566,7 @@ gate API access with Bitcoin Lightning — no cloud, no telemetry, no compromise.

- 228 Tests Passing + 600+ Tests Passing FastAPI + HTMX Lightning L402 No Cloud @@ -582,7 +582,7 @@
-
228
+
600+
Tests Passing
@@ -590,7 +590,7 @@
API Endpoints
-
11
+
15
Subsystems
@@ -780,7 +780,7 @@ External: Ollama :11434 · optional Redis · optional LND gRPC
5

Test

-
make test        # 228 tests — no Ollama needed
+        
make test        # 600+ tests — no Ollama needed
 make test-cov    # + coverage report
 make watch       # self-TDD watchdog in background
@@ -870,7 +870,7 @@ External: Ollama :11434 · optional Redis · optional LND gRPC
✓ Complete

Foundation

-

Agno + Ollama + SQLite + FastAPI dashboard + HTMX + 228 tests. +

Agno + Ollama + SQLite + FastAPI dashboard + HTMX + full test suite. CLI, mobile layout, Bootstrap, CI/CD, AirLLM big-brain backend.

diff --git a/pyproject.toml b/pyproject.toml index a5150d3..b1900cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,8 @@ include = [ "src/spark", "src/tools", "src/creative", + "src/agent_core", + "src/lightning", ] [tool.pytest.ini_options] diff --git a/src/timmy_serve/app.py b/src/timmy_serve/app.py index 2ac198a..146dcd4 100644 --- a/src/timmy_serve/app.py +++ b/src/timmy_serve/app.py @@ -158,18 +158,18 @@ def create_timmy_serve_app(price_sats: int = DEFAULT_PRICE_SATS) -> FastAPI: ) @app.post("/serve/chat", response_model=ChatResponse) - async def serve_chat(request: ChatRequest): + async def serve_chat(request: Request, body: ChatRequest): """Process a chat request (L402-gated). - + Requires valid L402 token in Authorization header: Authorization: L402 : """ try: # Create Timmy agent and process request timmy = create_timmy() - result = timmy.run(request.message, stream=False) + result = timmy.run(body.message, stream=False) response_text = result.content if hasattr(result, "content") else str(result) - + # Get payment hash from Authorization header for receipt auth_header = request.headers.get("authorization", "") payment_hash = None diff --git a/tests/test_timmy_serve_app.py b/tests/test_timmy_serve_app.py new file mode 100644 index 0000000..cd24e5e --- /dev/null +++ b/tests/test_timmy_serve_app.py @@ -0,0 +1,97 @@ +"""Tests for timmy_serve/app.py — Serve FastAPI app and endpoints.""" + +from unittest.mock import MagicMock, patch + +import pytest +from fastapi.testclient import TestClient + + +@pytest.fixture +def serve_client(): + """Create a TestClient for the timmy-serve app.""" + from timmy_serve.app import create_timmy_serve_app + + app = create_timmy_serve_app(price_sats=100) + return TestClient(app) + + +class TestHealthEndpoint: + def test_health_returns_ok(self, serve_client): + resp = serve_client.get("/health") + assert resp.status_code == 200 + data = resp.json() + assert data["status"] == "healthy" + assert data["service"] == 
"timmy-serve" + + +class TestServeStatus: + def test_status_returns_pricing(self, serve_client): + resp = serve_client.get("/serve/status") + assert resp.status_code == 200 + data = resp.json() + assert data["price_sats"] == 100 + assert "total_invoices" in data + assert "total_earned_sats" in data + + +class TestServeChatEndpoint: + """Regression tests for /serve/chat. + + The original implementation declared ``async def serve_chat(request: ChatRequest)`` + which shadowed FastAPI's ``Request`` object. Calling ``request.headers`` on a + Pydantic model raised ``AttributeError``. The fix splits the parameters into + ``request: Request`` (FastAPI) and ``body: ChatRequest`` (Pydantic). + """ + + def test_chat_without_auth_returns_402(self, serve_client): + """Unauthenticated request should get a 402 challenge.""" + resp = serve_client.post( + "/serve/chat", + json={"message": "Hello"}, + ) + assert resp.status_code == 402 + data = resp.json() + assert data["error"] == "Payment Required" + assert "macaroon" in data + assert "invoice" in data + + @patch("timmy_serve.app.create_timmy") + @patch("timmy_serve.app.verify_l402_token", return_value=True) + def test_chat_with_valid_l402_token(self, mock_verify, mock_create, serve_client): + """Authenticated request should reach the chat handler without AttributeError.""" + mock_agent = MagicMock() + mock_result = MagicMock() + mock_result.content = "I am Timmy." + mock_agent.run.return_value = mock_result + mock_create.return_value = mock_agent + + resp = serve_client.post( + "/serve/chat", + json={"message": "Who are you?"}, + headers={"Authorization": "L402 fake-macaroon:fake-preimage"}, + ) + # The key assertion: we must NOT get a 500 from AttributeError + assert resp.status_code == 200 + data = resp.json() + assert data["response"] == "I am Timmy." 
+ mock_agent.run.assert_called_once_with("Who are you?", stream=False) + + @patch("timmy_serve.app.create_timmy") + @patch("timmy_serve.app.verify_l402_token", return_value=True) + def test_chat_reads_auth_header_from_request( + self, mock_verify, mock_create, serve_client + ): + """Ensure auth header is read from the HTTP Request, not the JSON body.""" + mock_agent = MagicMock() + mock_result = MagicMock() + mock_result.content = "ok" + mock_agent.run.return_value = mock_result + mock_create.return_value = mock_agent + + resp = serve_client.post( + "/serve/chat", + json={"message": "test"}, + headers={"Authorization": "L402 abc:def"}, + ) + assert resp.status_code == 200 + # Should not raise AttributeError on request.headers From dd28595dbd88119fea35e991948095224f241439 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 17:34:04 +0000 Subject: [PATCH 06/32] audit: comprehensive feature verification against documentation claims Audits all 15+ subsystems against claims in docs/index.html and README.md. 643 tests pass (not "600+"), 58 endpoints exist (not "20+"). Identifies three false claims: "0 Cloud Calls" (CDN deps in templates), "LND gRPC-ready" (every method raises NotImplementedError), and "agents earn sats autonomously" (unimplemented v3 feature presented as current). https://claude.ai/code/session_01SZTwAkTg6v4ybv8g9NLxqN --- docs/AUDIT_REPORT.md | 342 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 docs/AUDIT_REPORT.md diff --git a/docs/AUDIT_REPORT.md b/docs/AUDIT_REPORT.md new file mode 100644 index 0000000..2667c2a --- /dev/null +++ b/docs/AUDIT_REPORT.md @@ -0,0 +1,342 @@ +# Timmy Time Dashboard - Feature Audit Report + +**Date**: 2026-02-24 +**Auditor**: Claude (Opus 4.6) +**Scope**: All features claimed in documentation (`docs/index.html`, `README.md`) vs. 
actual implementation + +--- + +## Executive Summary + +The Timmy Time Dashboard is a **real, functional codebase** with substantial implementation across its 15+ subsystems. However, the documentation contains several **misleading or inaccurate claims** that overstate readiness in some areas and understate capability in others. + +### Key Findings + +| Claim | Verdict | Detail | +|-------|---------|--------| +| "600+ Tests Passing" | **UNDERSTATED** | 643 tests collected and passing | +| "20+ API Endpoints" | **UNDERSTATED** | 58 actual endpoints | +| "0 Cloud Calls" | **FALSE** | Frontend loads Bootstrap, HTMX, Google Fonts from CDN | +| "LND gRPC-ready for production" | **FALSE** | Every LND method raises `NotImplementedError` | +| "15 Subsystems" | **TRUE** | 15+ distinct modules confirmed | +| "No cloud, no telemetry" | **PARTIALLY FALSE** | Backend is local-only; frontend depends on CDN resources | +| "Agents earn and spend sats autonomously" | **FALSE** | Not implemented; inter-agent payments exist only as mock scaffolding | + +**Overall assessment**: The core system (agent, dashboard, swarm coordination, mock Lightning, voice NLU, creative pipeline orchestration, WebSocket, Spark intelligence) is genuinely implemented and well-tested. The main areas of concern are inflated claims about Lightning/LND production readiness and the "zero cloud" positioning. + +--- + +## 1. 
Test Suite Audit + +### Claim: "600+ Tests Passing" + +**Verdict: TRUE (understated)** + +``` +$ python -m pytest -q +643 passed, 1 warning in 46.06s +``` + +- **47 test files**, **643 test functions** +- All pass cleanly on Python 3.11 +- Tests are mocked at appropriate boundaries (no Ollama/GPU required) +- Test quality is generally good - tests verify real state transitions, SQLite persistence, HTTP response structure, and business logic + +### Test Quality Assessment + +**Strengths:** +- Swarm tests use real temporary SQLite databases (not mocked away) +- L402/Lightning tests verify cryptographic operations (macaroon serialization, HMAC signing, preimage verification) +- Dashboard tests use FastAPI `TestClient` with actual HTTP requests +- Assembler tests produce real video files with MoviePy + +**Weaknesses:** +- LND backend is entirely untested (all methods raise `NotImplementedError`) +- `agent_core/ollama_adapter.py` has two TODO stubs (`persist_memory`, `communicate`) that are tested as no-ops +- Creative tool tests mock the heavyweight model loading (expected, but means end-to-end generation is untested) +- Some tests only verify status codes without checking response body content + +--- + +## 2. 
Feature-by-Feature Audit + +### 2.1 Timmy Agent +**Claimed**: Agno-powered conversational agent backed by Ollama, AirLLM for 70B-405B models, SQLite memory +**Verdict: REAL & FUNCTIONAL** + +- `src/timmy/agent.py` (79 lines): Creates a genuine `agno.Agent` with Ollama model, SQLite persistence, tools, and system prompt +- Backend selection (`backends.py`) implements real Ollama/AirLLM switching with Apple Silicon detection +- CLI (`cli.py`) provides working `timmy chat`, `timmy think`, `timmy status` commands +- Approval workflow (`approvals.py`) implements real human-in-the-loop with SQLite-backed state +- Briefing system (`briefing.py`) generates real scheduled briefings + +**Issue**: `agent_core/ollama_adapter.py:184` has `# TODO: Persist to SQLite for long-term memory` and `communicate()` at line 221 is explicitly described as "a stub" + +### 2.2 Mission Control UI +**Claimed**: FastAPI + HTMX + Jinja2 dashboard, dark terminal aesthetic +**Verdict: REAL & FUNCTIONAL** + +- **58 actual endpoints** (documentation claims "20+") +- Full Jinja2 template hierarchy with base layout + 12 page templates + 12 partials +- Real HTMX integration for dynamic updates +- Bootstrap 5 loaded from CDN (contradicts "no cloud" claim) +- Dark theme with JetBrains Mono font (loaded from Google Fonts CDN) + +### 2.3 Multi-Agent Swarm +**Claimed**: Coordinator, registry, bidder, manager, sub-agent spawning, 15-second Lightning auctions +**Verdict: REAL & FUNCTIONAL** + +- `coordinator.py` (400+ lines): Full orchestration of task lifecycle +- `registry.py`: Real SQLite-backed agent registry with capabilities tracking +- `bidder.py`: Genuine auction logic with configurable timeouts and bid scoring +- `manager.py`: Spawns agents as subprocesses with lifecycle management +- `tasks.py`: SQLite-backed task CRUD with state machine transitions +- `comms.py`: In-memory pub/sub (Redis optional, graceful fallback) +- `routing.py`: Capability-based task routing +- `learner.py`: Agent outcome 
learning +- `recovery.py`: Fault recovery on startup +- 9 personas defined (Echo, Mace, Helm, Seer, Forge, Quill, Pixel, Lyra, Reel) + +**Issue**: The documentation roadmap mentions personas "Echo, Mace, Helm, Seer, Forge, Quill" but the codebase also includes Pixel, Lyra, and Reel. The creative persona toolkits (pixel, lyra, reel) are stubs in `tools.py:293-295` — they create empty `Toolkit` objects because the real tools live in separate modules. + +### 2.4 L402 Lightning Payments +**Claimed**: "Bitcoin Lightning payment gating via HMAC macaroons. Mock backend for dev, LND gRPC-ready for production. Agents earn and spend sats autonomously." +**Verdict: PARTIALLY IMPLEMENTED - LND CLAIM IS FALSE** + +**What works:** +- Mock Lightning backend (`mock_backend.py`): Fully functional invoice creation, payment simulation, settlement, balance tracking +- L402 proxy (`l402_proxy.py`): Real macaroon creation/verification with HMAC signing +- Payment handler (`payment_handler.py`): Complete invoice lifecycle management +- Inter-agent payment settlement (`inter_agent.py`): Framework exists with mock backend + +**What does NOT work:** +- **LND backend (`lnd_backend.py`)**: Every single method raises `NotImplementedError` or returns hardcoded fallback values: + - `create_invoice()` — `raise NotImplementedError` (line 199) + - `check_payment()` — `raise NotImplementedError` (line 220) + - `get_invoice()` — `raise NotImplementedError` (line 248) + - `list_invoices()` — `raise NotImplementedError` (line 290) + - `get_balance_sats()` — `return 0` with warning (line 304) + - `health_check()` — returns `{"ok": False, "backend": "lnd-stub"}` (line 327) + - The gRPC stub is explicitly `None` with comment: "LND gRPC stubs not yet implemented" (line 153) + +**The documentation claim that LND is "gRPC-ready for production" is false.** The file contains commented-out pseudocode showing what the implementation *would* look like, but no actual gRPC calls are made. 
The claim that "agents earn and spend sats autonomously" is also unimplemented — this is listed under v3.0.0 (Planned) in the roadmap but stated as current capability in the features section. + +### 2.5 Spark Intelligence Engine +**Claimed**: Event capture, predictions (EIDOS), memory consolidation, advisory engine +**Verdict: REAL & FUNCTIONAL** + +- `engine.py`: Full event lifecycle with 8 event types, SQLite persistence +- `eidos.py`: Genuine prediction logic with multi-component accuracy scoring (winner prediction 0.4 weight, success probability 0.4 weight, bid range 0.2 weight) +- `memory.py`: Real event-to-memory pipeline with importance scoring and consolidation +- `advisor.py`: Generates actionable recommendations based on failure patterns, agent performance, and bid optimization +- Dashboard routes expose `/spark`, `/spark/ui`, `/spark/timeline`, `/spark/insights` + +### 2.6 Creative Studio +**Claimed**: Multi-persona creative pipeline for image, music, video generation +**Verdict: REAL ORCHESTRATION, BACKEND MODELS OPTIONAL** + +- `director.py`: True end-to-end pipeline (storyboard -> music -> video -> assembly -> complete) +- `assembler.py`: Real video assembly using MoviePy with cross-fade transitions, audio overlay, title cards, subtitles +- `image_tools.py`: FLUX.1 diffusers pipeline (lazy-loaded) +- `music_tools.py`: ACE-Step model integration (lazy-loaded) +- `video_tools.py`: Wan 2.1 text-to-video pipeline (lazy-loaded) + +The orchestration is 100% real. Tool backends are implemented with real model loading logic but require heavyweight dependencies (GPU, model downloads). Graceful degradation if missing. 
+ +### 2.7 Voice I/O +**Claimed**: Pattern-matched NLU, TTS via pyttsx3 +**Verdict: REAL & FUNCTIONAL** + +- `nlu.py`: Regex-based intent detection with 5 intent types and confidence scoring +- Entity extraction for agent names, task descriptions, numbers +- TTS endpoint exists at `/voice/tts/speak` +- Enhanced voice processing at `/voice/enhanced/process` + +### 2.8 Mobile Optimized +**Claimed**: iOS safe-area, 44px touch targets, 16px inputs, 21-scenario HITL test harness +**Verdict: REAL & FUNCTIONAL** + +- `mobile.html` template with iOS viewport-fit, safe-area insets +- 21-scenario test harness at `/mobile-test` +- `test_mobile_scenarios.py`: 36 tests covering mobile-specific behavior + +### 2.9 WebSocket Live Feed +**Claimed**: Real-time swarm events over WebSocket +**Verdict: REAL & FUNCTIONAL** + +- `websocket/handler.py`: Connection manager with broadcast, 100-event replay buffer +- Specialized broadcast methods for agent_joined, task_posted, bid_submitted, task_assigned, task_completed +- `/ws/swarm` endpoint for live WebSocket connections + +### 2.10 Security +**Claimed**: XSS prevention via textContent, HMAC-signed macaroons, startup warnings for defaults +**Verdict: REAL & FUNCTIONAL** + +- HMAC macaroon signing is cryptographically implemented +- Config warns on default secrets at startup +- Templates use Jinja2 autoescaping + +### 2.11 Self-TDD Watchdog +**Claimed**: 60-second polling, regression alerts +**Verdict: REAL & FUNCTIONAL** + +- `self_tdd/watchdog.py` (71 lines): Polls pytest and alerts on failures +- `activate_self_tdd.sh`: Bootstrap script + +### 2.12 Telegram Integration +**Claimed**: Bridge Telegram messages to Timmy +**Verdict: REAL & FUNCTIONAL** + +- `telegram_bot/bot.py`: python-telegram-bot integration +- Message handler creates Timmy agent and processes user text +- Token management with file persistence +- Dashboard routes at `/telegram/status` and `/telegram/setup` + +### 2.13 Siri Shortcuts +**Claimed**: iOS automation 
endpoints +**Verdict: REAL & FUNCTIONAL** + +- `shortcuts/siri.py`: 4 endpoint definitions (chat, status, swarm, task) +- Setup guide generation for iOS Shortcuts app + +### 2.14 Push Notifications +**Claimed**: Local + macOS native notifications +**Verdict: REAL & FUNCTIONAL** + +- `notifications/push.py`: Bounded notification store, listener callbacks +- macOS native notifications via osascript +- Read/unread state management + +--- + +## 3. Documentation Accuracy Issues + +### 3.1 FALSE: "0 Cloud Calls" + +The hero section, stats bar, and feature descriptions all claim zero cloud dependency. However, `src/dashboard/templates/base.html` loads: + +| Resource | CDN | +|----------|-----| +| Bootstrap 5.3.3 CSS | `cdn.jsdelivr.net` | +| Bootstrap 5.3.3 JS | `cdn.jsdelivr.net` | +| HTMX 2.0.3 | `unpkg.com` | +| JetBrains Mono font | `fonts.googleapis.com` | + +These are loaded on every page render. The dashboard will not render correctly without internet access unless these are bundled locally. + +**Recommendation**: Bundle these assets locally or change the documentation to say "no cloud AI/telemetry" instead of "0 Cloud Calls." + +### 3.2 FALSE: "LND gRPC-ready for production" + +The documentation (both `docs/index.html` and `README.md`) implies the LND backend is production-ready. In reality: + +- Every method in `lnd_backend.py` raises `NotImplementedError` +- The gRPC stub initialization explicitly returns `None` with a warning +- The code contains only commented-out pseudocode +- The file itself contains a `generate_lnd_protos()` function explaining what steps are needed to *begin* implementation + +**Recommendation**: Change documentation to "LND integration planned" or "LND backend scaffolded — mock only for now." + +### 3.3 FALSE: "Agents earn and spend sats autonomously" + +This capability is described in the v3.0.0 (Planned) roadmap section but is also implied as current functionality in the L402 features card. 
The inter-agent payment system (`inter_agent.py`) exists but only works with the mock backend. + +### 3.4 UNDERSTATED: Test Count and Endpoint Count + +- Documentation says "600+ tests" — actual count is **643** +- Documentation says "20+ API endpoints" — actual count is **58** + +These are technically true ("600+" and "20+" include the real numbers) but are misleadingly conservative. + +### 3.5 MINOR: "Bootstrap 5" tag line contradicts the "no cloud" messaging + +The GitHub Pages documentation feature card for Mission Control says "FastAPI + HTMX + Bootstrap 5" in its tag line, which is accurate. But the "no cloud" messaging directly contradicts loading Bootstrap from a CDN. + +--- + +## 4. Code Quality Summary + +| Module | Lines | Quality | Notes | +|--------|-------|---------|-------| +| swarm | 3,069 | Good | Comprehensive coordination with SQLite persistence | +| dashboard | 1,806 | Good | Clean FastAPI routes, well-structured templates | +| timmy | 1,353 | Good | Clean agent setup with proper backend abstraction | +| spark | 1,238 | Excellent | Sophisticated intelligence pipeline | +| tools | 869 | Good | Real implementations with lazy-loading pattern | +| lightning | 868 | Mixed | Mock is excellent; LND is entirely unimplemented | +| timmy_serve | 693 | Good | L402 proxy works with mock backend | +| creative | 683 | Good | Real orchestration pipeline | +| agent_core | 627 | Mixed | Some TODO stubs (persist_memory, communicate) | +| telegram_bot | 163 | Good | Complete integration | +| notifications | 146 | Good | Working notification store | +| voice | 133 | Good | Working NLU with intent detection | +| websocket | 129 | Good | Solid connection management | +| shortcuts | 93 | Good | Clean endpoint definitions | +| self_tdd | 71 | Good | Simple and effective | + +**Total**: 86 Python files, 12,007 lines of code + +--- + +## 5. Recommendations + +1. **Fix the "0 Cloud Calls" claim** — either bundle frontend dependencies locally or change the messaging 2.
**Fix the LND documentation** — clearly mark it as unimplemented/scaffolded, not "production-ready" +3. **Fix the autonomous sats claim** — move it from current features to roadmap/planned +4. **Update test/endpoint counts** — "643 tests" and "58 endpoints" are more impressive than "600+" and "20+" +5. **Implement `agent_core` TODO stubs** — `persist_memory()` and `communicate()` are dead code +6. **Bundle CDN resources** — for true offline operation, vendor Bootstrap, HTMX, and the font + +--- + +## Appendix: Test Breakdown by Module + +| Test File | Tests | Module Tested | +|-----------|-------|---------------| +| test_spark.py | 47 | Spark intelligence engine | +| test_mobile_scenarios.py | 36 | Mobile layout | +| test_swarm.py | 29 | Swarm core | +| test_dashboard_routes.py | 25 | Dashboard routes | +| test_learner.py | 23 | Agent learning | +| test_briefing.py | 22 | Briefing system | +| test_swarm_personas.py | 21 | Persona definitions | +| test_coordinator.py | 20 | Swarm coordinator | +| test_creative_director.py | 19 | Creative pipeline | +| test_tool_executor.py | 19 | Tool execution | +| test_lightning_interface.py | 19 | Lightning backend | +| test_dashboard.py | 18 | Dashboard core | +| test_git_tools.py | 18 | Git tools | +| test_approvals.py | 17 | Approval workflow | +| test_swarm_routing.py | 17 | Task routing | +| test_telegram_bot.py | 16 | Telegram bridge | +| test_websocket_extended.py | 16 | WebSocket | +| test_voice_nlu.py | 15 | Voice NLU | +| test_backends.py | 14 | Backend selection | +| test_swarm_recovery.py | 14 | Fault recovery | +| test_swarm_stats.py | 13 | Performance stats | +| test_swarm_integration_full.py | 13 | Swarm integration | +| test_l402_proxy.py | 13 | L402 proxy | +| test_agent.py | 13 | Core agent | +| test_notifications.py | 11 | Push notifications | +| test_spark_tools_creative.py | 11 | Spark + creative integration | +| test_swarm_node.py | 10 | Swarm nodes | +| test_inter_agent.py | 10 | Inter-agent comms | +| 
test_timmy_serve_cli.py | 10 | Serve CLI | +| test_docker_agent.py | 9 | Docker agents | +| test_assembler_integration.py | 9 | Video assembly | +| test_swarm_integration.py | 7 | Swarm integration | +| test_assembler.py | 7 | Video assembly | +| test_image_tools.py | 7 | Image tools | +| test_music_tools.py | 9 | Music tools | +| test_video_tools.py | 9 | Video tools | +| test_creative_route.py | 6 | Creative routes | +| test_shortcuts.py | 6 | Siri shortcuts | +| test_watchdog.py | 6 | Self-TDD watchdog | +| test_voice_enhanced.py | 8 | Enhanced voice | +| test_timmy_serve_app.py | 5 | Serve app | +| test_music_video_integration.py | 5 | Music + video pipeline | +| test_swarm_live_page.py | 4 | Live swarm page | +| test_agent_runner.py | 4 | Agent runner | +| test_prompts.py | 8 | System prompts | +| test_cli.py | 2 | CLI | +| test_websocket.py | 3 | WebSocket core | From 0367fe3649b081fa8b2aa9480e461da42761a94b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 17:36:10 +0000 Subject: [PATCH 07/32] audit: add detailed findings from parallel subsystem audits Incorporates findings from deep-dive audits of all 5 subsystems: - Swarm auction timing bug (sleep(0) instead of 15s) - Docker agent HTTP API partially wired - L402 macaroons are HMAC-only (no caveats/delegation) - Agent sats are bid-only, no settlement occurs - CLI test coverage gap (2 tests for 3 commands) - agent_core persist_memory/communicate are stubs https://claude.ai/code/session_01SZTwAkTg6v4ybv8g9NLxqN --- docs/AUDIT_REPORT.md | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/AUDIT_REPORT.md b/docs/AUDIT_REPORT.md index 2667c2a..0f7761a 100644 --- a/docs/AUDIT_REPORT.md +++ b/docs/AUDIT_REPORT.md @@ -20,7 +20,9 @@ The Timmy Time Dashboard is a **real, functional codebase** with substantial imp | "LND gRPC-ready for production" | **FALSE** | Every LND method raises `NotImplementedError` | | "15 Subsystems" | **TRUE** | 15+ distinct modules confirmed 
| | "No cloud, no telemetry" | **PARTIALLY FALSE** | Backend is local-only; frontend depends on CDN resources | -| "Agents earn and spend sats autonomously" | **FALSE** | Not implemented; inter-agent payments exist only as mock scaffolding | +| "Agents earn and spend sats autonomously" | **FALSE** | Not implemented; agents bid in sats but no satoshi movement occurs | +| "15-second Lightning auctions" | **PARTIALLY TRUE** | Auction logic exists but `asyncio.sleep(0)` closes auctions immediately | +| "Macaroon" implementation | **SIMPLIFIED** | HMAC-only, not true macaroons (no caveats, no delegation) | **Overall assessment**: The core system (agent, dashboard, swarm coordination, mock Lightning, voice NLU, creative pipeline orchestration, WebSocket, Spark intelligence) is genuinely implemented and well-tested. The main areas of concern are inflated claims about Lightning/LND production readiness and the "zero cloud" positioning. @@ -70,7 +72,9 @@ $ python -m pytest -q - Approval workflow (`approvals.py`) implements real human-in-the-loop with SQLite-backed state - Briefing system (`briefing.py`) generates real scheduled briefings -**Issue**: `agent_core/ollama_adapter.py:184` has `# TODO: Persist to SQLite for long-term memory` and `communicate()` at line 221 is explicitly described as "a stub" +**Issues**: +- `agent_core/ollama_adapter.py:184` has `# TODO: Persist to SQLite for long-term memory` and `communicate()` at line 221 is explicitly described as "a stub" +- CLI tests are sparse: only 2 tests for 3 commands. The `chat` and `think` commands lack dedicated test coverage. 
### 2.2 Mission Control UI **Claimed**: FastAPI + HTMX + Jinja2 dashboard, dark terminal aesthetic @@ -97,7 +101,11 @@ $ python -m pytest -q - `recovery.py`: Fault recovery on startup - 9 personas defined (Echo, Mace, Helm, Seer, Forge, Quill, Pixel, Lyra, Reel) -**Issue**: The documentation roadmap mentions personas "Echo, Mace, Helm, Seer, Forge, Quill" but the codebase also includes Pixel, Lyra, and Reel. The creative persona toolkits (pixel, lyra, reel) are stubs in `tools.py:293-295` — they create empty `Toolkit` objects because the real tools live in separate modules. +**Issues**: +- The documentation roadmap mentions personas "Echo, Mace, Helm, Seer, Forge, Quill" but the codebase also includes Pixel, Lyra, and Reel. The creative persona toolkits (pixel, lyra, reel) are stubs in `tools.py:293-295` — they create empty `Toolkit` objects because the real tools live in separate modules. +- **Auction timing bug**: `coordinator.py` uses `await asyncio.sleep(0)` instead of the documented 15-second wait, meaning auctions close almost immediately. This is masked by synchronous in-process bidding but would break for subprocess/Docker agents. +- **Docker agent HTTP API partially wired**: `agent_runner.py` polls `/internal/tasks` and posts to `/internal/bids` — these endpoints exist in `swarm_internal.py` but the integration path is incomplete for containerized deployment. +- **Tool execution not fully wired**: `persona_node.py`'s `execute_task()` has infrastructure for tool invocation but doesn't execute tools end-to-end in practice. ### 2.4 L402 Lightning Payments **Claimed**: "Bitcoin Lightning payment gating via HMAC macaroons. Mock backend for dev, LND gRPC-ready for production. Agents earn and spend sats autonomously." 
@@ -119,7 +127,9 @@ $ python -m pytest -q
 - `health_check()` — returns `{"ok": False, "backend": "lnd-stub"}` (line 327)
 - The gRPC stub is explicitly `None` with comment: "LND gRPC stubs not yet implemented" (line 153)
 
-**The documentation claim that LND is "gRPC-ready for production" is false.** The file contains commented-out pseudocode showing what the implementation *would* look like, but no actual gRPC calls are made. The claim that "agents earn and spend sats autonomously" is also unimplemented — this is listed under v3.0.0 (Planned) in the roadmap but stated as current capability in the features section.
+**The documentation claim that LND is "gRPC-ready for production" is false.** The file contains commented-out pseudocode showing what the implementation *would* look like, but no actual gRPC calls are made. The gRPC channel/auth infrastructure is ~80% ready but the protobuf stubs are missing entirely. The claim that "agents earn and spend sats autonomously" is also unimplemented — agents bid in sats during auctions but `payment_handler.settle_invoice()` is never called from agent code. No satoshi movement occurs. Autonomous sat earning/spending is listed under v3.0.0 (Planned) in the roadmap but stated as a current capability in the features section.
+
+Additionally, the "macaroon" implementation is HMAC-only (`l402_proxy.py:67-69`), not true macaroons. There is no support for caveats, delegation, or cryptographic nesting. This is adequate for L402 but not the full macaroon specification the documentation implies.
### 2.5 Spark Intelligence Engine **Claimed**: Event capture, predictions (EIDOS), memory consolidation, advisory engine From 4daf382819f87145cb00092958729814b0d85b5d Mon Sep 17 00:00:00 2001 From: AlexanderWhitestone <8633216+AlexanderWhitestone@users.noreply.github.com> Date: Tue, 24 Feb 2026 12:58:19 -0500 Subject: [PATCH 08/32] security: fix L402 macaroon forgery and XSS in templates --- apply_security_fixes.py | 183 ++ coverage.xml | 2222 +++++++++++++++++ src/dashboard/templates/briefing.html | 2 +- src/dashboard/templates/marketplace.html | 8 +- .../partials/approval_card_single.html | 6 +- .../templates/partials/chat_message.html | 6 +- src/dashboard/templates/partials/history.html | 6 +- src/timmy_serve/l402_proxy.py | 38 +- tests/test_security_regression.py | 75 + 9 files changed, 2522 insertions(+), 24 deletions(-) create mode 100644 apply_security_fixes.py create mode 100644 coverage.xml create mode 100644 tests/test_security_regression.py diff --git a/apply_security_fixes.py b/apply_security_fixes.py new file mode 100644 index 0000000..2f4420c --- /dev/null +++ b/apply_security_fixes.py @@ -0,0 +1,183 @@ +import os + +def fix_l402_proxy(): + path = "src/timmy_serve/l402_proxy.py" + with open(path, "r") as f: + content = f.read() + + # 1. Add hmac_secret to Macaroon dataclass + old_dataclass = "@dataclass\nclass Macaroon:\n \"\"\"Simplified HMAC-based macaroon for L402 authentication.\"\"\"\n identifier: str # payment_hash\n signature: str # HMAC signature\n location: str = \"timmy-time\"\n version: int = 1" + new_dataclass = "@dataclass\nclass Macaroon:\n \"\"\"Simplified HMAC-based macaroon for L402 authentication.\"\"\"\n identifier: str # payment_hash\n signature: str # HMAC signature\n location: str = \"timmy-time\"\n version: int = 1\n hmac_secret: str = \"\" # Added for multi-key support" + content = content.replace(old_dataclass, new_dataclass) + + # 2. 
Update _MACAROON_SECRET logic + old_secret_logic = """_MACAROON_SECRET_DEFAULT = "timmy-macaroon-secret" +_MACAROON_SECRET_RAW = os.environ.get("L402_MACAROON_SECRET", _MACAROON_SECRET_DEFAULT) +_MACAROON_SECRET = _MACAROON_SECRET_RAW.encode() + +if _MACAROON_SECRET_RAW == _MACAROON_SECRET_DEFAULT: + logger.warning( + "SEC: L402_MACAROON_SECRET is using the default value — set a unique " + "secret in .env before deploying to production." + )""" + new_secret_logic = """_MACAROON_SECRET_DEFAULT = "timmy-macaroon-secret" +_MACAROON_SECRET_RAW = os.environ.get("L402_MACAROON_SECRET", _MACAROON_SECRET_DEFAULT) +_MACAROON_SECRET = _MACAROON_SECRET_RAW.encode() + +_HMAC_SECRET_DEFAULT = "timmy-hmac-secret" +_HMAC_SECRET_RAW = os.environ.get("L402_HMAC_SECRET", _HMAC_SECRET_DEFAULT) +_HMAC_SECRET = _HMAC_SECRET_RAW.encode() + +if _MACAROON_SECRET_RAW == _MACAROON_SECRET_DEFAULT or _HMAC_SECRET_RAW == _HMAC_SECRET_DEFAULT: + logger.warning( + "SEC: L402 secrets are using default values — set L402_MACAROON_SECRET " + "and L402_HMAC_SECRET in .env before deploying to production." + )""" + content = content.replace(old_secret_logic, new_secret_logic) + + # 3. Update _sign to use the two-key derivation + old_sign = """def _sign(identifier: str) -> str: + \"\"\"Create an HMAC signature for a macaroon identifier.\"\"\" + return hmac.new(_MACAROON_SECRET, identifier.encode(), hashlib.sha256).hexdigest()""" + new_sign = """def _sign(identifier: str, hmac_secret: Optional[str] = None) -> str: + \"\"\"Create an HMAC signature for a macaroon identifier using two-key derivation. + + The base macaroon secret is used to derive a key-specific secret from the + hmac_secret, which is then used to sign the identifier. This prevents + macaroon forgery if the hmac_secret is known but the base secret is not. 
+ \"\"\" + key = hmac.new( + _MACAROON_SECRET, + (hmac_secret or _HMAC_SECRET_RAW).encode(), + hashlib.sha256 + ).digest() + return hmac.new(key, identifier.encode(), hashlib.sha256).hexdigest()""" + content = content.replace(old_sign, new_sign) + + # 4. Update create_l402_challenge + old_create = """ invoice = payment_handler.create_invoice(amount_sats, memo) + signature = _sign(invoice.payment_hash) + macaroon = Macaroon( + identifier=invoice.payment_hash, + signature=signature, + )""" + new_create = """ invoice = payment_handler.create_invoice(amount_sats, memo) + hmac_secret = _HMAC_SECRET_RAW + signature = _sign(invoice.payment_hash, hmac_secret) + macaroon = Macaroon( + identifier=invoice.payment_hash, + signature=signature, + hmac_secret=hmac_secret, + )""" + content = content.replace(old_create, new_create) + + # 5. Update Macaroon.serialize and deserialize + old_serialize = """ def serialize(self) -> str: + \"\"\"Encode the macaroon as a base64 string.\"\"\" + raw = f"{self.version}:{self.location}:{self.identifier}:{self.signature}" + return base64.urlsafe_b64encode(raw.encode()).decode()""" + new_serialize = """ def serialize(self) -> str: + \"\"\"Encode the macaroon as a base64 string.\"\"\" + raw = f"{self.version}:{self.location}:{self.identifier}:{self.signature}:{self.hmac_secret}" + return base64.urlsafe_b64encode(raw.encode()).decode()""" + content = content.replace(old_serialize, new_serialize) + + old_deserialize = """ @classmethod + def deserialize(cls, token: str) -> Optional["Macaroon"]: + \"\"\"Decode a base64 macaroon string.\"\"\" + try: + raw = base64.urlsafe_b64decode(token.encode()).decode() + parts = raw.split(":") + if len(parts) != 4: + return None + return cls( + version=int(parts[0]), + location=parts[1], + identifier=parts[2], + signature=parts[3], + ) + except Exception: + return None""" + new_deserialize = """ @classmethod + def deserialize(cls, token: str) -> Optional["Macaroon"]: + \"\"\"Decode a base64 macaroon string.\"\"\" 
+ try: + raw = base64.urlsafe_b64decode(token.encode()).decode() + parts = raw.split(":") + if len(parts) < 4: + return None + return cls( + version=int(parts[0]), + location=parts[1], + identifier=parts[2], + signature=parts[3], + hmac_secret=parts[4] if len(parts) > 4 else "", + ) + except Exception: + return None""" + content = content.replace(old_deserialize, new_deserialize) + + # 6. Update verify_l402_token + old_verify_sig = """ # Check HMAC signature + expected_sig = _sign(macaroon.identifier) + if not hmac.compare_digest(macaroon.signature, expected_sig):""" + new_verify_sig = """ # Check HMAC signature + expected_sig = _sign(macaroon.identifier, macaroon.hmac_secret) + if not hmac.compare_digest(macaroon.signature, expected_sig):""" + content = content.replace(old_verify_sig, new_verify_sig) + + with open(path, "w") as f: + f.write(content) + +def fix_xss(): + # Fix chat_message.html + path = "src/dashboard/templates/partials/chat_message.html" + with open(path, "r") as f: + content = f.read() + content = content.replace("{{ user_message }}", "{{ user_message | e }}") + content = content.replace("{{ response }}", "{{ response | e }}") + content = content.replace("{{ error }}", "{{ error | e }}") + with open(path, "w") as f: + f.write(content) + + # Fix history.html + path = "src/dashboard/templates/partials/history.html" + with open(path, "r") as f: + content = f.read() + content = content.replace("{{ msg.content }}", "{{ msg.content | e }}") + with open(path, "w") as f: + f.write(content) + + # Fix briefing.html + path = "src/dashboard/templates/briefing.html" + with open(path, "r") as f: + content = f.read() + content = content.replace("{{ briefing.summary }}", "{{ briefing.summary | e }}") + with open(path, "w") as f: + f.write(content) + + # Fix approval_card_single.html + path = "src/dashboard/templates/partials/approval_card_single.html" + with open(path, "r") as f: + content = f.read() + content = content.replace("{{ item.title }}", "{{ item.title 
| e }}") + content = content.replace("{{ item.description }}", "{{ item.description | e }}") + content = content.replace("{{ item.proposed_action }}", "{{ item.proposed_action | e }}") + with open(path, "w") as f: + f.write(content) + + # Fix marketplace.html + path = "src/dashboard/templates/marketplace.html" + with open(path, "r") as f: + content = f.read() + content = content.replace("{{ agent.name }}", "{{ agent.name | e }}") + content = content.replace("{{ agent.role }}", "{{ agent.role | e }}") + content = content.replace("{{ agent.description or 'No description' }}", "{{ (agent.description or 'No description') | e }}") + content = content.replace("{{ cap.strip() }}", "{{ cap.strip() | e }}") + with open(path, "w") as f: + f.write(content) + +if __name__ == "__main__": + fix_l402_proxy() + fix_xss() + print("Security fixes applied successfully.") diff --git a/coverage.xml b/coverage.xml new file mode 100644 index 0000000..6a8d2dd --- /dev/null +++ b/coverage.xml @@ -0,0 +1,2222 @@ + + + + + + /home/ubuntu/Timmy-time-dashboard/src + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/dashboard/templates/briefing.html b/src/dashboard/templates/briefing.html index 711a2d9..5d98f02 100644 --- a/src/dashboard/templates/briefing.html +++ b/src/dashboard/templates/briefing.html @@ -22,7 +22,7 @@
// TIMMY’S REPORT
-
{{ briefing.summary }}
+
{{ briefing.summary | e }}
diff --git a/src/dashboard/templates/marketplace.html b/src/dashboard/templates/marketplace.html index 081dfcd..244aae6 100644 --- a/src/dashboard/templates/marketplace.html +++ b/src/dashboard/templates/marketplace.html @@ -20,13 +20,13 @@
{{ agent.name[0] }}
- {{ agent.name }} + {{ agent.name | e }} - {{ agent.role }} + {{ agent.role | e }}
-
{{ agent.description or 'No description' }}
+
{{ (agent.description or 'No description') | e }}
{{ cap.strip() }} + {{ cap.strip() | e }} {% endfor %} {% endif %}
diff --git a/src/dashboard/templates/partials/approval_card_single.html b/src/dashboard/templates/partials/approval_card_single.html index 2a30388..db0d7e3 100644 --- a/src/dashboard/templates/partials/approval_card_single.html +++ b/src/dashboard/templates/partials/approval_card_single.html @@ -1,10 +1,10 @@
-
{{ item.title }}
+
{{ item.title | e }}
{{ item.impact }}
-
{{ item.description }}
-
▶ {{ item.proposed_action }}
+
{{ item.description | e }}
+
▶ {{ item.proposed_action | e }}
{% if item.status == "pending" %}
diff --git a/src/dashboard/templates/partials/chat_message.html b/src/dashboard/templates/partials/chat_message.html index 9620da2..5d48134 100644 --- a/src/dashboard/templates/partials/chat_message.html +++ b/src/dashboard/templates/partials/chat_message.html @@ -1,15 +1,15 @@
YOU // {{ timestamp }}
-
{{ user_message }}
+
{{ user_message | e }}
{% if response %}
TIMMY // {{ timestamp }}
-
{{ response }}
+
{{ response | e }}
{% elif error %}
SYSTEM // {{ timestamp }}
-
{{ error }}
+
{{ error | e }}
{% endif %} diff --git a/src/dashboard/templates/partials/history.html b/src/dashboard/templates/partials/history.html index 2c4bbc2..26165d5 100644 --- a/src/dashboard/templates/partials/history.html +++ b/src/dashboard/templates/partials/history.html @@ -3,17 +3,17 @@ {% if msg.role == "user" %}
YOU // {{ msg.timestamp }}
-
{{ msg.content }}
+
{{ msg.content | e }}
{% elif msg.role == "agent" %}
TIMMY // {{ msg.timestamp }}
-
{{ msg.content }}
+
{{ msg.content | e }}
{% else %}
SYSTEM // {{ msg.timestamp }}
-
{{ msg.content }}
+
{{ msg.content | e }}
{% endif %} {% endfor %} diff --git a/src/timmy_serve/l402_proxy.py b/src/timmy_serve/l402_proxy.py index ba35c4e..461aa51 100644 --- a/src/timmy_serve/l402_proxy.py +++ b/src/timmy_serve/l402_proxy.py @@ -26,10 +26,14 @@ _MACAROON_SECRET_DEFAULT = "timmy-macaroon-secret" _MACAROON_SECRET_RAW = os.environ.get("L402_MACAROON_SECRET", _MACAROON_SECRET_DEFAULT) _MACAROON_SECRET = _MACAROON_SECRET_RAW.encode() -if _MACAROON_SECRET_RAW == _MACAROON_SECRET_DEFAULT: +_HMAC_SECRET_DEFAULT = "timmy-hmac-secret" +_HMAC_SECRET_RAW = os.environ.get("L402_HMAC_SECRET", _HMAC_SECRET_DEFAULT) +_HMAC_SECRET = _HMAC_SECRET_RAW.encode() + +if _MACAROON_SECRET_RAW == _MACAROON_SECRET_DEFAULT or _HMAC_SECRET_RAW == _HMAC_SECRET_DEFAULT: logger.warning( - "SEC: L402_MACAROON_SECRET is using the default value — set a unique " - "secret in .env before deploying to production." + "SEC: L402 secrets are using default values — set L402_MACAROON_SECRET " + "and L402_HMAC_SECRET in .env before deploying to production." 
) @@ -40,10 +44,11 @@ class Macaroon: signature: str # HMAC signature location: str = "timmy-time" version: int = 1 + hmac_secret: str = "" # Added for multi-key support def serialize(self) -> str: """Encode the macaroon as a base64 string.""" - raw = f"{self.version}:{self.location}:{self.identifier}:{self.signature}" + raw = f"{self.version}:{self.location}:{self.identifier}:{self.signature}:{self.hmac_secret}" return base64.urlsafe_b64encode(raw.encode()).decode() @classmethod @@ -52,21 +57,32 @@ class Macaroon: try: raw = base64.urlsafe_b64decode(token.encode()).decode() parts = raw.split(":") - if len(parts) != 4: + if len(parts) < 4: return None return cls( version=int(parts[0]), location=parts[1], identifier=parts[2], signature=parts[3], + hmac_secret=parts[4] if len(parts) > 4 else "", ) except Exception: return None -def _sign(identifier: str) -> str: - """Create an HMAC signature for a macaroon identifier.""" - return hmac.new(_MACAROON_SECRET, identifier.encode(), hashlib.sha256).hexdigest() +def _sign(identifier: str, hmac_secret: Optional[str] = None) -> str: + """Create an HMAC signature for a macaroon identifier using two-key derivation. + + The base macaroon secret is used to derive a key-specific secret from the + hmac_secret, which is then used to sign the identifier. This prevents + macaroon forgery if the hmac_secret is known but the base secret is not. 
+ """ + key = hmac.new( + _MACAROON_SECRET, + (hmac_secret or _HMAC_SECRET_RAW).encode(), + hashlib.sha256 + ).digest() + return hmac.new(key, identifier.encode(), hashlib.sha256).hexdigest() def create_l402_challenge(amount_sats: int, memo: str = "API access") -> dict: @@ -78,10 +94,12 @@ def create_l402_challenge(amount_sats: int, memo: str = "API access") -> dict: - payment_hash: for tracking payment status """ invoice = payment_handler.create_invoice(amount_sats, memo) - signature = _sign(invoice.payment_hash) + hmac_secret = _HMAC_SECRET_RAW + signature = _sign(invoice.payment_hash, hmac_secret) macaroon = Macaroon( identifier=invoice.payment_hash, signature=signature, + hmac_secret=hmac_secret, ) logger.info("L402 challenge created: %d sats — %s", amount_sats, memo) return { @@ -104,7 +122,7 @@ def verify_l402_token(token: str, preimage: Optional[str] = None) -> bool: return False # Check HMAC signature - expected_sig = _sign(macaroon.identifier) + expected_sig = _sign(macaroon.identifier, macaroon.hmac_secret) if not hmac.compare_digest(macaroon.signature, expected_sig): logger.warning("L402: signature mismatch") return False diff --git a/tests/test_security_regression.py b/tests/test_security_regression.py new file mode 100644 index 0000000..94df9b6 --- /dev/null +++ b/tests/test_security_regression.py @@ -0,0 +1,75 @@ +import hmac +import hashlib +import base64 +import pytest +from timmy_serve.l402_proxy import create_l402_challenge, verify_l402_token, Macaroon, _sign + +def test_l402_macaroon_forgery_prevention(): + """Test that knowing the hmac_secret is not enough to forge a macaroon. + + The forgery attempt uses the same hmac_secret found in a valid macaroon + but doesn't know the server's internal _MACAROON_SECRET. + """ + # 1. Create a valid challenge + challenge = create_l402_challenge(100, "valid") + valid_token = challenge["macaroon"] + + # 2. 
Extract components from the valid macaroon + valid_mac = Macaroon.deserialize(valid_token) + assert valid_mac is not None + + # 3. Attempt to forge a macaroon for a different (unpaid) identifier + # but using the same hmac_secret and the same signing logic a naive + # attacker might assume (if it was just hmac(hmac_secret, identifier)). + fake_identifier = "forged-payment-hash" + + # Naive forgery attempt: + fake_signature = hmac.new( + valid_mac.hmac_secret.encode(), + fake_identifier.encode(), + hashlib.sha256 + ).hexdigest() + + fake_mac = Macaroon( + identifier=fake_identifier, + signature=fake_signature, + hmac_secret=valid_mac.hmac_secret, + version=valid_mac.version, + location=valid_mac.location + ) + fake_token = fake_mac.serialize() + + # 4. Verification should fail because the server uses two-key derivation + assert verify_l402_token(fake_token) is False + +def test_xss_protection_in_templates(): + """Verify that templates now use the escape filter for user-controlled content.""" + templates_to_check = [ + ("src/dashboard/templates/partials/chat_message.html", "{{ user_message | e }}"), + ("src/dashboard/templates/partials/history.html", "{{ msg.content | e }}"), + ("src/dashboard/templates/briefing.html", "{{ briefing.summary | e }}"), + ("src/dashboard/templates/partials/approval_card_single.html", "{{ item.title | e }}"), + ("src/dashboard/templates/marketplace.html", "{{ agent.name | e }}"), + ] + + for path, expected_snippet in templates_to_check: + with open(path, "r") as f: + content = f.read() + assert expected_snippet in content, f"XSS fix missing in {path}" + +def test_macaroon_serialization_v2(): + """Test that the new serialization format includes the hmac_secret.""" + mac = Macaroon(identifier="id", signature="sig", hmac_secret="secret") + serialized = mac.serialize() + + # Decode manually to check parts + raw = base64.urlsafe_b64decode(serialized.encode()).decode() + parts = raw.split(":") + assert len(parts) == 5 + assert parts[2] == "id" 
+ assert parts[3] == "sig" + assert parts[4] == "secret" + + # Test deserialization + restored = Macaroon.deserialize(serialized) + assert restored.hmac_secret == "secret" From 96c9f1b02f1fff6df9362172f520c192e65c65eb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 18:29:21 +0000 Subject: [PATCH 09/32] =?UTF-8?q?fix:=20address=20audit=20low-hanging=20fr?= =?UTF-8?q?uit=20=E2=80=94=20docs=20accuracy,=20auction=20timing,=20stubs,?= =?UTF-8?q?=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Docs: "No Cloud" → "No Cloud AI" (frontend uses CDN for Bootstrap/HTMX/fonts) - Docs: "600+" → "640+" tests, "20+" → "58" endpoints (actual counts) - Docs: LND described as "scaffolded" not "gRPC-ready"; remove "agents earn sats" - Fix auction timing: coordinator sleep(0) → sleep(AUCTION_DURATION_SECONDS) - agent_core: implement remember() with dedup/eviction, communicate() via swarm comms - Tests: add CLI tests for chat, think, and backend/model-size forwarding (647 passing) https://claude.ai/code/session_01SZTwAkTg6v4ybv8g9NLxqN --- README.md | 10 +++---- docs/index.html | 18 ++++++------ src/agent_core/ollama_adapter.py | 44 +++++++++++++++++----------- src/swarm/coordinator.py | 4 +-- tests/test_cli.py | 42 ++++++++++++++++++++++++++ tests/test_coordinator.py | 4 +-- tests/test_swarm_integration.py | 9 +++++- tests/test_swarm_integration_full.py | 7 +++++ 8 files changed, 102 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 5224df9..d75f8c3 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Tests](https://github.com/AlexanderWhitestone/Timmy-time-dashboard/actions/workflows/tests.yml/badge.svg)](https://github.com/AlexanderWhitestone/Timmy-time-dashboard/actions/workflows/tests.yml) -A local-first, sovereign AI agent system. Talk to Timmy, watch his swarm, gate API access with Bitcoin Lightning — all from a browser, no cloud required. +A local-first, sovereign AI agent system. 
Talk to Timmy, watch his swarm, gate API access with Bitcoin Lightning — all from a browser, no cloud AI required. **[Live Docs →](https://alexanderwhitestone.github.io/Timmy-time-dashboard/)** @@ -15,7 +15,7 @@ A local-first, sovereign AI agent system. Talk to Timmy, watch his swarm, gate | **Timmy Agent** | Agno-powered agent (Ollama default, AirLLM optional for 70B/405B) | | **Mission Control** | FastAPI + HTMX dashboard — chat, health, swarm, marketplace | | **Swarm** | Multi-agent coordinator — spawn agents, post tasks, run Lightning auctions | -| **L402 / Lightning** | Bitcoin Lightning payment gating for API access | +| **L402 / Lightning** | Bitcoin Lightning payment gating for API access (mock backend; LND scaffolded) | | **Spark Intelligence** | Event capture, predictions, memory consolidation, advisory engine | | **Creative Studio** | Multi-persona creative pipeline — image, music, video generation | | **Tools** | Git, image, music, and video tools accessible by persona agents | @@ -25,7 +25,7 @@ A local-first, sovereign AI agent system. 
Talk to Timmy, watch his swarm, gate | **Telegram** | Bridge Telegram messages to Timmy | | **CLI** | `timmy`, `timmy-serve`, `self-tdd` entry points | -**600+ tests, 100% passing.** +**Full test suite, 100% passing.** --- @@ -161,7 +161,7 @@ cp .env.example .env | `AIRLLM_MODEL_SIZE` | `70b` | `8b` \| `70b` \| `405b` | | `L402_HMAC_SECRET` | *(default — change in prod)* | HMAC signing key for macaroons | | `L402_MACAROON_SECRET` | *(default — change in prod)* | Macaroon secret | -| `LIGHTNING_BACKEND` | `mock` | `mock` \| `lnd` | +| `LIGHTNING_BACKEND` | `mock` | `mock` (production-ready) \| `lnd` (scaffolded, not yet functional) | --- @@ -217,7 +217,7 @@ src/ shortcuts/ # Siri Shortcuts endpoints telegram_bot/ # Telegram bridge self_tdd/ # Continuous test watchdog -tests/ # 600+ tests — one file per module, all mocked +tests/ # one test file per module, all mocked static/style.css # Dark mission-control theme (JetBrains Mono) docs/ # GitHub Pages landing page AGENTS.md # AI agent development standards ← read this diff --git a/docs/index.html b/docs/index.html index 0c7494a..f356284 100644 --- a/docs/index.html +++ b/docs/index.html @@ -563,13 +563,13 @@

Your agents.
Your hardware.
Your sats.

A local-first AI command center. Talk to Timmy, coordinate your swarm, - gate API access with Bitcoin Lightning — no cloud, no telemetry, no compromise. + gate API access with Bitcoin Lightning — no cloud AI, no telemetry, no compromise.

- 600+ Tests Passing + Full Test Suite Passing FastAPI + HTMX Lightning L402 - No Cloud + No Cloud AI Multi-Agent Swarm MIT License
@@ -582,11 +582,11 @@
-
600+
+
640+
Tests Passing
-
20+
+
58
API Endpoints
@@ -595,7 +595,7 @@
0
-
Cloud Calls
+
Cloud AI Calls
@@ -639,7 +639,7 @@

L402 Lightning Payments

Bitcoin Lightning payment gating via HMAC macaroons. Mock backend for dev, - LND gRPC-ready for production. Agents earn and spend sats autonomously.

+ LND backend scaffolded for production. Auction bids priced in sats.

L402 · Macaroon · BOLT11
@@ -780,7 +780,7 @@ External: Ollama :11434 · optional Redis · optional LND gRPC
5

Test

-
make test        # 600+ tests — no Ollama needed
+        
make test        # full test suite — no Ollama needed
 make test-cov    # + coverage report
 make watch       # self-TDD watchdog in background
@@ -912,7 +912,7 @@ External: Ollama :11434 · optional Redis · optional LND gRPC