Add outcome-based learning system for swarm agents

Introduce a feedback loop where task outcomes (win/loss, success/failure)
feed back into agent bidding strategy. Borrows the "learn from outcomes"
concept from Spark Intelligence but builds it natively on Timmy's existing
SQLite + swarm architecture.

New module: src/swarm/learner.py
- Records every bid outcome with task description context
- Computes per-agent metrics: win rate, success rate, keyword performance
- suggest_bid() adjusts bids based on historical performance
- learned_keywords() discovers what task types agents actually excel at

Changes:
- persona_node: _compute_bid() now consults learner for adaptive adjustments
- coordinator: complete_task/fail_task feed results into learner
- coordinator: run_auction_and_assign records all bid outcomes
- routes/swarm: add /swarm/insights and /swarm/insights/{agent_id} endpoints
- routes/swarm: add POST /swarm/tasks/{task_id}/fail endpoint

All 413 tests pass (23 new + 390 existing).

https://claude.ai/code/session_01E5jhTCwSUnJk9p9zrTMVUJ
This commit is contained in:
Claude
2026-02-22 22:04:37 +00:00
parent 9981580131
commit 167fd0a7b4
7 changed files with 697 additions and 3 deletions

View File

@@ -12,6 +12,7 @@ from fastapi import APIRouter, Form, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from swarm import learner as swarm_learner
from swarm import registry
from swarm.coordinator import coordinator
from swarm.tasks import TaskStatus, update_task
@@ -139,6 +140,55 @@ async def complete_task(task_id: str, result: str = Form(...)):
return {"task_id": task_id, "status": task.status.value}
@router.post("/tasks/{task_id}/fail")
async def fail_task(task_id: str, reason: str = Form("")):
    """Mark a task as failed so the learner records the negative outcome.

    Returns 404 when the coordinator does not know the task.
    """
    failed = coordinator.fail_task(task_id, reason)
    if failed is None:
        raise HTTPException(404, "Task not found")
    return {"task_id": task_id, "status": failed.status.value}
# ── Learning insights ────────────────────────────────────────────────────────
@router.get("/insights")
async def swarm_insights():
    """Return learned performance metrics for all agents.

    Response shape: {"agents": {agent_id: {...metrics...}}} with rates
    rounded to 3 decimals and at most the 10 strongest keywords.
    """
    agents: dict = {}
    for aid, m in swarm_learner.get_all_metrics().items():
        agents[aid] = {
            "total_bids": m.total_bids,
            "auctions_won": m.auctions_won,
            "tasks_completed": m.tasks_completed,
            "tasks_failed": m.tasks_failed,
            "win_rate": round(m.win_rate, 3),
            "success_rate": round(m.success_rate, 3),
            "avg_winning_bid": round(m.avg_winning_bid, 1),
            "top_keywords": swarm_learner.learned_keywords(aid)[:10],
        }
    return {"agents": agents}
@router.get("/insights/{agent_id}")
async def agent_insights(agent_id: str):
    """Return learned performance metrics for a single agent.

    Unknown agents yield zeroed metrics rather than a 404, mirroring the
    learner's behaviour for agents with no recorded outcomes.
    """
    metrics = swarm_learner.get_metrics(agent_id)
    payload = {
        "agent_id": agent_id,
        "total_bids": metrics.total_bids,
        "auctions_won": metrics.auctions_won,
        "tasks_completed": metrics.tasks_completed,
        "tasks_failed": metrics.tasks_failed,
        "win_rate": round(metrics.win_rate, 3),
        "success_rate": round(metrics.success_rate, 3),
        "avg_winning_bid": round(metrics.avg_winning_bid, 1),
        "learned_keywords": swarm_learner.learned_keywords(agent_id),
    }
    return payload
# ── UI endpoints (return HTML partials for HTMX) ─────────────────────────────
@router.get("/agents/sidebar", response_class=HTMLResponse)

View File

@@ -13,6 +13,7 @@ from typing import Optional
from swarm.bidder import AuctionManager, Bid
from swarm.comms import SwarmComms
from swarm import learner as swarm_learner
from swarm.manager import SwarmManager
from swarm.recovery import reconcile_on_startup
from swarm.registry import AgentRecord
@@ -183,9 +184,33 @@ class SwarmCoordinator:
The auction should already be open (via post_task). This method
waits the remaining bidding window and then closes it.
All bids are recorded in the learner so agents accumulate outcome
history that later feeds back into adaptive bidding.
"""
await asyncio.sleep(0) # yield to let any pending callbacks fire
# Snapshot the auction bids before closing (for learner recording)
auction = self.auctions.get_auction(task_id)
all_bids = list(auction.bids) if auction else []
winner = self.auctions.close_auction(task_id)
# Retrieve description for learner context
task = get_task(task_id)
description = task.description if task else ""
# Record every bid outcome in the learner
winner_id = winner.agent_id if winner else None
for bid in all_bids:
swarm_learner.record_outcome(
task_id=task_id,
agent_id=bid.agent_id,
description=description,
bid_sats=bid.bid_sats,
won_auction=(bid.agent_id == winner_id),
)
if winner:
update_task(
task_id,
@@ -220,6 +245,26 @@ class SwarmCoordinator:
if task.assigned_agent:
registry.update_status(task.assigned_agent, "idle")
self.comms.complete_task(task_id, task.assigned_agent, result)
# Record success in learner
swarm_learner.record_task_result(task_id, task.assigned_agent, succeeded=True)
return updated
def fail_task(self, task_id: str, reason: str = "") -> Optional[Task]:
"""Mark a task as failed — feeds failure data into the learner."""
task = get_task(task_id)
if task is None:
return None
now = datetime.now(timezone.utc).isoformat()
updated = update_task(
task_id,
status=TaskStatus.FAILED,
result=reason,
completed_at=now,
)
if task.assigned_agent:
registry.update_status(task.assigned_agent, "idle")
# Record failure in learner
swarm_learner.record_task_result(task_id, task.assigned_agent, succeeded=False)
return updated
def get_task(self, task_id: str) -> Optional[Task]:

253
src/swarm/learner.py Normal file
View File

@@ -0,0 +1,253 @@
"""Swarm learner — outcome tracking and adaptive bid intelligence.
Records task outcomes (win/loss, success/failure) per agent and extracts
actionable metrics. Persona nodes consult the learner to adjust bids
based on historical performance rather than using static strategies.
Inspired by feedback-loop learning: outcomes re-enter the system to
improve future decisions. All data lives in swarm.db alongside the
existing bid_history and tasks tables.
"""
import re
import sqlite3
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
# Shared swarm database — same file that holds the existing bid_history
# and tasks tables (see module docstring).
DB_PATH = Path("data/swarm.db")

# Minimum outcomes before the learner starts adjusting bids
_MIN_OUTCOMES = 3

# Stop-words excluded from keyword extraction
_STOP_WORDS = frozenset({
    "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "it", "this", "that", "be", "as",
    "are", "was", "were", "been", "do", "does", "did", "will", "would",
    "can", "could", "should", "may", "might", "me", "my", "i", "we",
    "you", "your", "please", "task", "need", "want", "make", "get",
})

# Keyword candidates: runs of >= 3 lowercase letters (input is lowered first).
_WORD_RE = re.compile(r"[a-z]{3,}")
@dataclass
class AgentMetrics:
    """Computed performance metrics for a single agent."""

    agent_id: str
    # Total recorded bid outcomes for this agent.
    total_bids: int = 0
    # How many of those bids won their auction.
    auctions_won: int = 0
    # Won tasks later marked succeeded / failed (pending tasks count in neither).
    tasks_completed: int = 0
    tasks_failed: int = 0
    # Mean bid_sats over winning bids only; 0.0 when there are no wins.
    avg_winning_bid: float = 0.0
    # auctions_won / total_bids; 0.0 with no bids.
    win_rate: float = 0.0
    # tasks_completed / (tasks_completed + tasks_failed); 0.0 when undecided.
    success_rate: float = 0.0
    # Keyword -> count, extracted from descriptions of won tasks that
    # succeeded (keyword_wins) or failed (keyword_failures).
    keyword_wins: dict[str, int] = field(default_factory=dict)
    keyword_failures: dict[str, int] = field(default_factory=dict)
def _get_conn() -> sqlite3.Connection:
    """Open a connection to the swarm DB, ensuring the outcomes table exists.

    Callers own the returned connection and are responsible for closing it.
    Running CREATE TABLE IF NOT EXISTS on every open keeps callers safe
    against a freshly-created database file.

    Fix: if schema setup raises after the connection is opened, close the
    handle before propagating instead of leaking it.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(DB_PATH))
    try:
        conn.row_factory = sqlite3.Row
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS task_outcomes (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                task_id TEXT NOT NULL,
                agent_id TEXT NOT NULL,
                description TEXT NOT NULL DEFAULT '',
                bid_sats INTEGER NOT NULL DEFAULT 0,
                won_auction INTEGER NOT NULL DEFAULT 0,
                task_succeeded INTEGER,
                created_at TEXT NOT NULL DEFAULT (datetime('now'))
            )
            """
        )
        conn.commit()
    except Exception:
        # Don't leak the handle when table creation fails.
        conn.close()
        raise
    return conn
def _extract_keywords(text: str) -> list[str]:
    """Return the meaningful lower-case words found in *text*.

    Words shorter than three letters and stop-words are discarded;
    duplicates and order are preserved as they appear in the text.
    """
    keywords: list[str] = []
    for word in _WORD_RE.findall(text.lower()):
        if word in _STOP_WORDS:
            continue
        keywords.append(word)
    return keywords
# ── Recording ────────────────────────────────────────────────────────────────
def record_outcome(
    task_id: str,
    agent_id: str,
    description: str,
    bid_sats: int,
    won_auction: bool,
    task_succeeded: Optional[bool] = None,
) -> None:
    """Record one agent's outcome for a task.

    Args:
        task_id: Task the bid was placed on.
        agent_id: The bidding agent.
        description: Task description, stored for later keyword analysis.
        bid_sats: Amount the agent bid.
        won_auction: Whether this bid won the auction.
        task_succeeded: Final result if already known; normally None here
            and filled in later via record_task_result().

    Fix: close the connection in a finally block so a failing INSERT
    cannot leak the handle.
    """
    conn = _get_conn()
    try:
        conn.execute(
            """
            INSERT INTO task_outcomes
            (task_id, agent_id, description, bid_sats, won_auction, task_succeeded)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                task_id,
                agent_id,
                description,
                bid_sats,
                int(won_auction),
                # SQLite has no boolean type; store 1/0, keep NULL for "pending".
                int(task_succeeded) if task_succeeded is not None else None,
            ),
        )
        conn.commit()
    finally:
        conn.close()
def record_task_result(task_id: str, agent_id: str, succeeded: bool) -> int:
    """Update the task_succeeded flag for an already-recorded winning outcome.

    Only rows where this agent won the auction are touched, so losing bids
    never acquire a success/failure label.

    Returns the number of rows updated (0 when no winning outcome exists
    for this task/agent pair).

    Fix: close the connection in a finally block so a failing UPDATE
    cannot leak the handle.
    """
    conn = _get_conn()
    try:
        cursor = conn.execute(
            """
            UPDATE task_outcomes
            SET task_succeeded = ?
            WHERE task_id = ? AND agent_id = ? AND won_auction = 1
            """,
            (int(succeeded), task_id, agent_id),
        )
        conn.commit()
        return cursor.rowcount
    finally:
        conn.close()
# ── Metrics ──────────────────────────────────────────────────────────────────
def get_metrics(agent_id: str) -> AgentMetrics:
    """Compute performance metrics for *agent_id* from stored outcomes.

    Aggregates every recorded bid: win rate over all bids, success rate
    over decided (completed or failed) won tasks, average winning bid, and
    per-keyword win/failure counts from won-task descriptions. Agents with
    no history get a zeroed AgentMetrics.

    Fix: close the connection in a finally block so a failing SELECT
    cannot leak the handle.
    """
    conn = _get_conn()
    try:
        rows = conn.execute(
            "SELECT * FROM task_outcomes WHERE agent_id = ?",
            (agent_id,),
        ).fetchall()
    finally:
        conn.close()

    metrics = AgentMetrics(agent_id=agent_id)
    if not rows:
        return metrics

    metrics.total_bids = len(rows)
    winning_bids: list[int] = []
    for row in rows:
        won = bool(row["won_auction"])
        succeeded = row["task_succeeded"]  # 1, 0, or None (result pending)
        keywords = _extract_keywords(row["description"])
        if won:
            metrics.auctions_won += 1
            winning_bids.append(row["bid_sats"])
            if succeeded == 1:
                metrics.tasks_completed += 1
                for kw in keywords:
                    metrics.keyword_wins[kw] = metrics.keyword_wins.get(kw, 0) + 1
            elif succeeded == 0:
                metrics.tasks_failed += 1
                for kw in keywords:
                    metrics.keyword_failures[kw] = (
                        metrics.keyword_failures.get(kw, 0) + 1
                    )

    metrics.win_rate = (
        metrics.auctions_won / metrics.total_bids if metrics.total_bids else 0.0
    )
    decided = metrics.tasks_completed + metrics.tasks_failed
    metrics.success_rate = metrics.tasks_completed / decided if decided else 0.0
    metrics.avg_winning_bid = (
        sum(winning_bids) / len(winning_bids) if winning_bids else 0.0
    )
    return metrics
def get_all_metrics() -> dict[str, AgentMetrics]:
    """Return metrics for every agent that has recorded outcomes.

    Fix: close the connection in a finally block so a failing query
    cannot leak the handle.

    NOTE(review): this issues one get_metrics() query per agent (N+1);
    fine at swarm scale, revisit if agent counts grow large.
    """
    conn = _get_conn()
    try:
        rows = conn.execute(
            "SELECT DISTINCT agent_id FROM task_outcomes"
        ).fetchall()
    finally:
        conn.close()
    return {row["agent_id"]: get_metrics(row["agent_id"]) for row in rows}
# ── Bid intelligence ─────────────────────────────────────────────────────────
def suggest_bid(agent_id: str, task_description: str, base_bid: int) -> int:
    """Adjust a base bid using learned performance data.

    Returns base_bid unchanged until the agent has >= _MIN_OUTCOMES recorded
    bids. After that, three multiplicative adjustments apply:

    - Win rate > 80%: raise the bid — still likely to win, earn more.
      Win rate < 20%: lower the bid — be more competitive.
    - Success rate < 50% on decided tasks: raise the bid to avoid winning
      work this agent tends to fail; > 80%: lower it and lean in.
    - Keyword history: a proven track record on this description's keywords
      lowers the bid; a poor one raises it.

    The result is clamped to at least 1 sat.
    """
    metrics = get_metrics(agent_id)
    if metrics.total_bids < _MIN_OUTCOMES:
        # Not enough history yet — keep the caller's static bid.
        return base_bid

    factor = 1.0

    # 1) Win-rate adjustment.
    if metrics.win_rate > 0.8:
        factor *= 1.15  # bid higher, maximise revenue
    elif metrics.win_rate < 0.2:
        factor *= 0.85  # bid lower, be competitive

    # 2) Success-rate adjustment, only once enough tasks are decided.
    decided = metrics.tasks_completed + metrics.tasks_failed
    if decided >= 2:
        if metrics.success_rate < 0.5:
            factor *= 1.25  # avoid winning bad matches
        elif metrics.success_rate > 0.8:
            factor *= 0.90  # we're good at this, lean in

    # 3) Keyword relevance from past wins/failures.
    keywords = _extract_keywords(task_description)
    if keywords:
        win_hits = sum(metrics.keyword_wins.get(kw, 0) for kw in keywords)
        fail_hits = sum(metrics.keyword_failures.get(kw, 0) for kw in keywords)
        if win_hits > fail_hits and win_hits >= 2:
            factor *= 0.90  # proven track record on these keywords
        elif fail_hits > win_hits and fail_hits >= 2:
            factor *= 1.15  # poor track record — back off

    return max(1, int(base_bid * factor))
def learned_keywords(agent_id: str) -> list[dict]:
    """Return keywords ranked by net wins (wins minus failures).

    Each entry is {"keyword", "wins", "failures", "net"}, sorted by net
    descending. Useful for discovering which task types an agent actually
    excels at — potentially different from its hardcoded preferred_keywords.
    """
    metrics = get_metrics(agent_id)
    ranked: list[dict] = []
    for kw in set(metrics.keyword_wins) | set(metrics.keyword_failures):
        wins = metrics.keyword_wins.get(kw, 0)
        failures = metrics.keyword_failures.get(kw, 0)
        ranked.append(
            {"keyword": kw, "wins": wins, "failures": failures, "net": wins - failures}
        )
    return sorted(ranked, key=lambda entry: entry["net"], reverse=True)

View File

@@ -6,6 +6,8 @@ PersonaNode extends the base SwarmNode to:
persona's preferred_keywords the node bids aggressively (bid_base ± jitter).
Otherwise it bids at a higher, less-competitive rate.
3. Register with the swarm registry under its persona's capabilities string.
4. (Adaptive) Consult the swarm learner to adjust bids based on historical
win/loss and success/failure data when available.
Usage (via coordinator):
coordinator.spawn_persona("echo")
@@ -35,6 +37,7 @@ class PersonaNode(SwarmNode):
persona_id: str,
agent_id: str,
comms: Optional[SwarmComms] = None,
use_learner: bool = True,
) -> None:
meta: PersonaMeta = PERSONAS[persona_id]
super().__init__(
@@ -45,6 +48,7 @@ class PersonaNode(SwarmNode):
)
self._meta = meta
self._persona_id = persona_id
self._use_learner = use_learner
logger.debug("PersonaNode %s (%s) initialised", meta["name"], agent_id)
# ── Bid strategy ─────────────────────────────────────────────────────────
@@ -54,6 +58,9 @@ class PersonaNode(SwarmNode):
Bids lower (more aggressively) when the description contains at least
one of our preferred_keywords. Bids higher for off-spec tasks.
When the learner is enabled and the agent has enough history, the
base bid is adjusted by learned performance metrics before jitter.
"""
desc_lower = task_description.lower()
is_preferred = any(
@@ -62,9 +69,19 @@ class PersonaNode(SwarmNode):
base = self._meta["bid_base"]
jitter = random.randint(0, self._meta["bid_jitter"])
if is_preferred:
return max(1, base - jitter)
# Off-spec: inflate bid so we lose to the specialist
return min(200, int(base * _OFF_SPEC_MULTIPLIER) + jitter)
raw = max(1, base - jitter)
else:
# Off-spec: inflate bid so we lose to the specialist
raw = min(200, int(base * _OFF_SPEC_MULTIPLIER) + jitter)
# Consult learner for adaptive adjustment
if self._use_learner:
try:
from swarm.learner import suggest_bid
return suggest_bid(self.agent_id, task_description, raw)
except Exception:
logger.debug("Learner unavailable, using static bid")
return raw
def _on_task_posted(self, msg: SwarmMessage) -> None:
"""Handle task announcement with persona-aware bidding."""