Add outcome-based learning system for swarm agents

Introduce a feedback loop where task outcomes (win/loss, success/failure)
feed back into agent bidding strategy. Borrows the "learn from outcomes"
concept from Spark Intelligence but builds it natively on Timmy's existing
SQLite + swarm architecture.

New module: src/swarm/learner.py
- Records every bid outcome with task description context
- Computes per-agent metrics: win rate, success rate, keyword performance
- suggest_bid() adjusts bids based on historical performance
- learned_keywords() discovers what task types agents actually excel at

Changes:
- persona_node: _compute_bid() now consults learner for adaptive adjustments
- coordinator: complete_task/fail_task feed results into learner
- coordinator: run_auction_and_assign records all bid outcomes
- routes/swarm: add /swarm/insights and /swarm/insights/{agent_id} endpoints
- routes/swarm: add POST /swarm/tasks/{task_id}/fail endpoint

All 413 tests pass (23 new + 390 existing).

https://claude.ai/code/session_01E5jhTCwSUnJk9p9zrTMVUJ
This commit is contained in:
Claude
2026-02-22 22:04:37 +00:00
parent 9981580131
commit 167fd0a7b4
7 changed files with 697 additions and 3 deletions

View File

@@ -12,6 +12,7 @@ from fastapi import APIRouter, Form, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from swarm import learner as swarm_learner
from swarm import registry
from swarm.coordinator import coordinator
from swarm.tasks import TaskStatus, update_task
@@ -139,6 +140,55 @@ async def complete_task(task_id: str, result: str = Form(...)):
return {"task_id": task_id, "status": task.status.value}
@router.post("/tasks/{task_id}/fail")
async def fail_task(task_id: str, reason: str = Form("")):
    """Mark a task as failed so the learner records the negative outcome.

    Returns 404 when the coordinator does not know the task.
    """
    failed = coordinator.fail_task(task_id, reason)
    if failed is None:
        raise HTTPException(404, "Task not found")
    return {"task_id": task_id, "status": failed.status.value}
# ── Learning insights ────────────────────────────────────────────────────────
@router.get("/insights")
async def swarm_insights():
    """Return learned performance metrics for all agents.

    Response shape: {"agents": {agent_id: {...metrics...}}} with rates
    rounded to 3 decimals and at most the 10 strongest keywords.
    """
    agents: dict = {}
    for aid, m in swarm_learner.get_all_metrics().items():
        agents[aid] = {
            "total_bids": m.total_bids,
            "auctions_won": m.auctions_won,
            "tasks_completed": m.tasks_completed,
            "tasks_failed": m.tasks_failed,
            "win_rate": round(m.win_rate, 3),
            "success_rate": round(m.success_rate, 3),
            "avg_winning_bid": round(m.avg_winning_bid, 1),
            "top_keywords": swarm_learner.learned_keywords(aid)[:10],
        }
    return {"agents": agents}
@router.get("/insights/{agent_id}")
async def agent_insights(agent_id: str):
    """Return learned performance metrics for a single agent.

    Unknown agents yield zeroed metrics rather than a 404, mirroring the
    learner's behaviour for agents with no recorded outcomes.
    """
    metrics = swarm_learner.get_metrics(agent_id)
    payload = {
        "agent_id": agent_id,
        "total_bids": metrics.total_bids,
        "auctions_won": metrics.auctions_won,
        "tasks_completed": metrics.tasks_completed,
        "tasks_failed": metrics.tasks_failed,
        "win_rate": round(metrics.win_rate, 3),
        "success_rate": round(metrics.success_rate, 3),
        "avg_winning_bid": round(metrics.avg_winning_bid, 1),
        "learned_keywords": swarm_learner.learned_keywords(agent_id),
    }
    return payload
# ── UI endpoints (return HTML partials for HTMX) ─────────────────────────────
@router.get("/agents/sidebar", response_class=HTMLResponse)

View File

@@ -13,6 +13,7 @@ from typing import Optional
from swarm.bidder import AuctionManager, Bid
from swarm.comms import SwarmComms
from swarm import learner as swarm_learner
from swarm.manager import SwarmManager
from swarm.recovery import reconcile_on_startup
from swarm.registry import AgentRecord
@@ -183,9 +184,33 @@ class SwarmCoordinator:
The auction should already be open (via post_task). This method
waits the remaining bidding window and then closes it.
All bids are recorded in the learner so agents accumulate outcome
history that later feeds back into adaptive bidding.
"""
await asyncio.sleep(0) # yield to let any pending callbacks fire
# Snapshot the auction bids before closing (for learner recording)
auction = self.auctions.get_auction(task_id)
all_bids = list(auction.bids) if auction else []
winner = self.auctions.close_auction(task_id)
# Retrieve description for learner context
task = get_task(task_id)
description = task.description if task else ""
# Record every bid outcome in the learner
winner_id = winner.agent_id if winner else None
for bid in all_bids:
swarm_learner.record_outcome(
task_id=task_id,
agent_id=bid.agent_id,
description=description,
bid_sats=bid.bid_sats,
won_auction=(bid.agent_id == winner_id),
)
if winner:
update_task(
task_id,
@@ -220,6 +245,26 @@ class SwarmCoordinator:
if task.assigned_agent:
registry.update_status(task.assigned_agent, "idle")
self.comms.complete_task(task_id, task.assigned_agent, result)
# Record success in learner
swarm_learner.record_task_result(task_id, task.assigned_agent, succeeded=True)
return updated
def fail_task(self, task_id: str, reason: str = "") -> Optional[Task]:
"""Mark a task as failed — feeds failure data into the learner."""
task = get_task(task_id)
if task is None:
return None
now = datetime.now(timezone.utc).isoformat()
updated = update_task(
task_id,
status=TaskStatus.FAILED,
result=reason,
completed_at=now,
)
if task.assigned_agent:
registry.update_status(task.assigned_agent, "idle")
# Record failure in learner
swarm_learner.record_task_result(task_id, task.assigned_agent, succeeded=False)
return updated
def get_task(self, task_id: str) -> Optional[Task]:

253
src/swarm/learner.py Normal file
View File

@@ -0,0 +1,253 @@
"""Swarm learner — outcome tracking and adaptive bid intelligence.
Records task outcomes (win/loss, success/failure) per agent and extracts
actionable metrics. Persona nodes consult the learner to adjust bids
based on historical performance rather than using static strategies.
Inspired by feedback-loop learning: outcomes re-enter the system to
improve future decisions. All data lives in swarm.db alongside the
existing bid_history and tasks tables.
"""
import re
import sqlite3
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
# Shared swarm database — same file that holds the existing bid_history
# and tasks tables (see module docstring).
DB_PATH = Path("data/swarm.db")

# Minimum outcomes before the learner starts adjusting bids
_MIN_OUTCOMES = 3

# Stop-words excluded from keyword extraction
_STOP_WORDS = frozenset({
    "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "it", "this", "that", "be", "as",
    "are", "was", "were", "been", "do", "does", "did", "will", "would",
    "can", "could", "should", "may", "might", "me", "my", "i", "we",
    "you", "your", "please", "task", "need", "want", "make", "get",
})

# Keyword candidates: runs of >= 3 lowercase letters (input is lowered first).
_WORD_RE = re.compile(r"[a-z]{3,}")
@dataclass
class AgentMetrics:
    """Computed performance metrics for a single agent."""

    agent_id: str
    # Total recorded bid outcomes for this agent.
    total_bids: int = 0
    # How many of those bids won their auction.
    auctions_won: int = 0
    # Won tasks later marked succeeded / failed (pending tasks count in neither).
    tasks_completed: int = 0
    tasks_failed: int = 0
    # Mean bid_sats over winning bids only; 0.0 when there are no wins.
    avg_winning_bid: float = 0.0
    # auctions_won / total_bids; 0.0 with no bids.
    win_rate: float = 0.0
    # tasks_completed / (tasks_completed + tasks_failed); 0.0 when undecided.
    success_rate: float = 0.0
    # Keyword -> count, extracted from descriptions of won tasks that
    # succeeded (keyword_wins) or failed (keyword_failures).
    keyword_wins: dict[str, int] = field(default_factory=dict)
    keyword_failures: dict[str, int] = field(default_factory=dict)
def _get_conn() -> sqlite3.Connection:
    """Open a connection to the swarm DB, ensuring the outcomes table exists.

    Callers own the returned connection and are responsible for closing it.
    Running CREATE TABLE IF NOT EXISTS on every open keeps callers safe
    against a freshly-created database file.

    Fix: if schema setup raises after the connection is opened, close the
    handle before propagating instead of leaking it.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(DB_PATH))
    try:
        conn.row_factory = sqlite3.Row
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS task_outcomes (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                task_id TEXT NOT NULL,
                agent_id TEXT NOT NULL,
                description TEXT NOT NULL DEFAULT '',
                bid_sats INTEGER NOT NULL DEFAULT 0,
                won_auction INTEGER NOT NULL DEFAULT 0,
                task_succeeded INTEGER,
                created_at TEXT NOT NULL DEFAULT (datetime('now'))
            )
            """
        )
        conn.commit()
    except Exception:
        # Don't leak the handle when table creation fails.
        conn.close()
        raise
    return conn
def _extract_keywords(text: str) -> list[str]:
    """Return the meaningful lower-case words found in *text*.

    Words shorter than three letters and stop-words are discarded;
    duplicates and order are preserved as they appear in the text.
    """
    keywords: list[str] = []
    for word in _WORD_RE.findall(text.lower()):
        if word in _STOP_WORDS:
            continue
        keywords.append(word)
    return keywords
# ── Recording ────────────────────────────────────────────────────────────────
def record_outcome(
    task_id: str,
    agent_id: str,
    description: str,
    bid_sats: int,
    won_auction: bool,
    task_succeeded: Optional[bool] = None,
) -> None:
    """Record one agent's outcome for a task.

    Args:
        task_id: Task the bid was placed on.
        agent_id: The bidding agent.
        description: Task description, stored for later keyword analysis.
        bid_sats: Amount the agent bid.
        won_auction: Whether this bid won the auction.
        task_succeeded: Final result if already known; normally None here
            and filled in later via record_task_result().

    Fix: close the connection in a finally block so a failing INSERT
    cannot leak the handle.
    """
    conn = _get_conn()
    try:
        conn.execute(
            """
            INSERT INTO task_outcomes
            (task_id, agent_id, description, bid_sats, won_auction, task_succeeded)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                task_id,
                agent_id,
                description,
                bid_sats,
                int(won_auction),
                # SQLite has no boolean type; store 1/0, keep NULL for "pending".
                int(task_succeeded) if task_succeeded is not None else None,
            ),
        )
        conn.commit()
    finally:
        conn.close()
def record_task_result(task_id: str, agent_id: str, succeeded: bool) -> int:
    """Update the task_succeeded flag for an already-recorded winning outcome.

    Only rows where this agent won the auction are touched, so losing bids
    never acquire a success/failure label.

    Returns the number of rows updated (0 when no winning outcome exists
    for this task/agent pair).

    Fix: close the connection in a finally block so a failing UPDATE
    cannot leak the handle.
    """
    conn = _get_conn()
    try:
        cursor = conn.execute(
            """
            UPDATE task_outcomes
            SET task_succeeded = ?
            WHERE task_id = ? AND agent_id = ? AND won_auction = 1
            """,
            (int(succeeded), task_id, agent_id),
        )
        conn.commit()
        return cursor.rowcount
    finally:
        conn.close()
# ── Metrics ──────────────────────────────────────────────────────────────────
def get_metrics(agent_id: str) -> AgentMetrics:
    """Compute performance metrics for *agent_id* from stored outcomes.

    Aggregates every recorded bid: win rate over all bids, success rate
    over decided (completed or failed) won tasks, average winning bid, and
    per-keyword win/failure counts from won-task descriptions. Agents with
    no history get a zeroed AgentMetrics.

    Fix: close the connection in a finally block so a failing SELECT
    cannot leak the handle.
    """
    conn = _get_conn()
    try:
        rows = conn.execute(
            "SELECT * FROM task_outcomes WHERE agent_id = ?",
            (agent_id,),
        ).fetchall()
    finally:
        conn.close()

    metrics = AgentMetrics(agent_id=agent_id)
    if not rows:
        return metrics

    metrics.total_bids = len(rows)
    winning_bids: list[int] = []
    for row in rows:
        won = bool(row["won_auction"])
        succeeded = row["task_succeeded"]  # 1, 0, or None (result pending)
        keywords = _extract_keywords(row["description"])
        if won:
            metrics.auctions_won += 1
            winning_bids.append(row["bid_sats"])
            if succeeded == 1:
                metrics.tasks_completed += 1
                for kw in keywords:
                    metrics.keyword_wins[kw] = metrics.keyword_wins.get(kw, 0) + 1
            elif succeeded == 0:
                metrics.tasks_failed += 1
                for kw in keywords:
                    metrics.keyword_failures[kw] = (
                        metrics.keyword_failures.get(kw, 0) + 1
                    )

    metrics.win_rate = (
        metrics.auctions_won / metrics.total_bids if metrics.total_bids else 0.0
    )
    decided = metrics.tasks_completed + metrics.tasks_failed
    metrics.success_rate = metrics.tasks_completed / decided if decided else 0.0
    metrics.avg_winning_bid = (
        sum(winning_bids) / len(winning_bids) if winning_bids else 0.0
    )
    return metrics
def get_all_metrics() -> dict[str, AgentMetrics]:
    """Return metrics for every agent that has recorded outcomes.

    Fix: close the connection in a finally block so a failing query
    cannot leak the handle.

    NOTE(review): this issues one get_metrics() query per agent (N+1);
    fine at swarm scale, revisit if agent counts grow large.
    """
    conn = _get_conn()
    try:
        rows = conn.execute(
            "SELECT DISTINCT agent_id FROM task_outcomes"
        ).fetchall()
    finally:
        conn.close()
    return {row["agent_id"]: get_metrics(row["agent_id"]) for row in rows}
# ── Bid intelligence ─────────────────────────────────────────────────────────
def suggest_bid(agent_id: str, task_description: str, base_bid: int) -> int:
    """Adjust a base bid using learned performance data.

    Returns base_bid unchanged until the agent has >= _MIN_OUTCOMES recorded
    bids. After that, three multiplicative adjustments apply:

    - Win rate > 80%: raise the bid — still likely to win, earn more.
      Win rate < 20%: lower the bid — be more competitive.
    - Success rate < 50% on decided tasks: raise the bid to avoid winning
      work this agent tends to fail; > 80%: lower it and lean in.
    - Keyword history: a proven track record on this description's keywords
      lowers the bid; a poor one raises it.

    The result is clamped to at least 1 sat.
    """
    metrics = get_metrics(agent_id)
    if metrics.total_bids < _MIN_OUTCOMES:
        # Not enough history yet — keep the caller's static bid.
        return base_bid

    factor = 1.0

    # 1) Win-rate adjustment.
    if metrics.win_rate > 0.8:
        factor *= 1.15  # bid higher, maximise revenue
    elif metrics.win_rate < 0.2:
        factor *= 0.85  # bid lower, be competitive

    # 2) Success-rate adjustment, only once enough tasks are decided.
    decided = metrics.tasks_completed + metrics.tasks_failed
    if decided >= 2:
        if metrics.success_rate < 0.5:
            factor *= 1.25  # avoid winning bad matches
        elif metrics.success_rate > 0.8:
            factor *= 0.90  # we're good at this, lean in

    # 3) Keyword relevance from past wins/failures.
    keywords = _extract_keywords(task_description)
    if keywords:
        win_hits = sum(metrics.keyword_wins.get(kw, 0) for kw in keywords)
        fail_hits = sum(metrics.keyword_failures.get(kw, 0) for kw in keywords)
        if win_hits > fail_hits and win_hits >= 2:
            factor *= 0.90  # proven track record on these keywords
        elif fail_hits > win_hits and fail_hits >= 2:
            factor *= 1.15  # poor track record — back off

    return max(1, int(base_bid * factor))
def learned_keywords(agent_id: str) -> list[dict]:
    """Return keywords ranked by net wins (wins minus failures).

    Each entry is {"keyword", "wins", "failures", "net"}, sorted by net
    descending. Useful for discovering which task types an agent actually
    excels at — potentially different from its hardcoded preferred_keywords.
    """
    metrics = get_metrics(agent_id)
    ranked: list[dict] = []
    for kw in set(metrics.keyword_wins) | set(metrics.keyword_failures):
        wins = metrics.keyword_wins.get(kw, 0)
        failures = metrics.keyword_failures.get(kw, 0)
        ranked.append(
            {"keyword": kw, "wins": wins, "failures": failures, "net": wins - failures}
        )
    return sorted(ranked, key=lambda entry: entry["net"], reverse=True)

View File

@@ -6,6 +6,8 @@ PersonaNode extends the base SwarmNode to:
persona's preferred_keywords the node bids aggressively (bid_base ± jitter).
Otherwise it bids at a higher, less-competitive rate.
3. Register with the swarm registry under its persona's capabilities string.
4. (Adaptive) Consult the swarm learner to adjust bids based on historical
win/loss and success/failure data when available.
Usage (via coordinator):
coordinator.spawn_persona("echo")
@@ -35,6 +37,7 @@ class PersonaNode(SwarmNode):
persona_id: str,
agent_id: str,
comms: Optional[SwarmComms] = None,
use_learner: bool = True,
) -> None:
meta: PersonaMeta = PERSONAS[persona_id]
super().__init__(
@@ -45,6 +48,7 @@ class PersonaNode(SwarmNode):
)
self._meta = meta
self._persona_id = persona_id
self._use_learner = use_learner
logger.debug("PersonaNode %s (%s) initialised", meta["name"], agent_id)
# ── Bid strategy ─────────────────────────────────────────────────────────
@@ -54,6 +58,9 @@ class PersonaNode(SwarmNode):
Bids lower (more aggressively) when the description contains at least
one of our preferred_keywords. Bids higher for off-spec tasks.
When the learner is enabled and the agent has enough history, the
base bid is adjusted by learned performance metrics before jitter.
"""
desc_lower = task_description.lower()
is_preferred = any(
@@ -62,9 +69,19 @@ class PersonaNode(SwarmNode):
base = self._meta["bid_base"]
jitter = random.randint(0, self._meta["bid_jitter"])
if is_preferred:
return max(1, base - jitter)
# Off-spec: inflate bid so we lose to the specialist
return min(200, int(base * _OFF_SPEC_MULTIPLIER) + jitter)
raw = max(1, base - jitter)
else:
# Off-spec: inflate bid so we lose to the specialist
raw = min(200, int(base * _OFF_SPEC_MULTIPLIER) + jitter)
# Consult learner for adaptive adjustment
if self._use_learner:
try:
from swarm.learner import suggest_bid
return suggest_bid(self.agent_id, task_description, raw)
except Exception:
logger.debug("Learner unavailable, using static bid")
return raw
def _on_task_posted(self, msg: SwarmMessage) -> None:
"""Handle task announcement with persona-aware bidding."""