forked from Rockachopa/Timmy-time-dashboard
Add outcome-based learning system for swarm agents
Introduce a feedback loop where task outcomes (win/loss, success/failure)
feed back into agent bidding strategy. Borrows the "learn from outcomes"
concept from Spark Intelligence but builds it natively on Timmy's existing
SQLite + swarm architecture.
New module: src/swarm/learner.py
- Records every bid outcome with task description context
- Computes per-agent metrics: win rate, success rate, keyword performance
- suggest_bid() adjusts bids based on historical performance
- learned_keywords() discovers what task types agents actually excel at
Changes:
- persona_node: _compute_bid() now consults learner for adaptive adjustments
- coordinator: complete_task/fail_task feed results into learner
- coordinator: run_auction_and_assign records all bid outcomes
- routes/swarm: add /swarm/insights and /swarm/insights/{agent_id} endpoints
- routes/swarm: add POST /swarm/tasks/{task_id}/fail endpoint
All 413 tests pass (23 new + 390 existing).
https://claude.ai/code/session_01E5jhTCwSUnJk9p9zrTMVUJ
This commit is contained in:
@@ -12,6 +12,7 @@ from fastapi import APIRouter, Form, HTTPException, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
from swarm import learner as swarm_learner
|
||||
from swarm import registry
|
||||
from swarm.coordinator import coordinator
|
||||
from swarm.tasks import TaskStatus, update_task
|
||||
@@ -139,6 +140,55 @@ async def complete_task(task_id: str, result: str = Form(...)):
|
||||
return {"task_id": task_id, "status": task.status.value}
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/fail")
async def fail_task(task_id: str, reason: str = Form("")):
    """Record a task as failed so the learner can ingest the failure.

    Delegates to the coordinator; responds 404 when the task is unknown.
    """
    failed = coordinator.fail_task(task_id, reason)
    if failed is None:
        raise HTTPException(404, "Task not found")
    return {"task_id": task_id, "status": failed.status.value}
|
||||
|
||||
|
||||
# ── Learning insights ────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/insights")
async def swarm_insights():
    """Return learned performance metrics for every agent with history."""
    payload: dict[str, dict] = {}
    for aid, m in swarm_learner.get_all_metrics().items():
        payload[aid] = {
            "total_bids": m.total_bids,
            "auctions_won": m.auctions_won,
            "tasks_completed": m.tasks_completed,
            "tasks_failed": m.tasks_failed,
            "win_rate": round(m.win_rate, 3),
            "success_rate": round(m.success_rate, 3),
            "avg_winning_bid": round(m.avg_winning_bid, 1),
            # Cap at the ten strongest keywords for a compact overview.
            "top_keywords": swarm_learner.learned_keywords(aid)[:10],
        }
    return {"agents": payload}
|
||||
|
||||
|
||||
@router.get("/insights/{agent_id}")
async def agent_insights(agent_id: str):
    """Return learned performance metrics for a single agent.

    An agent with no recorded outcomes yields all-zero metrics rather
    than an error.
    """
    metrics = swarm_learner.get_metrics(agent_id)
    return dict(
        agent_id=agent_id,
        total_bids=metrics.total_bids,
        auctions_won=metrics.auctions_won,
        tasks_completed=metrics.tasks_completed,
        tasks_failed=metrics.tasks_failed,
        win_rate=round(metrics.win_rate, 3),
        success_rate=round(metrics.success_rate, 3),
        avg_winning_bid=round(metrics.avg_winning_bid, 1),
        learned_keywords=swarm_learner.learned_keywords(agent_id),
    )
|
||||
|
||||
|
||||
# ── UI endpoints (return HTML partials for HTMX) ─────────────────────────────
|
||||
|
||||
@router.get("/agents/sidebar", response_class=HTMLResponse)
|
||||
|
||||
@@ -13,6 +13,7 @@ from typing import Optional
|
||||
|
||||
from swarm.bidder import AuctionManager, Bid
|
||||
from swarm.comms import SwarmComms
|
||||
from swarm import learner as swarm_learner
|
||||
from swarm.manager import SwarmManager
|
||||
from swarm.recovery import reconcile_on_startup
|
||||
from swarm.registry import AgentRecord
|
||||
@@ -183,9 +184,33 @@ class SwarmCoordinator:
|
||||
|
||||
The auction should already be open (via post_task). This method
|
||||
waits the remaining bidding window and then closes it.
|
||||
|
||||
All bids are recorded in the learner so agents accumulate outcome
|
||||
history that later feeds back into adaptive bidding.
|
||||
"""
|
||||
await asyncio.sleep(0) # yield to let any pending callbacks fire
|
||||
|
||||
# Snapshot the auction bids before closing (for learner recording)
|
||||
auction = self.auctions.get_auction(task_id)
|
||||
all_bids = list(auction.bids) if auction else []
|
||||
|
||||
winner = self.auctions.close_auction(task_id)
|
||||
|
||||
# Retrieve description for learner context
|
||||
task = get_task(task_id)
|
||||
description = task.description if task else ""
|
||||
|
||||
# Record every bid outcome in the learner
|
||||
winner_id = winner.agent_id if winner else None
|
||||
for bid in all_bids:
|
||||
swarm_learner.record_outcome(
|
||||
task_id=task_id,
|
||||
agent_id=bid.agent_id,
|
||||
description=description,
|
||||
bid_sats=bid.bid_sats,
|
||||
won_auction=(bid.agent_id == winner_id),
|
||||
)
|
||||
|
||||
if winner:
|
||||
update_task(
|
||||
task_id,
|
||||
@@ -220,6 +245,26 @@ class SwarmCoordinator:
|
||||
if task.assigned_agent:
|
||||
registry.update_status(task.assigned_agent, "idle")
|
||||
self.comms.complete_task(task_id, task.assigned_agent, result)
|
||||
# Record success in learner
|
||||
swarm_learner.record_task_result(task_id, task.assigned_agent, succeeded=True)
|
||||
return updated
|
||||
|
||||
def fail_task(self, task_id: str, reason: str = "") -> Optional[Task]:
|
||||
"""Mark a task as failed — feeds failure data into the learner."""
|
||||
task = get_task(task_id)
|
||||
if task is None:
|
||||
return None
|
||||
now = datetime.now(timezone.utc).isoformat()
|
||||
updated = update_task(
|
||||
task_id,
|
||||
status=TaskStatus.FAILED,
|
||||
result=reason,
|
||||
completed_at=now,
|
||||
)
|
||||
if task.assigned_agent:
|
||||
registry.update_status(task.assigned_agent, "idle")
|
||||
# Record failure in learner
|
||||
swarm_learner.record_task_result(task_id, task.assigned_agent, succeeded=False)
|
||||
return updated
|
||||
|
||||
def get_task(self, task_id: str) -> Optional[Task]:
|
||||
|
||||
253
src/swarm/learner.py
Normal file
253
src/swarm/learner.py
Normal file
@@ -0,0 +1,253 @@
|
||||
"""Swarm learner — outcome tracking and adaptive bid intelligence.
|
||||
|
||||
Records task outcomes (win/loss, success/failure) per agent and extracts
|
||||
actionable metrics. Persona nodes consult the learner to adjust bids
|
||||
based on historical performance rather than using static strategies.
|
||||
|
||||
Inspired by feedback-loop learning: outcomes re-enter the system to
|
||||
improve future decisions. All data lives in swarm.db alongside the
|
||||
existing bid_history and tasks tables.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sqlite3
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# Shared swarm database; _get_conn() creates the parent directory on demand.
DB_PATH = Path("data/swarm.db")

# Minimum recorded outcomes before the learner starts adjusting bids
_MIN_OUTCOMES = 3

# Stop-words excluded from keyword extraction
_STOP_WORDS = frozenset({
    "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "it", "this", "that", "be", "as",
    "are", "was", "were", "been", "do", "does", "did", "will", "would",
    "can", "could", "should", "may", "might", "me", "my", "i", "we",
    "you", "your", "please", "task", "need", "want", "make", "get",
})

# Lowercase words of three or more letters; shorter tokens carry little signal.
_WORD_RE = re.compile(r"[a-z]{3,}")
|
||||
|
||||
|
||||
@dataclass
class AgentMetrics:
    """Computed performance metrics for a single agent."""

    agent_id: str
    # Raw counters aggregated from the task_outcomes table.
    total_bids: int = 0
    auctions_won: int = 0
    tasks_completed: int = 0
    tasks_failed: int = 0
    # Mean bid_sats over auctions this agent won; 0.0 with no wins.
    avg_winning_bid: float = 0.0
    # auctions_won / total_bids; 0.0 with no bids.
    win_rate: float = 0.0
    # tasks_completed / (tasks_completed + tasks_failed); pending tasks excluded.
    success_rate: float = 0.0
    # keyword -> count of completed (resp. failed) won tasks mentioning it.
    keyword_wins: dict[str, int] = field(default_factory=dict)
    keyword_failures: dict[str, int] = field(default_factory=dict)
|
||||
|
||||
|
||||
def _get_conn() -> sqlite3.Connection:
    """Open a connection to swarm.db, ensuring the outcomes table exists.

    Creates the data directory on first use. Callers own the returned
    connection and are responsible for closing it.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(str(DB_PATH))
    connection.row_factory = sqlite3.Row  # name-based column access
    connection.execute(
        """
        CREATE TABLE IF NOT EXISTS task_outcomes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            task_id TEXT NOT NULL,
            agent_id TEXT NOT NULL,
            description TEXT NOT NULL DEFAULT '',
            bid_sats INTEGER NOT NULL DEFAULT 0,
            won_auction INTEGER NOT NULL DEFAULT 0,
            task_succeeded INTEGER,
            created_at TEXT NOT NULL DEFAULT (datetime('now'))
        )
        """
    )
    connection.commit()
    return connection
|
||||
|
||||
|
||||
def _extract_keywords(text: str) -> list[str]:
    """Tokenise *text* into meaningful lowercase words, dropping stop-words."""
    return [
        word
        for word in _WORD_RE.findall(text.lower())
        if word not in _STOP_WORDS
    ]
|
||||
|
||||
|
||||
# ── Recording ────────────────────────────────────────────────────────────────
|
||||
|
||||
def record_outcome(
    task_id: str,
    agent_id: str,
    description: str,
    bid_sats: int,
    won_auction: bool,
    task_succeeded: Optional[bool] = None,
) -> None:
    """Record one agent's outcome for a task.

    Args:
        task_id: Task the bid belonged to.
        agent_id: Bidding agent.
        description: Task description, kept for keyword extraction.
        bid_sats: The amount this agent bid.
        won_auction: Whether this agent won the auction.
        task_succeeded: Outcome of the task if already known; None keeps
            the column NULL until record_task_result() fills it in.
    """
    conn = _get_conn()
    try:
        conn.execute(
            """
            INSERT INTO task_outcomes
            (task_id, agent_id, description, bid_sats, won_auction, task_succeeded)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                task_id,
                agent_id,
                description,
                bid_sats,
                int(won_auction),
                # Booleans are stored as 0/1; unknown stays NULL.
                int(task_succeeded) if task_succeeded is not None else None,
            ),
        )
        conn.commit()
    finally:
        # Close even when the INSERT raises so connections never leak.
        conn.close()
|
||||
|
||||
|
||||
def record_task_result(task_id: str, agent_id: str, succeeded: bool) -> int:
    """Update the task_succeeded flag for an already-recorded winning outcome.

    Only rows where this agent actually won the auction are touched, so
    losing bidders' history is unaffected.

    Returns the number of rows updated (0 when no matching outcome exists).
    """
    conn = _get_conn()
    try:
        cursor = conn.execute(
            """
            UPDATE task_outcomes
            SET task_succeeded = ?
            WHERE task_id = ? AND agent_id = ? AND won_auction = 1
            """,
            (int(succeeded), task_id, agent_id),
        )
        conn.commit()
        return cursor.rowcount
    finally:
        # Close even when the UPDATE raises so connections never leak.
        conn.close()
|
||||
|
||||
|
||||
# ── Metrics ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def get_metrics(agent_id: str) -> AgentMetrics:
    """Compute performance metrics from stored outcomes.

    Aggregates every recorded outcome for *agent_id*: bid and win counts,
    completed/failed counts, win/success rates, average winning bid, and
    per-keyword win/failure tallies from the descriptions of won tasks.

    Returns a zeroed AgentMetrics when the agent has no history.
    """
    conn = _get_conn()
    try:
        rows = conn.execute(
            "SELECT * FROM task_outcomes WHERE agent_id = ?",
            (agent_id,),
        ).fetchall()
    finally:
        # Close even when the query raises so connections never leak.
        conn.close()

    metrics = AgentMetrics(agent_id=agent_id)
    if not rows:
        return metrics

    metrics.total_bids = len(rows)
    winning_bids: list[int] = []

    for row in rows:
        won = bool(row["won_auction"])
        succeeded = row["task_succeeded"]  # 1, 0, or None while pending
        keywords = _extract_keywords(row["description"])

        if won:
            metrics.auctions_won += 1
            winning_bids.append(row["bid_sats"])
            if succeeded == 1:
                metrics.tasks_completed += 1
                for kw in keywords:
                    metrics.keyword_wins[kw] = metrics.keyword_wins.get(kw, 0) + 1
            elif succeeded == 0:
                metrics.tasks_failed += 1
                for kw in keywords:
                    metrics.keyword_failures[kw] = metrics.keyword_failures.get(kw, 0) + 1

    metrics.win_rate = (
        metrics.auctions_won / metrics.total_bids if metrics.total_bids else 0.0
    )
    # Success rate only counts tasks whose outcome is known.
    decided = metrics.tasks_completed + metrics.tasks_failed
    metrics.success_rate = (
        metrics.tasks_completed / decided if decided else 0.0
    )
    metrics.avg_winning_bid = (
        sum(winning_bids) / len(winning_bids) if winning_bids else 0.0
    )
    return metrics
|
||||
|
||||
|
||||
def get_all_metrics() -> dict[str, AgentMetrics]:
    """Return metrics for every agent that has recorded outcomes."""
    conn = _get_conn()
    try:
        agent_ids = [
            row["agent_id"]
            for row in conn.execute(
                "SELECT DISTINCT agent_id FROM task_outcomes"
            ).fetchall()
        ]
    finally:
        # Close even when the query raises so connections never leak.
        conn.close()
    return {aid: get_metrics(aid) for aid in agent_ids}
|
||||
|
||||
|
||||
# ── Bid intelligence ─────────────────────────────────────────────────────────
|
||||
|
||||
def suggest_bid(agent_id: str, task_description: str, base_bid: int) -> int:
    """Adjust a base bid using learned performance data.

    Returns base_bid unchanged until the agent has at least _MIN_OUTCOMES
    recorded bids. With enough history the bid is scaled:

    - Win rate above 80%: raise the bid — keep winning, earn more.
    - Win rate below 20%: lower the bid — become competitive.
    - Low success rate on won tasks: raise the bid to avoid winning work
      this agent tends to fail.
    - Keyword history on similar tasks: lower the bid on proven work,
      raise it where past attempts failed.

    Always returns at least 1 sat.
    """
    metrics = get_metrics(agent_id)
    if metrics.total_bids < _MIN_OUTCOMES:
        # Not enough history — leave the static strategy untouched.
        return base_bid

    multiplier = 1.0

    # Win-rate adjustment.
    if metrics.win_rate > 0.8:
        multiplier *= 1.15  # winning too easily; charge more
    elif metrics.win_rate < 0.2:
        multiplier *= 0.85  # rarely winning; undercut

    # Success-rate adjustment, meaningful once a couple of tasks resolved.
    resolved = metrics.tasks_completed + metrics.tasks_failed
    if resolved >= 2:
        if metrics.success_rate < 0.5:
            multiplier *= 1.25  # steer away from bad matches
        elif metrics.success_rate > 0.8:
            multiplier *= 0.90  # strong performer; lean in

    # Keyword relevance from past wins on similar descriptions.
    keywords = _extract_keywords(task_description)
    if keywords:
        kw_wins = sum(metrics.keyword_wins.get(k, 0) for k in keywords)
        kw_fails = sum(metrics.keyword_failures.get(k, 0) for k in keywords)
        if kw_wins > kw_fails and kw_wins >= 2:
            multiplier *= 0.90  # proven track record here
        elif kw_fails > kw_wins and kw_fails >= 2:
            multiplier *= 1.15  # poor track record — back off

    return max(1, int(base_bid * multiplier))
|
||||
|
||||
|
||||
def learned_keywords(agent_id: str) -> list[dict]:
    """Return keywords ranked by net wins (wins minus failures).

    Useful for discovering which task types an agent actually excels at,
    potentially different from its hardcoded preferred_keywords.

    Ties on ``net`` are broken alphabetically so the ordering is stable
    across processes (plain set iteration order is hash-randomised and
    would make the API output nondeterministic).
    """
    metrics = get_metrics(agent_id)
    all_kw = set(metrics.keyword_wins) | set(metrics.keyword_failures)
    results = []
    for kw in all_kw:
        wins = metrics.keyword_wins.get(kw, 0)
        fails = metrics.keyword_failures.get(kw, 0)
        results.append({"keyword": kw, "wins": wins, "failures": fails, "net": wins - fails})
    # Highest net first; alphabetical within equal net for determinism.
    results.sort(key=lambda r: (-r["net"], r["keyword"]))
    return results
|
||||
@@ -6,6 +6,8 @@ PersonaNode extends the base SwarmNode to:
|
||||
persona's preferred_keywords the node bids aggressively (bid_base ± jitter).
|
||||
Otherwise it bids at a higher, less-competitive rate.
|
||||
3. Register with the swarm registry under its persona's capabilities string.
|
||||
4. (Adaptive) Consult the swarm learner to adjust bids based on historical
|
||||
win/loss and success/failure data when available.
|
||||
|
||||
Usage (via coordinator):
|
||||
coordinator.spawn_persona("echo")
|
||||
@@ -35,6 +37,7 @@ class PersonaNode(SwarmNode):
|
||||
persona_id: str,
|
||||
agent_id: str,
|
||||
comms: Optional[SwarmComms] = None,
|
||||
use_learner: bool = True,
|
||||
) -> None:
|
||||
meta: PersonaMeta = PERSONAS[persona_id]
|
||||
super().__init__(
|
||||
@@ -45,6 +48,7 @@ class PersonaNode(SwarmNode):
|
||||
)
|
||||
self._meta = meta
|
||||
self._persona_id = persona_id
|
||||
self._use_learner = use_learner
|
||||
logger.debug("PersonaNode %s (%s) initialised", meta["name"], agent_id)
|
||||
|
||||
# ── Bid strategy ─────────────────────────────────────────────────────────
|
||||
@@ -54,6 +58,9 @@ class PersonaNode(SwarmNode):
|
||||
|
||||
Bids lower (more aggressively) when the description contains at least
|
||||
one of our preferred_keywords. Bids higher for off-spec tasks.
|
||||
|
||||
When the learner is enabled and the agent has enough history, the
|
||||
base bid is adjusted by learned performance metrics before jitter.
|
||||
"""
|
||||
desc_lower = task_description.lower()
|
||||
is_preferred = any(
|
||||
@@ -62,9 +69,19 @@ class PersonaNode(SwarmNode):
|
||||
base = self._meta["bid_base"]
|
||||
jitter = random.randint(0, self._meta["bid_jitter"])
|
||||
if is_preferred:
|
||||
return max(1, base - jitter)
|
||||
# Off-spec: inflate bid so we lose to the specialist
|
||||
return min(200, int(base * _OFF_SPEC_MULTIPLIER) + jitter)
|
||||
raw = max(1, base - jitter)
|
||||
else:
|
||||
# Off-spec: inflate bid so we lose to the specialist
|
||||
raw = min(200, int(base * _OFF_SPEC_MULTIPLIER) + jitter)
|
||||
|
||||
# Consult learner for adaptive adjustment
|
||||
if self._use_learner:
|
||||
try:
|
||||
from swarm.learner import suggest_bid
|
||||
return suggest_bid(self.agent_id, task_description, raw)
|
||||
except Exception:
|
||||
logger.debug("Learner unavailable, using static bid")
|
||||
return raw
|
||||
|
||||
def _on_task_posted(self, msg: SwarmMessage) -> None:
|
||||
"""Handle task announcement with persona-aware bidding."""
|
||||
|
||||
Reference in New Issue
Block a user