feat: complete Event Log, Ledger, Memory, Cascade Router, Upgrade Queue, Activity Feed

This commit implements six major features:

1. Event Log System (src/swarm/event_log.py)
   - SQLite-based audit trail for all swarm events
   - Task lifecycle tracking (created, assigned, completed, failed)
   - Agent lifecycle tracking (joined, left, status changes)
   - Integrated with coordinator for automatic logging
   - Dashboard page at /swarm/events

2. Lightning Ledger (src/lightning/ledger.py)
   - Transaction tracking for Lightning Network payments
   - Balance calculations (incoming, outgoing, net, available)
   - Integrated with payment_handler for automatic logging
   - Dashboard page at /lightning/ledger

3. Semantic Memory / Vector Store (src/memory/vector_store.py)
   - Embedding-based similarity search for Echo agent
   - Fallback to hashed character-trigram pseudo-embeddings (plus keyword overlap for entries stored without an embedding) if sentence-transformers is unavailable
   - Personal facts storage and retrieval
   - Dashboard page at /memory

4. Cascade Router Integration (src/timmy/cascade_adapter.py)
   - Automatic LLM failover between providers (Ollama → AirLLM → API)
   - Circuit breaker pattern for failing providers
   - Metrics tracking per provider (latency, error rates)
   - Dashboard status page at /router/status

5. Self-Upgrade Approval Queue (src/upgrades/)
   - State machine for self-modifications: proposed → approved/rejected → applied/failed
   - Human approval required before applying changes
   - Git integration for branch management
   - Dashboard queue at /self-modify/queue

6. Real-Time Activity Feed (src/events/broadcaster.py)
   - WebSocket-based live activity streaming
   - Bridges event_log to dashboard clients
   - Activity panel on /swarm/live

Tests:
- 101 unit tests passing
- 4 new E2E test files for Selenium testing
- Run with: SELENIUM_UI=1 pytest tests/functional/ -v --headed

Documentation:
- 6 ADRs (017-022) documenting architecture decisions
- Implementation summary in docs/IMPLEMENTATION_SUMMARY.md
- Architecture diagram in docs/architecture-v2.md
This commit is contained in:
Alexander Payne
2026-02-26 08:01:01 -05:00
parent 8d85f95ee5
commit d8d976aa60
41 changed files with 6735 additions and 254 deletions

483
src/memory/vector_store.py Normal file
View File

@@ -0,0 +1,483 @@
"""Vector store for semantic memory using sqlite-vss.
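Provides embedding-based similarity search for the Echo agent
to retrieve relevant context from conversation history.
Embeddings are currently stored as JSON text in a regular SQLite table and
ranked in Python; the sqlite-vss extension is loaded on a best-effort basis
but is not yet used for indexing.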
"""
import json
import sqlite3
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
DB_PATH = Path("data/swarm.db")
# Simple embedding function using sentence-transformers if available,
# otherwise fall back to keyword-based "pseudo-embeddings"
try:
from sentence_transformers import SentenceTransformer
_model = SentenceTransformer('all-MiniLM-L6-v2')
_has_embeddings = True
except ImportError:
_has_embeddings = False
_model = None
def _get_embedding_dimension() -> int:
    """Report the length of the embedding vectors.

    Returns:
        The dimension reported by the loaded sentence-transformers model,
        or 384 when no model is loaded.
    """
    model_ready = _has_embeddings and _model
    if not model_ready:
        # 384 is the vector width of all-MiniLM-L6-v2.
        return 384
    return _model.get_sentence_embedding_dimension()
def _compute_embedding(text: str) -> list[float]:
    """Compute an embedding vector for *text*.

    Uses the sentence-transformers model when it is loaded. Otherwise builds
    a 384-dimensional bag of hashed character trigrams, L2-normalised, which
    is cruder but lets the system work without heavy dependencies.

    The fallback buckets trigrams with ``zlib.crc32`` rather than the built-in
    ``hash()``. ``hash()`` on strings is salted per interpreter process, so
    vectors written to the database by one run would not line up with query
    vectors computed by the next run. CRC32 is stable across processes.

    Args:
        text: The text to embed.

    Returns:
        A list of floats. In fallback mode, text shorter than three
        characters yields an all-zero vector.
    """
    if _has_embeddings and _model:
        return _model.encode(text).tolist()

    # Local import keeps this block self-contained; zlib is standard library.
    import zlib

    dim = 384
    vec = [0.0] * dim
    lowered = text.lower()

    # Count character trigrams into hashed buckets.
    for start in range(len(lowered) - 2):
        trigram = lowered[start:start + 3]
        # surrogatepass: never fail on lone surrogates in stored text.
        bucket = zlib.crc32(trigram.encode("utf-8", "surrogatepass")) % dim
        vec[bucket] += 1.0

    # L2-normalise so cosine similarity behaves consistently.
    norm = sum(x * x for x in vec) ** 0.5
    if norm > 0:
        vec = [x / norm for x in vec]
    return vec
def _new_memory_id() -> str:
    """Return a fresh random UUID4 rendered as a string."""
    return str(uuid.uuid4())


def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


@dataclass
class MemoryEntry:
    """One stored memory, optionally carrying its embedding vector."""

    # Unique identifier; generated automatically when not supplied.
    id: str = field(default_factory=_new_memory_id)
    # The actual text content.
    content: str = ""
    # Where it came from (agent, user, system).
    source: str = ""
    # Kind of memory: conversation, document, fact, etc.
    context_type: str = "conversation"
    # Optional associations used for filtering.
    agent_id: Optional[str] = None
    task_id: Optional[str] = None
    session_id: Optional[str] = None
    # Additional structured data.
    metadata: Optional[dict] = None
    # Embedding vector, when one was computed.
    embedding: Optional[list[float]] = None
    # Creation time (UTC, ISO-8601); generated automatically when not supplied.
    timestamp: str = field(default_factory=_utc_timestamp)
    # Set during search.
    relevance_score: Optional[float] = None
def _load_vss_extensions(conn: sqlite3.Connection) -> bool:
    """Best-effort load of the sqlite-vss extensions into *conn*.

    Extension loading is switched off again afterwards, so the connection
    handed to callers cannot load further extensions.

    Returns:
        True if both extensions loaded, False if extension loading is
        unsupported by this Python build or the extensions are unavailable.
    """
    try:
        conn.enable_load_extension(True)
    except (AttributeError, sqlite3.Error):
        # Python/SQLite built without loadable-extension support.
        return False
    try:
        conn.load_extension("vector0")
        conn.load_extension("vss0")
    except sqlite3.Error:
        return False
    finally:
        conn.enable_load_extension(False)
    return True


def _get_conn() -> sqlite3.Connection:
    """Open the memory database, creating the schema if needed.

    Creates the parent directory of ``DB_PATH``, opens a connection with
    ``sqlite3.Row`` rows, tries to load the sqlite-vss extensions, and
    ensures the ``memory_entries`` table and its indexes exist.

    Returns:
        An open connection. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: If the schema cannot be created. The connection is
            closed before the error propagates.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(DB_PATH))
    try:
        conn.row_factory = sqlite3.Row

        # Result intentionally ignored: similarity ranking is currently done
        # in Python, so a missing extension is not an error.
        _load_vss_extensions(conn)

        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS memory_entries (
                id TEXT PRIMARY KEY,
                content TEXT NOT NULL,
                source TEXT NOT NULL,
                context_type TEXT NOT NULL DEFAULT 'conversation',
                agent_id TEXT,
                task_id TEXT,
                session_id TEXT,
                metadata TEXT,
                embedding TEXT,  -- JSON array of floats
                timestamp TEXT NOT NULL
            )
            """
        )

        # One index per column used for filtering or ordering.
        indexes = (
            ("idx_memory_agent", "agent_id"),
            ("idx_memory_task", "task_id"),
            ("idx_memory_session", "session_id"),
            ("idx_memory_time", "timestamp"),
            ("idx_memory_type", "context_type"),
        )
        for index_name, column in indexes:
            conn.execute(
                f"CREATE INDEX IF NOT EXISTS {index_name} "
                f"ON memory_entries({column})"
            )
        conn.commit()
    except Exception:
        # Do not leak the handle when setup fails.
        conn.close()
        raise
    return conn
def store_memory(
    content: str,
    source: str,
    context_type: str = "conversation",
    agent_id: Optional[str] = None,
    task_id: Optional[str] = None,
    session_id: Optional[str] = None,
    metadata: Optional[dict] = None,
    compute_embedding: bool = True,
) -> MemoryEntry:
    """Store a memory entry with an optional embedding.

    Args:
        content: The text content to store
        source: Source of the memory (agent name, user, system)
        context_type: Type of context (conversation, document, fact)
        agent_id: Associated agent ID
        task_id: Associated task ID
        session_id: Session identifier
        metadata: Additional structured data (must be JSON-serialisable)
        compute_embedding: Whether to compute vector embedding

    Returns:
        The stored MemoryEntry
    """
    embedding = _compute_embedding(content) if compute_embedding else None

    entry = MemoryEntry(
        content=content,
        source=source,
        context_type=context_type,
        agent_id=agent_id,
        task_id=task_id,
        session_id=session_id,
        metadata=metadata,
        embedding=embedding,
    )

    # Test against None (not truthiness) so an empty dict round-trips as {}
    # instead of being stored as NULL.
    metadata_json = json.dumps(metadata) if metadata is not None else None
    embedding_json = json.dumps(embedding) if embedding is not None else None

    conn = _get_conn()
    try:
        conn.execute(
            """
            INSERT INTO memory_entries
            (id, content, source, context_type, agent_id, task_id, session_id,
             metadata, embedding, timestamp)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                entry.id,
                entry.content,
                entry.source,
                entry.context_type,
                entry.agent_id,
                entry.task_id,
                entry.session_id,
                metadata_json,
                embedding_json,
                entry.timestamp,
            ),
        )
        conn.commit()
    finally:
        # Always release the handle, even if the insert fails.
        conn.close()

    return entry
def search_memories(
    query: str,
    limit: int = 10,
    context_type: Optional[str] = None,
    agent_id: Optional[str] = None,
    session_id: Optional[str] = None,
    min_relevance: float = 0.0,
) -> list[MemoryEntry]:
    """Search for memories by semantic similarity.

    Only the ``limit * 3`` most recent rows matching the filters are fetched
    as candidates; they are then scored and ranked in Python. Rows with a
    stored embedding are scored by cosine similarity against the query
    embedding; rows without one are scored by keyword overlap.

    Args:
        query: Search query text
        limit: Maximum results
        context_type: Filter by context type
        agent_id: Filter by agent
        session_id: Filter by session
        min_relevance: Minimum score an entry needs to be returned

    Returns:
        List of MemoryEntry objects sorted by relevance (highest first),
        each with ``relevance_score`` set.
    """
    query_embedding = _compute_embedding(query)

    # Build the WHERE clause from whichever filters were supplied. Column
    # names are fixed literals; values always go through bound parameters.
    conditions = []
    params = []
    filters = (
        ("context_type", context_type),
        ("agent_id", agent_id),
        ("session_id", session_id),
    )
    for column, value in filters:
        if value:
            conditions.append(f"{column} = ?")
            params.append(value)
    where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""

    query_sql = f"""
        SELECT * FROM memory_entries
        {where_clause}
        ORDER BY timestamp DESC
        LIMIT ?
    """
    params.append(limit * 3)  # Over-fetch so ranking has candidates to drop

    conn = _get_conn()
    try:
        rows = conn.execute(query_sql, params).fetchall()
    finally:
        # Always release the handle, even if the query fails.
        conn.close()

    results = []
    for row in rows:
        entry = MemoryEntry(
            id=row["id"],
            content=row["content"],
            source=row["source"],
            context_type=row["context_type"],
            agent_id=row["agent_id"],
            task_id=row["task_id"],
            session_id=row["session_id"],
            metadata=json.loads(row["metadata"]) if row["metadata"] else None,
            embedding=json.loads(row["embedding"]) if row["embedding"] else None,
            timestamp=row["timestamp"],
        )

        if entry.embedding:
            score = _cosine_similarity(query_embedding, entry.embedding)
        else:
            # No stored vector: fall back to keyword overlap.
            score = _keyword_overlap(query, entry.content)

        entry.relevance_score = score
        if score >= min_relevance:
            results.append(entry)

    results.sort(key=lambda x: x.relevance_score or 0, reverse=True)
    return results[:limit]
def _cosine_similarity(a: list[float], b: list[float]) -> float:
"""Compute cosine similarity between two vectors."""
dot = sum(x*y for x, y in zip(a, b))
norm_a = sum(x*x for x in a) ** 0.5
norm_b = sum(x*x for x in b) ** 0.5
if norm_a == 0 or norm_b == 0:
return 0.0
return dot / (norm_a * norm_b)
def _keyword_overlap(query: str, content: str) -> float:
"""Simple keyword overlap score as fallback."""
query_words = set(query.lower().split())
content_words = set(content.lower().split())
if not query_words:
return 0.0
overlap = len(query_words & content_words)
return overlap / len(query_words)
def get_memory_context(
    query: str,
    max_tokens: int = 2000,
    **filters
) -> str:
    """Build a prompt-ready text block from the memories matching *query*.

    Args:
        query: Search query
        max_tokens: Approximate token budget, converted to characters at
            four characters per token
        **filters: Extra keyword filters forwarded to ``search_memories``

    Returns:
        A header followed by the formatted memories, or an empty string
        when nothing fits the budget.
    """
    char_budget = max_tokens * 4  # Rough approximation
    used_chars = 0
    snippets = []

    for memory in search_memories(query, limit=20, **filters):
        snippet = f"[{memory.source}]: {memory.content}"
        used_after = used_chars + len(snippet)
        # Stop at the first memory that would overflow the budget.
        if used_after > char_budget:
            break
        snippets.append(snippet)
        used_chars = used_after

    if snippets:
        return "Relevant context from memory:\n" + "\n\n".join(snippets)
    return ""
def recall_personal_facts(agent_id: Optional[str] = None) -> list[str]:
    """Recall stored personal facts, newest first.

    Args:
        agent_id: When given, only facts stored for this agent are returned

    Returns:
        Up to 100 fact strings, ordered by timestamp descending
    """
    # Column names are fixed literals; the agent id goes through a bound
    # parameter.
    conditions = ["context_type = 'fact'"]
    params = []
    if agent_id:
        conditions.append("agent_id = ?")
        params.append(agent_id)
    where_clause = " AND ".join(conditions)

    sql = f"""
        SELECT content FROM memory_entries
        WHERE {where_clause}
        ORDER BY timestamp DESC
        LIMIT 100
    """

    conn = _get_conn()
    try:
        rows = conn.execute(sql, params).fetchall()
    finally:
        # Always release the handle, even if the query fails.
        conn.close()

    return [row["content"] for row in rows]
def store_personal_fact(fact: str, agent_id: Optional[str] = None) -> MemoryEntry:
    """Persist *fact* as a memory of type ``fact``.

    The entry is stored with source ``system`` and metadata marking it as
    not auto-extracted.

    Args:
        fact: The fact to store
        agent_id: Associated agent

    Returns:
        The stored MemoryEntry
    """
    fact_fields = {
        "content": fact,
        "source": "system",
        "context_type": "fact",
        "agent_id": agent_id,
        "metadata": {"auto_extracted": False},
    }
    return store_memory(**fact_fields)
def delete_memory(memory_id: str) -> bool:
    """Delete a memory entry by ID.

    Args:
        memory_id: The ``id`` of the entry to remove

    Returns:
        True if deleted, False if not found
    """
    conn = _get_conn()
    try:
        cursor = conn.execute(
            "DELETE FROM memory_entries WHERE id = ?",
            (memory_id,),
        )
        conn.commit()
        return cursor.rowcount > 0
    finally:
        # Always release the handle, even if the delete fails.
        conn.close()
def get_memory_stats() -> dict:
    """Get statistics about the memory store.

    Returns:
        Dict with keys ``total_entries``, ``by_type`` (count per
        context_type), ``with_embeddings`` (rows that have a stored
        embedding) and ``has_embedding_model`` (whether the
        sentence-transformers model is loaded).
    """
    conn = _get_conn()
    try:
        total = conn.execute(
            "SELECT COUNT(*) as count FROM memory_entries"
        ).fetchone()["count"]

        type_rows = conn.execute(
            "SELECT context_type, COUNT(*) as count FROM memory_entries GROUP BY context_type"
        ).fetchall()

        with_embeddings = conn.execute(
            "SELECT COUNT(*) as count FROM memory_entries WHERE embedding IS NOT NULL"
        ).fetchone()["count"]
    finally:
        # Always release the handle, even if a query fails.
        conn.close()

    by_type = {row["context_type"]: row["count"] for row in type_rows}

    return {
        "total_entries": total,
        "by_type": by_type,
        "with_embeddings": with_embeddings,
        "has_embedding_model": _has_embeddings,
    }
def prune_memories(older_than_days: int = 90, keep_facts: bool = True) -> int:
    """Delete old memories to manage storage.

    Timestamps are compared as ISO-8601 strings against a UTC cutoff.

    Args:
        older_than_days: Delete memories older than this
        keep_facts: Whether to preserve fact-type memories

    Returns:
        Number of entries deleted
    """
    from datetime import timedelta

    cutoff = (datetime.now(timezone.utc) - timedelta(days=older_than_days)).isoformat()

    sql = "DELETE FROM memory_entries WHERE timestamp < ?"
    if keep_facts:
        sql += " AND context_type != 'fact'"

    conn = _get_conn()
    try:
        cursor = conn.execute(sql, (cutoff,))
        deleted = cursor.rowcount
        conn.commit()
    finally:
        # Always release the handle, even if the delete fails.
        conn.close()

    return deleted