feat: complete Event Log, Ledger, Memory, Cascade Router, Upgrade Queue, Activity Feed

This commit implements six major features:

1. Event Log System (src/swarm/event_log.py)
   - SQLite-based audit trail for all swarm events
   - Task lifecycle tracking (created, assigned, completed, failed)
   - Agent lifecycle tracking (joined, left, status changes)
   - Integrated with coordinator for automatic logging
   - Dashboard page at /swarm/events

2. Lightning Ledger (src/lightning/ledger.py)
   - Transaction tracking for Lightning Network payments
   - Balance calculations (incoming, outgoing, net, available)
   - Integrated with payment_handler for automatic logging
   - Dashboard page at /lightning/ledger

3. Semantic Memory / Vector Store (src/memory/vector_store.py)
   - Embedding-based similarity search for Echo agent
   - Fallback to keyword matching if sentence-transformers unavailable
   - Personal facts storage and retrieval
   - Dashboard page at /memory

4. Cascade Router Integration (src/timmy/cascade_adapter.py)
   - Automatic LLM failover between providers (Ollama → AirLLM → API)
   - Circuit breaker pattern for failing providers
   - Metrics tracking per provider (latency, error rates)
   - Dashboard status page at /router/status

5. Self-Upgrade Approval Queue (src/upgrades/)
   - State machine for self-modifications: proposed → approved/rejected → applied/failed
   - Human approval required before applying changes
   - Git integration for branch management
   - Dashboard queue at /self-modify/queue

6. Real-Time Activity Feed (src/events/broadcaster.py)
   - WebSocket-based live activity streaming
   - Bridges event_log to dashboard clients
   - Activity panel on /swarm/live

Tests:
- 101 unit tests passing
- 4 new E2E test files for Selenium testing
- Run with: SELENIUM_UI=1 pytest tests/functional/ -v --headed

Documentation:
- 6 ADRs (017-022) documenting architecture decisions
- Implementation summary in docs/IMPLEMENTATION_SUMMARY.md
- Architecture diagram in docs/architecture-v2.md
This commit is contained in:
Alexander Payne
2026-02-26 08:01:01 -05:00
parent 8d85f95ee5
commit d8d976aa60
41 changed files with 6735 additions and 254 deletions

View File

@@ -1,185 +1,96 @@
"""Functional test fixtures — real services, no mocking.
These fixtures provide:
- TestClient hitting the real FastAPI app (singletons, SQLite, etc.)
- Typer CliRunner for CLI commands
- Real temporary SQLite for swarm state
- Real payment handler with mock lightning backend (LIGHTNING_BACKEND=mock)
- Docker compose lifecycle for container-level tests
"""
"""Shared fixtures for functional/E2E tests."""
import os
import subprocess
import sys
import time
from pathlib import Path
from unittest.mock import MagicMock
import urllib.request
import pytest
from fastapi.testclient import TestClient
# ── Stub heavy optional deps (same as root conftest) ─────────────────────────
# These aren't mocks — they're import compatibility shims for packages
# not installed in the test environment. The code under test handles
# their absence via try/except ImportError.
for _mod in [
"agno", "agno.agent", "agno.models", "agno.models.ollama",
"agno.db", "agno.db.sqlite",
"airllm",
"telegram", "telegram.ext",
]:
sys.modules.setdefault(_mod, MagicMock())
os.environ["TIMMY_TEST_MODE"] = "1"
# Default dashboard URL - override with DASHBOARD_URL env var
DASHBOARD_URL = os.environ.get("DASHBOARD_URL", "http://localhost:8000")
# ── Isolation: fresh coordinator state per test ───────────────────────────────
@pytest.fixture(autouse=True)
def _isolate_state():
    """Reset all singleton state between tests so they can't leak.

    Clears the dashboard message log before and after each test, then tears
    down coordinator-held state (auctions, comms listeners, in-process nodes,
    managed agents) and the routing engine's manifest cache.
    """
    from dashboard.store import message_log

    message_log.clear()
    yield
    message_log.clear()

    from swarm.coordinator import coordinator

    coordinator.auctions._auctions.clear()
    coordinator.comms._listeners.clear()
    coordinator._in_process_nodes.clear()
    coordinator.manager.stop_all()

    # Routing manifests are module-level singletons too — reset them so a
    # test's registered routes can't leak into the next test.
    from swarm import routing

    routing.routing_engine._manifests.clear()
def is_server_running():
    """Return True if a dashboard is already reachable at DASHBOARD_URL.

    Probes the /health endpoint with a short timeout; any failure (connection
    refused, DNS, timeout, non-2xx raised as HTTPError) counts as not running.
    """
    try:
        urllib.request.urlopen(f"{DASHBOARD_URL}/health", timeout=2)
        return True
    except Exception:
        # Explicit False — callers branch on the result, so never fall
        # through and return None implicitly.
        return False
# ── TestClient with real app, no patches ──────────────────────────────────────
@pytest.fixture
def app_client(tmp_path):
    """TestClient wrapping the real dashboard app.

    Uses a tmp_path for swarm SQLite so tests don't pollute each other.
    No mocking — Ollama is offline (graceful degradation), singletons are real.

    Yields:
        fastapi.testclient.TestClient bound to the real dashboard app.
    """
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    import swarm.registry as registry_mod
    import swarm.tasks as tasks_mod

    original_tasks_db = tasks_mod.DB_PATH
    original_reg_db = registry_mod.DB_PATH
    tasks_mod.DB_PATH = data_dir / "swarm.db"
    registry_mod.DB_PATH = data_dir / "swarm.db"
    try:
        from dashboard.app import app

        with TestClient(app) as c:
            yield c
    finally:
        # Restore the module-level DB paths even if the test body raised,
        # so a failing test can't poison every test that follows it.
        tasks_mod.DB_PATH = original_tasks_db
        registry_mod.DB_PATH = original_reg_db
# ── Timmy-serve TestClient ────────────────────────────────────────────────────
@pytest.fixture
def serve_client():
    """TestClient wrapping the timmy-serve L402 app.

    Uses real mock-lightning backend (LIGHTNING_BACKEND=mock).
    """
    from timmy_serve.app import create_timmy_serve_app

    serve_app = create_timmy_serve_app(price_sats=100)
    with TestClient(serve_app) as client:
        yield client
# ── CLI runners ───────────────────────────────────────────────────────────────
@pytest.fixture
def timmy_runner():
    """Typer CliRunner + app for the `timmy` CLI."""
    from typer.testing import CliRunner

    from timmy.cli import app as timmy_app

    return CliRunner(), timmy_app
@pytest.fixture
def serve_runner():
    """Typer CliRunner + app for the `timmy-serve` CLI."""
    from typer.testing import CliRunner

    from timmy_serve.cli import app as serve_app

    return CliRunner(), serve_app
@pytest.fixture
def tdd_runner():
    """Typer CliRunner + app for the `self-tdd` CLI."""
    from typer.testing import CliRunner

    from self_tdd.watchdog import app as tdd_app

    return CliRunner(), tdd_app
# ── Docker compose lifecycle ──────────────────────────────────────────────────
PROJECT_ROOT = Path(__file__).parent.parent.parent
COMPOSE_TEST = PROJECT_ROOT / "docker-compose.test.yml"


def _compose(*args, timeout=60):
    """Run a docker compose command against the test compose file.

    Returns the CompletedProcess with captured stdout/stderr as text.
    """
    base = ["docker", "compose", "-f", str(COMPOSE_TEST), "-p", "timmy-test"]
    return subprocess.run(
        base + list(args),
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=str(PROJECT_ROOT),
    )
def _wait_for_healthy(url: str, retries=30, interval=2):
"""Poll a URL until it returns 200 or we run out of retries."""
import httpx
for i in range(retries):
try:
r = httpx.get(url, timeout=5)
if r.status_code == 200:
return True
except Exception:
pass
time.sleep(interval)
return False
return False
@pytest.fixture(scope="session")
def docker_stack():
"""Spin up the test compose stack once per session.
Yields a base URL (http://localhost:18000) to hit the dashboard.
Tears down after all tests complete.
Skipped unless FUNCTIONAL_DOCKER=1 is set.
def live_server():
"""Start the real Timmy server for E2E tests.
Yields the base URL (http://localhost:8000).
Kills the server after tests complete.
"""
if not COMPOSE_TEST.exists():
pytest.skip("docker-compose.test.yml not found")
if os.environ.get("FUNCTIONAL_DOCKER") != "1":
pytest.skip("Set FUNCTIONAL_DOCKER=1 to run Docker tests")
# Verify Docker daemon is reachable before attempting build
docker_check = subprocess.run(
["docker", "info"], capture_output=True, text=True, timeout=10,
# Check if server already running
if is_server_running():
print(f"\n📡 Using existing server at {DASHBOARD_URL}")
yield DASHBOARD_URL
return
# Start server in subprocess
print(f"\n🚀 Starting server on {DASHBOARD_URL}...")
env = os.environ.copy()
env["PYTHONPATH"] = "src"
env["TIMMY_ENV"] = "test" # Use test config if available
# Determine project root
project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
proc = subprocess.Popen(
[sys.executable, "-m", "uvicorn", "dashboard.app:app",
"--host", "127.0.0.1", "--port", "8000",
"--log-level", "warning"],
cwd=project_root,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
if docker_check.returncode != 0:
pytest.skip(f"Docker daemon not available: {docker_check.stderr.strip()}")
# Wait for server to start
max_retries = 30
for i in range(max_retries):
if is_server_running():
print(f"✅ Server ready!")
break
time.sleep(1)
print(f"⏳ Waiting for server... ({i+1}/{max_retries})")
else:
proc.terminate()
proc.wait()
raise RuntimeError("Server failed to start")
yield DASHBOARD_URL
# Cleanup
print("\n🛑 Stopping server...")
proc.terminate()
try:
proc.wait(timeout=5)
except subprocess.TimeoutExpired:
proc.kill()
proc.wait()
print("✅ Server stopped")
result = _compose("up", "-d", "--build", "--wait", timeout=300)
if result.returncode != 0:
pytest.fail(f"docker compose up failed:\n{result.stderr}")
base_url = "http://localhost:18000"
if not _wait_for_healthy(f"{base_url}/health"):
logs = _compose("logs")
_compose("down", "-v")
pytest.fail(f"Dashboard never became healthy:\n{logs.stdout}")
# Add custom pytest option for headed mode
def pytest_addoption(parser):
    """Register the --headed flag so browser E2E tests can run visibly.

    Args:
        parser: pytest's option parser (provided by the plugin hook).
    """
    parser.addoption(
        "--headed",
        action="store_true",
        default=False,
        help="Run browser in non-headless mode (visible)",
    )
@pytest.fixture
def headed_mode(request):
    """True when the session was started with the --headed flag."""
    headed = request.config.getoption("--headed")
    return headed