feat: complete Event Log, Ledger, Memory, Cascade Router, Upgrade Queue, Activity Feed

This commit implements six major features:

1. Event Log System (src/swarm/event_log.py)
   - SQLite-based audit trail for all swarm events
   - Task lifecycle tracking (created, assigned, completed, failed)
   - Agent lifecycle tracking (joined, left, status changes)
   - Integrated with coordinator for automatic logging
   - Dashboard page at /swarm/events

2. Lightning Ledger (src/lightning/ledger.py)
   - Transaction tracking for Lightning Network payments
   - Balance calculations (incoming, outgoing, net, available)
   - Integrated with payment_handler for automatic logging
   - Dashboard page at /lightning/ledger

3. Semantic Memory / Vector Store (src/memory/vector_store.py)
   - Embedding-based similarity search for Echo agent
   - Fallback to keyword matching if sentence-transformers unavailable
   - Personal facts storage and retrieval
   - Dashboard page at /memory

4. Cascade Router Integration (src/timmy/cascade_adapter.py)
   - Automatic LLM failover between providers (Ollama → AirLLM → API)
   - Circuit breaker pattern for failing providers
   - Metrics tracking per provider (latency, error rates)
   - Dashboard status page at /router/status

5. Self-Upgrade Approval Queue (src/upgrades/)
   - State machine for self-modifications: proposed → approved/rejected → applied/failed
   - Human approval required before applying changes
   - Git integration for branch management
   - Dashboard queue at /self-modify/queue

6. Real-Time Activity Feed (src/events/broadcaster.py)
   - WebSocket-based live activity streaming
   - Bridges event_log to dashboard clients
   - Activity panel on /swarm/live

Tests:
- 101 unit tests passing
- 4 new E2E test files for Selenium testing
- Run with: SELENIUM_UI=1 pytest tests/functional/ -v --headed

Documentation:
- 6 ADRs (017-022) documenting architecture decisions
- Implementation summary in docs/IMPLEMENTATION_SUMMARY.md
- Architecture diagram in docs/architecture-v2.md
This commit is contained in:
Alexander Payne
2026-02-26 08:01:01 -05:00
parent 8d85f95ee5
commit d8d976aa60
41 changed files with 6735 additions and 254 deletions

View File

@@ -1,185 +1,96 @@
"""Functional test fixtures — real services, no mocking.
These fixtures provide:
- TestClient hitting the real FastAPI app (singletons, SQLite, etc.)
- Typer CliRunner for CLI commands
- Real temporary SQLite for swarm state
- Real payment handler with mock lightning backend (LIGHTNING_BACKEND=mock)
- Docker compose lifecycle for container-level tests
"""
"""Shared fixtures for functional/E2E tests."""
import os
import subprocess
import sys
import time
from pathlib import Path
from unittest.mock import MagicMock
import urllib.request
import pytest
from fastapi.testclient import TestClient
# ── Stub heavy optional deps (same as root conftest) ─────────────────────────
# These aren't mocks — they're import compatibility shims for packages
# not installed in the test environment. The code under test handles
# their absence via try/except ImportError.
for _mod in [
"agno", "agno.agent", "agno.models", "agno.models.ollama",
"agno.db", "agno.db.sqlite",
"airllm",
"telegram", "telegram.ext",
]:
sys.modules.setdefault(_mod, MagicMock())
os.environ["TIMMY_TEST_MODE"] = "1"
# Default dashboard URL - override with DASHBOARD_URL env var
DASHBOARD_URL = os.environ.get("DASHBOARD_URL", "http://localhost:8000")
# ── Isolation: fresh coordinator state per test ───────────────────────────────
@pytest.fixture(autouse=True)
def _isolate_state():
    """Reset all singleton state between tests so they can't leak.

    Clears the dashboard message log before and after each test, then tears
    down coordinator-held state (auctions, comms listeners, in-process nodes,
    managed agents) and the routing engine's manifest cache.
    """
    from dashboard.store import message_log

    message_log.clear()
    yield
    message_log.clear()

    from swarm.coordinator import coordinator

    coordinator.auctions._auctions.clear()
    coordinator.comms._listeners.clear()
    coordinator._in_process_nodes.clear()
    coordinator.manager.stop_all()

    # Routing manifests are module-level singletons too — reset them so a
    # test's registered routes can't leak into the next test.
    from swarm import routing

    routing.routing_engine._manifests.clear()
def is_server_running():
    """Return True if a dashboard is already reachable at DASHBOARD_URL.

    Probes the /health endpoint with a short timeout; any failure (connection
    refused, DNS, timeout, non-2xx raised as HTTPError) counts as not running.
    """
    try:
        urllib.request.urlopen(f"{DASHBOARD_URL}/health", timeout=2)
        return True
    except Exception:
        # Explicit False — callers branch on the result, so never fall
        # through and return None implicitly.
        return False
# ── TestClient with real app, no patches ──────────────────────────────────────
@pytest.fixture
def app_client(tmp_path):
    """TestClient wrapping the real dashboard app.

    Uses a tmp_path for swarm SQLite so tests don't pollute each other.
    No mocking — Ollama is offline (graceful degradation), singletons are real.

    Yields:
        fastapi.testclient.TestClient bound to the real dashboard app.
    """
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    import swarm.registry as registry_mod
    import swarm.tasks as tasks_mod

    original_tasks_db = tasks_mod.DB_PATH
    original_reg_db = registry_mod.DB_PATH
    tasks_mod.DB_PATH = data_dir / "swarm.db"
    registry_mod.DB_PATH = data_dir / "swarm.db"
    try:
        from dashboard.app import app

        with TestClient(app) as c:
            yield c
    finally:
        # Restore the module-level DB paths even if the test body raised,
        # so a failing test can't poison every test that follows it.
        tasks_mod.DB_PATH = original_tasks_db
        registry_mod.DB_PATH = original_reg_db
# ── Timmy-serve TestClient ────────────────────────────────────────────────────
@pytest.fixture
def serve_client():
    """TestClient wrapping the timmy-serve L402 app.

    Uses real mock-lightning backend (LIGHTNING_BACKEND=mock).
    """
    from timmy_serve.app import create_timmy_serve_app

    serve_app = create_timmy_serve_app(price_sats=100)
    with TestClient(serve_app) as client:
        yield client
# ── CLI runners ───────────────────────────────────────────────────────────────
@pytest.fixture
def timmy_runner():
    """Typer CliRunner + app for the `timmy` CLI."""
    from typer.testing import CliRunner

    from timmy.cli import app as timmy_app

    return CliRunner(), timmy_app
@pytest.fixture
def serve_runner():
    """Typer CliRunner + app for the `timmy-serve` CLI."""
    from typer.testing import CliRunner

    from timmy_serve.cli import app as serve_app

    return CliRunner(), serve_app
@pytest.fixture
def tdd_runner():
    """Typer CliRunner + app for the `self-tdd` CLI."""
    from typer.testing import CliRunner

    from self_tdd.watchdog import app as tdd_app

    return CliRunner(), tdd_app
# ── Docker compose lifecycle ──────────────────────────────────────────────────
PROJECT_ROOT = Path(__file__).parent.parent.parent
COMPOSE_TEST = PROJECT_ROOT / "docker-compose.test.yml"


def _compose(*args, timeout=60):
    """Run a docker compose command against the test compose file.

    Returns the CompletedProcess with captured stdout/stderr as text.
    """
    base = ["docker", "compose", "-f", str(COMPOSE_TEST), "-p", "timmy-test"]
    return subprocess.run(
        base + list(args),
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=str(PROJECT_ROOT),
    )
def _wait_for_healthy(url: str, retries=30, interval=2):
"""Poll a URL until it returns 200 or we run out of retries."""
import httpx
for i in range(retries):
try:
r = httpx.get(url, timeout=5)
if r.status_code == 200:
return True
except Exception:
pass
time.sleep(interval)
return False
return False
@pytest.fixture(scope="session")
def docker_stack():
"""Spin up the test compose stack once per session.
Yields a base URL (http://localhost:18000) to hit the dashboard.
Tears down after all tests complete.
Skipped unless FUNCTIONAL_DOCKER=1 is set.
def live_server():
"""Start the real Timmy server for E2E tests.
Yields the base URL (http://localhost:8000).
Kills the server after tests complete.
"""
if not COMPOSE_TEST.exists():
pytest.skip("docker-compose.test.yml not found")
if os.environ.get("FUNCTIONAL_DOCKER") != "1":
pytest.skip("Set FUNCTIONAL_DOCKER=1 to run Docker tests")
# Verify Docker daemon is reachable before attempting build
docker_check = subprocess.run(
["docker", "info"], capture_output=True, text=True, timeout=10,
# Check if server already running
if is_server_running():
print(f"\n📡 Using existing server at {DASHBOARD_URL}")
yield DASHBOARD_URL
return
# Start server in subprocess
print(f"\n🚀 Starting server on {DASHBOARD_URL}...")
env = os.environ.copy()
env["PYTHONPATH"] = "src"
env["TIMMY_ENV"] = "test" # Use test config if available
# Determine project root
project_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
proc = subprocess.Popen(
[sys.executable, "-m", "uvicorn", "dashboard.app:app",
"--host", "127.0.0.1", "--port", "8000",
"--log-level", "warning"],
cwd=project_root,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
if docker_check.returncode != 0:
pytest.skip(f"Docker daemon not available: {docker_check.stderr.strip()}")
# Wait for server to start
max_retries = 30
for i in range(max_retries):
if is_server_running():
print(f"✅ Server ready!")
break
time.sleep(1)
print(f"⏳ Waiting for server... ({i+1}/{max_retries})")
else:
proc.terminate()
proc.wait()
raise RuntimeError("Server failed to start")
yield DASHBOARD_URL
# Cleanup
print("\n🛑 Stopping server...")
proc.terminate()
try:
proc.wait(timeout=5)
except subprocess.TimeoutExpired:
proc.kill()
proc.wait()
print("✅ Server stopped")
result = _compose("up", "-d", "--build", "--wait", timeout=300)
if result.returncode != 0:
pytest.fail(f"docker compose up failed:\n{result.stderr}")
base_url = "http://localhost:18000"
if not _wait_for_healthy(f"{base_url}/health"):
logs = _compose("logs")
_compose("down", "-v")
pytest.fail(f"Dashboard never became healthy:\n{logs.stdout}")
# Add custom pytest option for headed mode
def pytest_addoption(parser):
    """Register the --headed flag so browser E2E tests can run visibly.

    Args:
        parser: pytest's option parser (provided by the plugin hook).
    """
    parser.addoption(
        "--headed",
        action="store_true",
        default=False,
        help="Run browser in non-headless mode (visible)",
    )
@pytest.fixture
def headed_mode(request):
    """True when the session was started with the --headed flag."""
    headed = request.config.getoption("--headed")
    return headed