From c91e02e7c571497c19d557a0f43ec46c6d548193 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 25 Feb 2026 00:46:22 +0000 Subject: [PATCH] test: add functional test suite with real fixtures, no mocking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three-tier functional test infrastructure: - CLI tests via Typer CliRunner (timmy, timmy-serve, self-tdd) - Dashboard integration tests with real TestClient, real SQLite, real coordinator (no patch/mock — Ollama offline = graceful degradation) - Docker compose container-level tests (gated by FUNCTIONAL_DOCKER=1) - End-to-end L402 payment flow with real mock-lightning backend 42 new tests (8 Docker tests skipped without FUNCTIONAL_DOCKER=1). All 849 tests pass. https://claude.ai/code/session_01WU4h3cQQiouMwmgYmAgkMM --- docker-compose.test.yml | 70 +++++++++ tests/functional/__init__.py | 0 tests/functional/conftest.py | 178 +++++++++++++++++++++++ tests/functional/test_cli.py | 124 ++++++++++++++++ tests/functional/test_dashboard.py | 199 ++++++++++++++++++++++++++ tests/functional/test_docker_swarm.py | 150 +++++++++++++++++++ tests/functional/test_l402_flow.py | 106 ++++++++++++++ 7 files changed, 827 insertions(+) create mode 100644 docker-compose.test.yml create mode 100644 tests/functional/__init__.py create mode 100644 tests/functional/conftest.py create mode 100644 tests/functional/test_cli.py create mode 100644 tests/functional/test_dashboard.py create mode 100644 tests/functional/test_docker_swarm.py create mode 100644 tests/functional/test_l402_flow.py diff --git a/docker-compose.test.yml b/docker-compose.test.yml new file mode 100644 index 00000000..517dbdb2 --- /dev/null +++ b/docker-compose.test.yml @@ -0,0 +1,70 @@ +# ── Timmy Time — test stack ────────────────────────────────────────────────── +# +# Lightweight compose for functional tests. Runs the dashboard on port 18000 +# and optional agent workers on the swarm-test-net network. +# +# Usage: +# FUNCTIONAL_DOCKER=1 pytest tests/functional/test_docker_swarm.py -v +# +# Or manually: +# docker compose -f docker-compose.test.yml -p timmy-test up -d --build --wait +# curl http://localhost:18000/health +# docker compose -f docker-compose.test.yml -p timmy-test down -v + +services: + + dashboard: + build: . + image: timmy-time:test + container_name: timmy-test-dashboard + ports: + - "18000:8000" + volumes: + - test-data:/app/data + - ./src:/app/src + - ./static:/app/static + environment: + DEBUG: "true" + TIMMY_TEST_MODE: "1" + OLLAMA_URL: "http://host.docker.internal:11434" + LIGHTNING_BACKEND: "mock" + extra_hosts: + - "host.docker.internal:host-gateway" + networks: + - swarm-test-net + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 5s + timeout: 3s + retries: 10 + start_period: 10s + + agent: + build: . + image: timmy-time:test + profiles: + - agents + volumes: + - test-data:/app/data + - ./src:/app/src + environment: + COORDINATOR_URL: "http://dashboard:8000" + OLLAMA_URL: "http://host.docker.internal:11434" + AGENT_NAME: "${AGENT_NAME:-TestWorker}" + AGENT_CAPABILITIES: "${AGENT_CAPABILITIES:-general}" + TIMMY_TEST_MODE: "1" + extra_hosts: + - "host.docker.internal:host-gateway" + command: ["sh", "-c", "python -m swarm.agent_runner --agent-id agent-$(hostname) --name $${AGENT_NAME:-TestWorker}"] + networks: + - swarm-test-net + depends_on: + dashboard: + condition: service_healthy + +volumes: + test-data: + +networks: + swarm-test-net: + driver: bridge diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py new file mode 100644 index 00000000..01c64790 --- /dev/null +++ b/tests/functional/conftest.py @@ -0,0 +1,178 @@ +"""Functional test fixtures — real services, no mocking. + +These fixtures provide: +- TestClient hitting the real FastAPI app (singletons, SQLite, etc.) +- Typer CliRunner for CLI commands +- Real temporary SQLite for swarm state +- Real payment handler with mock lightning backend (LIGHTNING_BACKEND=mock) +- Docker compose lifecycle for container-level tests +""" + +import os +import subprocess +import sys +import time +from pathlib import Path +from unittest.mock import MagicMock + +import pytest +from fastapi.testclient import TestClient + +# ── Stub heavy optional deps (same as root conftest) ───────────────────────── +# These aren't mocks — they're import compatibility shims for packages +# not installed in the test environment. The code under test handles +# their absence via try/except ImportError. +for _mod in [ + "agno", "agno.agent", "agno.models", "agno.models.ollama", + "agno.db", "agno.db.sqlite", + "airllm", + "telegram", "telegram.ext", +]: + sys.modules.setdefault(_mod, MagicMock()) + +os.environ["TIMMY_TEST_MODE"] = "1" + + +# ── Isolation: fresh coordinator state per test ─────────────────────────────── + +@pytest.fixture(autouse=True) +def _isolate_state(): + """Reset all singleton state between tests so they can't leak.""" + from dashboard.store import message_log + message_log.clear() + yield + message_log.clear() + from swarm.coordinator import coordinator + coordinator.auctions._auctions.clear() + coordinator.comms._listeners.clear() + coordinator._in_process_nodes.clear() + coordinator.manager.stop_all() + try: + from swarm import routing + routing.routing_engine._manifests.clear() + except Exception: + pass + + +# ── TestClient with real app, no patches ────────────────────────────────────── + +@pytest.fixture +def app_client(tmp_path): + """TestClient wrapping the real dashboard app. + + Uses a tmp_path for swarm SQLite so tests don't pollute each other. + No mocking — Ollama is offline (graceful degradation), singletons are real. + """ + data_dir = tmp_path / "data" + data_dir.mkdir() + + import swarm.tasks as tasks_mod + import swarm.registry as registry_mod + original_tasks_db = tasks_mod.DB_PATH + original_reg_db = registry_mod.DB_PATH + + tasks_mod.DB_PATH = data_dir / "swarm.db" + registry_mod.DB_PATH = data_dir / "swarm.db" + + from dashboard.app import app + with TestClient(app) as c: + yield c + + tasks_mod.DB_PATH = original_tasks_db + registry_mod.DB_PATH = original_reg_db + + +# ── Timmy-serve TestClient ──────────────────────────────────────────────────── + +@pytest.fixture +def serve_client(): + """TestClient wrapping the timmy-serve L402 app. + + Uses real mock-lightning backend (LIGHTNING_BACKEND=mock). + """ + from timmy_serve.app import create_timmy_serve_app + + app = create_timmy_serve_app(price_sats=100) + with TestClient(app) as c: + yield c + + +# ── CLI runners ─────────────────────────────────────────────────────────────── + +@pytest.fixture +def timmy_runner(): + """Typer CliRunner + app for the `timmy` CLI.""" + from typer.testing import CliRunner + from timmy.cli import app + return CliRunner(), app + + +@pytest.fixture +def serve_runner(): + """Typer CliRunner + app for the `timmy-serve` CLI.""" + from typer.testing import CliRunner + from timmy_serve.cli import app + return CliRunner(), app + + +@pytest.fixture +def tdd_runner(): + """Typer CliRunner + app for the `self-tdd` CLI.""" + from typer.testing import CliRunner + from self_tdd.watchdog import app + return CliRunner(), app + + +# ── Docker compose lifecycle ────────────────────────────────────────────────── + +PROJECT_ROOT = Path(__file__).parent.parent.parent +COMPOSE_TEST = PROJECT_ROOT / "docker-compose.test.yml" + + +def _compose(*args, timeout=60): + """Run a docker compose command against the test compose file.""" + cmd = ["docker", "compose", "-f", str(COMPOSE_TEST), "-p", "timmy-test", *args] + return subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, cwd=str(PROJECT_ROOT)) + + +def _wait_for_healthy(url: str, retries=30, interval=2): + """Poll a URL until it returns 200 or we run out of retries.""" + import httpx + for i in range(retries): + try: + r = httpx.get(url, timeout=5) + if r.status_code == 200: + return True + except Exception: + pass + time.sleep(interval) + return False + + +@pytest.fixture(scope="session") +def docker_stack(): + """Spin up the test compose stack once per session. + + Yields a base URL (http://localhost:18000) to hit the dashboard. + Tears down after all tests complete. + + Skipped unless FUNCTIONAL_DOCKER=1 is set. + """ + if not COMPOSE_TEST.exists(): + pytest.skip("docker-compose.test.yml not found") + if os.environ.get("FUNCTIONAL_DOCKER") != "1": + pytest.skip("Set FUNCTIONAL_DOCKER=1 to run Docker tests") + + result = _compose("up", "-d", "--build", "--wait", timeout=300) + if result.returncode != 0: + pytest.fail(f"docker compose up failed:\n{result.stderr}") + + base_url = "http://localhost:18000" + if not _wait_for_healthy(f"{base_url}/health"): + logs = _compose("logs") + _compose("down", "-v") + pytest.fail(f"Dashboard never became healthy:\n{logs.stdout}") + + yield base_url + + _compose("down", "-v", timeout=60) diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py new file mode 100644 index 00000000..c7930cc0 --- /dev/null +++ b/tests/functional/test_cli.py @@ -0,0 +1,124 @@ +"""Functional tests for CLI entry points via Typer's CliRunner. + +Each test invokes the real CLI command. Ollama is not running, so +commands that need inference will fail gracefully — and that's a valid +user scenario we want to verify. +""" + +import pytest + + +# ── timmy CLI ───────────────────────────────────────────────────────────────── + + +class TestTimmyCLI: + """Tests the `timmy` command (chat, think, status).""" + + def test_status_runs(self, timmy_runner): + runner, app = timmy_runner + result = runner.invoke(app, ["status"]) + # Ollama is offline, so this should either: + # - Print an error about Ollama being unreachable, OR + # - Exit non-zero + # Either way, the CLI itself shouldn't crash with an unhandled exception. + # The exit code tells us if the command ran at all. + assert result.exit_code is not None + + def test_chat_requires_message(self, timmy_runner): + runner, app = timmy_runner + result = runner.invoke(app, ["chat"]) + # Missing required argument + assert result.exit_code != 0 + assert "Missing argument" in result.output or "Usage" in result.output + + def test_think_requires_topic(self, timmy_runner): + runner, app = timmy_runner + result = runner.invoke(app, ["think"]) + assert result.exit_code != 0 + assert "Missing argument" in result.output or "Usage" in result.output + + def test_chat_with_message_runs(self, timmy_runner): + """Chat with a real message — Ollama offline means graceful failure.""" + runner, app = timmy_runner + result = runner.invoke(app, ["chat", "hello"]) + # Will fail because Ollama isn't running, but the CLI should handle it + assert result.exit_code is not None + + def test_backend_flag_accepted(self, timmy_runner): + runner, app = timmy_runner + result = runner.invoke(app, ["status", "--backend", "ollama"]) + assert result.exit_code is not None + + def test_help_text(self, timmy_runner): + runner, app = timmy_runner + result = runner.invoke(app, ["--help"]) + assert result.exit_code == 0 + assert "Timmy" in result.output or "sovereign" in result.output.lower() + + +# ── timmy-serve CLI ─────────────────────────────────────────────────────────── + + +class TestTimmyServeCLI: + """Tests the `timmy-serve` command (start, invoice, status).""" + + def test_start_dry_run(self, serve_runner): + """--dry-run should print config and exit cleanly.""" + runner, app = serve_runner + result = runner.invoke(app, ["start", "--dry-run"]) + assert result.exit_code == 0 + assert "Starting Timmy Serve" in result.output + assert "Dry run" in result.output or "dry run" in result.output + + def test_start_dry_run_custom_port(self, serve_runner): + runner, app = serve_runner + result = runner.invoke(app, ["start", "--dry-run", "--port", "9999"]) + assert result.exit_code == 0 + assert "9999" in result.output + + def test_start_dry_run_custom_price(self, serve_runner): + runner, app = serve_runner + result = runner.invoke(app, ["start", "--dry-run", "--price", "500"]) + assert result.exit_code == 0 + assert "500" in result.output + + def test_invoice_creates_real_invoice(self, serve_runner): + """Create a real Lightning invoice via the mock backend.""" + runner, app = serve_runner + result = runner.invoke(app, ["invoice", "--amount", "200", "--memo", "test invoice"]) + assert result.exit_code == 0 + assert "Invoice created" in result.output + assert "200" in result.output + assert "Payment hash" in result.output or "payment_hash" in result.output.lower() + + def test_status_shows_earnings(self, serve_runner): + runner, app = serve_runner + result = runner.invoke(app, ["status"]) + assert result.exit_code == 0 + assert "Total invoices" in result.output or "invoices" in result.output.lower() + assert "sats" in result.output.lower() + + def test_help_text(self, serve_runner): + runner, app = serve_runner + result = runner.invoke(app, ["--help"]) + assert result.exit_code == 0 + assert "Serve" in result.output or "Lightning" in result.output + + +# ── self-tdd CLI ────────────────────────────────────────────────────────────── + + +class TestSelfTddCLI: + """Tests the `self-tdd` command (watch).""" + + def test_help_text(self, tdd_runner): + runner, app = tdd_runner + result = runner.invoke(app, ["--help"]) + assert result.exit_code == 0 + assert "watchdog" in result.output.lower() or "test" in result.output.lower() + + def test_watch_help(self, tdd_runner): + runner, app = tdd_runner + result = runner.invoke(app, ["watch", "--help"]) + assert result.exit_code == 0 + assert "interval" in result.output.lower() diff --git a/tests/functional/test_dashboard.py b/tests/functional/test_dashboard.py new file mode 100644 index 00000000..c85948aa --- /dev/null +++ b/tests/functional/test_dashboard.py @@ -0,0 +1,199 @@ +"""Functional tests for the dashboard — real HTTP requests, no mocking. + +The dashboard runs with Ollama offline (graceful degradation). +These tests verify what a real user sees when they open the browser. +""" + +import pytest + + +class TestDashboardLoads: + """Verify the dashboard serves real HTML pages.""" + + def test_index_page(self, app_client): + response = app_client.get("/") + assert response.status_code == 200 + assert "text/html" in response.headers["content-type"] + # The real rendered page should have the base HTML structure + assert "= 1 # At least some registered + + _compose("--profile", "agents", "down", timeout=30) diff --git a/tests/functional/test_l402_flow.py b/tests/functional/test_l402_flow.py new file mode 100644 index 00000000..ca24c1b9 --- /dev/null +++ b/tests/functional/test_l402_flow.py @@ -0,0 +1,106 @@ +"""Functional test for the full L402 payment flow. + +Uses the real mock-lightning backend (LIGHTNING_BACKEND=mock) — no patching. +This exercises the entire payment lifecycle a real client would go through: + +1. Hit protected endpoint → get 402 + invoice + macaroon +2. "Pay" the invoice (settle via mock backend) +3. Present macaroon:preimage → get access +""" + +import pytest + + +class TestL402PaymentFlow: + """End-to-end L402 payment lifecycle.""" + + def test_unprotected_endpoints_work(self, serve_client): + """Status and health don't require payment.""" + resp = serve_client.get("/serve/status") + assert resp.status_code == 200 + data = resp.json() + assert data["status"] == "active" + assert data["price_sats"] == 100 + + health = serve_client.get("/health") + assert health.status_code == 200 + + def test_chat_without_payment_returns_402(self, serve_client): + """Hitting /serve/chat without an L402 token gives 402.""" + resp = serve_client.post( + "/serve/chat", + json={"message": "hello"}, + ) + assert resp.status_code == 402 + data = resp.json() + assert data["error"] == "Payment Required" + assert data["code"] == "L402" + assert "macaroon" in data + assert "invoice" in data + assert "payment_hash" in data + assert data["amount_sats"] == 100 + + # WWW-Authenticate header should be present + assert "WWW-Authenticate" in resp.headers + assert "L402" in resp.headers["WWW-Authenticate"] + + def test_chat_with_garbage_token_returns_402(self, serve_client): + resp = serve_client.post( + "/serve/chat", + json={"message": "hello"}, + headers={"Authorization": "L402 garbage:token"}, + ) + assert resp.status_code == 402 + + def test_full_payment_lifecycle(self, serve_client): + """Complete flow: get challenge → pay → access.""" + from timmy_serve.payment_handler import payment_handler + + # Step 1: Hit protected endpoint, get 402 challenge + challenge_resp = serve_client.post( + "/serve/chat", + json={"message": "hello"}, + ) + assert challenge_resp.status_code == 402 + challenge = challenge_resp.json() + macaroon = challenge["macaroon"] + payment_hash = challenge["payment_hash"] + + # Step 2: "Pay" the invoice via the mock backend's auto-settle + # The mock backend settles invoices when you provide the correct preimage. + # Get the preimage from the mock backend's internal state. + invoice = payment_handler.get_invoice(payment_hash) + assert invoice is not None + preimage = invoice.preimage # mock backend exposes this + + # Step 3: Present macaroon:preimage to access the endpoint + resp = serve_client.post( + "/serve/chat", + json={"message": "hello after paying"}, + headers={"Authorization": f"L402 {macaroon}:{preimage}"}, + ) + # The chat will fail because Ollama isn't running, but the + # L402 middleware should let us through (status != 402). + # We accept 200 (success) or 500 (Ollama offline) — NOT 402. + assert resp.status_code != 402 + + def test_create_invoice_via_api(self, serve_client): + """POST /serve/invoice creates a real invoice.""" + resp = serve_client.post( + "/serve/invoice", + json={"amount_sats": 500, "memo": "premium access"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["amount_sats"] == 500 + assert data["payment_hash"] + assert data["payment_request"] + + def test_status_reflects_invoices(self, serve_client): + """Creating invoices should be reflected in /serve/status.""" + serve_client.post("/serve/invoice", json={"amount_sats": 100, "memo": "test"}) + serve_client.post("/serve/invoice", json={"amount_sats": 200, "memo": "test2"}) + + resp = serve_client.get("/serve/status") + data = resp.json() + assert data["total_invoices"] >= 2