"""End-to-end tests with a real Ollama container. These tests spin up the full Docker stack **including** an Ollama container running a tiny model (qwen2.5:0.5b, ~400 MB, CPU-only). They verify that the dashboard can actually generate LLM responses — not just degrade gracefully when Ollama is offline. Run with: FUNCTIONAL_DOCKER=1 pytest tests/functional/test_ollama_chat.py -v Requirements: - Docker daemon running - ~1 GB free disk (Ollama image + model weights) - No GPU required — qwen2.5:0.5b runs fine on CPU The ``ollama_stack`` fixture brings up Ollama + dashboard via docker compose, pulls the model, and tears everything down after the session. """ import os import subprocess import time from pathlib import Path import pytest httpx = pytest.importorskip("httpx") PROJECT_ROOT = Path(__file__).parent.parent.parent COMPOSE_TEST = PROJECT_ROOT / "docker-compose.test.yml" # Tiny model that runs on CPU without GPU. ~400 MB download. TEST_MODEL = os.environ.get("OLLAMA_TEST_MODEL", "qwen2.5:0.5b") # ── helpers ────────────────────────────────────────────────────────────────── def _compose(*args, timeout=120): """Run a docker compose command against the test compose file.""" cmd = [ "docker", "compose", "-f", str(COMPOSE_TEST), "-p", "timmy-test", *args, ] return subprocess.run( cmd, capture_output=True, text=True, timeout=timeout, cwd=str(PROJECT_ROOT), ) def _wait_for_healthy(url: str, retries=40, interval=3): """Poll *url* until it returns HTTP 200.""" for _ in range(retries): try: r = httpx.get(url, timeout=5) if r.status_code == 200: return True except Exception: pass time.sleep(interval) return False def _pull_model(model: str, retries=3): """Ask the containerised Ollama to pull *model*.""" for attempt in range(retries): result = subprocess.run( [ "docker", "exec", "timmy-test-ollama", "ollama", "pull", model, ], capture_output=True, text=True, timeout=600, ) if result.returncode == 0: return True time.sleep(5 * (attempt + 1)) return False # ── fixtures 
def _ollama_stack_down(timeout=120):
    """Stop the test stack (including the ``ollama`` profile) and drop its volumes."""
    return _compose("--profile", "ollama", "down", "-v", timeout=timeout)


@pytest.fixture(scope="session")
def ollama_stack():
    """Bring up Ollama + dashboard, pull the test model, yield base URL.

    Skipped unless ``FUNCTIONAL_DOCKER=1`` is set, the test compose file
    exists, and a Docker daemon answers ``docker info``.  Any failure during
    start-up tears the stack down again before the failure is reported.

    Yields:
        The dashboard base URL (``http://localhost:18000``).
    """
    if os.environ.get("FUNCTIONAL_DOCKER") != "1":
        pytest.skip("Set FUNCTIONAL_DOCKER=1 to run Docker tests")
    if not COMPOSE_TEST.exists():
        pytest.skip("docker-compose.test.yml not found")

    # Verify Docker daemon.  A missing ``docker`` binary raises OSError and a
    # hung daemon raises TimeoutExpired; both mean "not available" -> skip.
    try:
        docker_check = subprocess.run(
            ["docker", "info"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        pytest.skip(f"Docker daemon not available: {exc}")
    if docker_check.returncode != 0:
        pytest.skip(f"Docker daemon not available: {docker_check.stderr.strip()}")

    base_url = "http://localhost:18000"

    # Bring up Ollama + dashboard with the ollama profile.
    # OLLAMA_URL tells the dashboard to reach the sibling container.
    env = {
        **os.environ,
        "OLLAMA_URL": "http://ollama:11434",
        "OLLAMA_MODEL": TEST_MODEL,
    }
    try:
        result = subprocess.run(
            [
                "docker", "compose",
                "-f", str(COMPOSE_TEST),
                "-p", "timmy-test",
                "--profile", "ollama",
                "up", "-d", "--build", "--wait",
            ],
            capture_output=True,
            text=True,
            timeout=300,
            cwd=str(PROJECT_ROOT),
            env=env,
        )
        if result.returncode != 0:
            pytest.fail(f"docker compose up failed:\n{result.stderr}")

        # Wait for the dashboard's health endpoint to answer with 200.
        if not _wait_for_healthy(f"{base_url}/health"):
            # NOTE(review): the profile flag is passed so the ollama service's
            # logs are included too — confirm against the compose file.
            logs = _compose("--profile", "ollama", "logs")
            pytest.fail(f"Stack never became healthy:\n{logs.stdout}")

        # Pull the tiny test model into the Ollama container
        if not _pull_model(TEST_MODEL):
            logs = _compose("logs", "ollama")
            pytest.fail(f"Failed to pull {TEST_MODEL}:\n{logs.stdout}")
    except BaseException:
        # pytest.fail raises an outcome exception that does not derive from
        # Exception, hence BaseException.  Never leave containers or volumes
        # behind when set-up is aborted; the original error is re-raised.
        _ollama_stack_down()
        raise

    yield base_url

    # Teardown
    _ollama_stack_down(timeout=60)


# ── tests ────────────────────────────────────────────────────────────────────


class TestOllamaHealth:
    """Verify the dashboard can reach the Ollama container."""

    def test_health_reports_ollama_up(self, ollama_stack):
        """GET /health should report ollama as 'up'."""
        resp = httpx.get(f"{ollama_stack}/health", timeout=10)
        assert resp.status_code == 200
        data = resp.json()
        services = data.get("services", {})
        assert services.get("ollama") == "up", f"Expected ollama=up, got: {services}"


class TestOllamaChat:
    """Send a real prompt through the dashboard and get an LLM response."""

    def test_chat_returns_llm_response(self, ollama_stack):
        """POST /agents/timmy/chat with a real message → non-error HTML."""
        resp = httpx.post(
            f"{ollama_stack}/agents/timmy/chat",
            data={"message": "Say hello in exactly three words."},
            timeout=120,  # first inference can be slow on CPU
        )
        assert resp.status_code == 200
        body = resp.text.lower()
        # The response should contain actual content, not an error fallback
        assert "error" not in body or "hello" in body, (
            f"Expected LLM response, got error:\n{resp.text[:500]}"
        )

    def test_chat_history_contains_response(self, ollama_stack):
        """After chatting, history should include the message that was sent."""
        # Send a message
        sent = httpx.post(
            f"{ollama_stack}/agents/timmy/chat",
            data={"message": "What is 2+2?"},
            timeout=120,
        )
        # Fail here, not on the history check, if the chat itself broke.
        assert sent.status_code == 200
        # Fetch history
        hist = httpx.get(f"{ollama_stack}/agents/timmy/history", timeout=10)
        assert hist.status_code == 200
        body = hist.text.lower()
        assert "2+2" in body or "2 + 2" in body

    def test_multiple_turns(self, ollama_stack):
        """Verify the agent handles a second turn without crashing."""
        # First turn
        r1 = httpx.post(
            f"{ollama_stack}/agents/timmy/chat",
            data={"message": "Remember the word: pineapple"},
            timeout=120,
        )
        assert r1.status_code == 200
        # Second turn
        r2 = httpx.post(
            f"{ollama_stack}/agents/timmy/chat",
            data={"message": "What word did I just ask you to remember?"},
            timeout=120,
        )
        assert r2.status_code == 200
        # We don't assert "pineapple" — tiny models have weak memory.
        # The point is it doesn't crash on multi-turn.
class TestOllamaDirectAPI:
    """Hit the Ollama container directly to verify the model is loaded."""

    def test_ollama_api_tags(self, ollama_stack):
        """The Ollama server should list the pulled test model."""
        # Ollama isn't port-mapped, so we exec into the container.  The
        # ``ollama`` CLI is used instead of curl: it is the one binary this
        # suite already relies on inside that container (the model is pulled
        # with it), whereas curl may not be installed in the image.
        result = subprocess.run(
            ["docker", "exec", "timmy-test-ollama", "ollama", "list"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        assert result.returncode == 0, (
            f"ollama list failed (exit {result.returncode}):\n{result.stderr}"
        )
        # Compare on the bare model name so an implicit tag still matches.
        model_name = TEST_MODEL.split(":")[0]
        assert model_name in result.stdout, (
            f"{model_name} not listed by Ollama:\n{result.stdout}"
        )