forked from Rockachopa/Timmy-time-dashboard
* polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
272 lines
8.4 KiB
Python
"""End-to-end tests with a real Ollama container.
|
|
|
|
These tests spin up the full Docker stack **including** an Ollama container
|
|
running a tiny model (qwen2.5:0.5b, ~400 MB, CPU-only). They verify that
|
|
the dashboard can actually generate LLM responses — not just degrade
|
|
gracefully when Ollama is offline.
|
|
|
|
Run with:
|
|
FUNCTIONAL_DOCKER=1 pytest tests/functional/test_ollama_chat.py -v
|
|
|
|
Requirements:
|
|
- Docker daemon running
|
|
- ~1 GB free disk (Ollama image + model weights)
|
|
- No GPU required — qwen2.5:0.5b runs fine on CPU
|
|
|
|
The ``ollama_stack`` fixture brings up Ollama + dashboard via docker compose,
|
|
pulls the model, and tears everything down after the session.
|
|
"""
|
|
|
|
import os
|
|
import subprocess
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
httpx = pytest.importorskip("httpx")
|
|
|
|
PROJECT_ROOT = Path(__file__).parent.parent.parent
|
|
COMPOSE_TEST = PROJECT_ROOT / "docker-compose.test.yml"
|
|
|
|
# Tiny model that runs on CPU without GPU. ~400 MB download.
|
|
TEST_MODEL = os.environ.get("OLLAMA_TEST_MODEL", "qwen2.5:0.5b")
|
|
|
|
# ── helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _compose(*args, timeout=120):
    """Run a ``docker compose`` subcommand against the test compose file.

    *args* are appended after the fixed ``-f``/``-p`` options; returns the
    ``subprocess.CompletedProcess`` (stdout/stderr captured as text).
    """
    base_cmd = [
        "docker",
        "compose",
        "-f",
        str(COMPOSE_TEST),
        # Dedicated project name keeps test containers apart from dev ones.
        "-p",
        "timmy-test",
    ]
    return subprocess.run(
        [*base_cmd, *args],
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=str(PROJECT_ROOT),
    )
|
|
|
|
|
|
def _wait_for_healthy(url: str, retries=40, interval=3):
    """Poll *url* until it answers HTTP 200.

    Makes up to *retries* attempts, sleeping *interval* seconds after each
    one. Returns True on the first 200; False if every attempt fails.
    """
    remaining = retries
    while remaining:
        remaining -= 1
        try:
            response = httpx.get(url, timeout=5)
        except Exception:
            # Connection refused / reset while the stack is still booting.
            response = None
        if response is not None and response.status_code == 200:
            return True
        time.sleep(interval)
    return False
|
|
|
|
|
|
def _pull_model(model: str, retries=3):
    """Ask the containerised Ollama to pull *model*.

    Retries with linear backoff (5 s, 10 s, ...) because registry pulls can
    be flaky on CI. Returns True on success, False once all attempts fail.
    """
    for attempt in range(retries):
        result = subprocess.run(
            [
                "docker",
                "exec",
                "timmy-test-ollama",
                "ollama",
                "pull",
                model,
            ],
            capture_output=True,
            text=True,
            timeout=600,  # model weights can take minutes on slow links
        )
        if result.returncode == 0:
            return True
        # Fix: previously we also slept after the FINAL failed attempt,
        # wasting up to 15 s before reporting failure. Back off only when
        # another attempt is still coming.
        if attempt < retries - 1:
            time.sleep(5 * (attempt + 1))
    return False
|
|
|
|
|
|
# ── fixtures ─────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture(scope="session")
def ollama_stack():
    """Bring up Ollama + dashboard, pull the test model, yield base URL.

    Skipped unless ``FUNCTIONAL_DOCKER=1`` is set and Docker is available.
    """
    if os.environ.get("FUNCTIONAL_DOCKER") != "1":
        pytest.skip("Set FUNCTIONAL_DOCKER=1 to run Docker tests")
    if not COMPOSE_TEST.exists():
        pytest.skip("docker-compose.test.yml not found")

    # Probe the Docker daemon before attempting anything heavier.
    daemon_probe = subprocess.run(
        ["docker", "info"],
        capture_output=True,
        text=True,
        timeout=10,
    )
    if daemon_probe.returncode != 0:
        pytest.skip(f"Docker daemon not available: {daemon_probe.stderr.strip()}")

    # OLLAMA_URL tells the dashboard to reach the sibling container;
    # OLLAMA_MODEL selects the tiny CPU-friendly test model.
    child_env = dict(os.environ)
    child_env["OLLAMA_URL"] = "http://ollama:11434"
    child_env["OLLAMA_MODEL"] = TEST_MODEL

    # Bring up Ollama + dashboard with the ollama profile enabled.
    up = subprocess.run(
        [
            "docker",
            "compose",
            "-f",
            str(COMPOSE_TEST),
            "-p",
            "timmy-test",
            "--profile",
            "ollama",
            "up",
            "-d",
            "--build",
            "--wait",
        ],
        capture_output=True,
        text=True,
        timeout=300,
        cwd=str(PROJECT_ROOT),
        env=child_env,
    )
    if up.returncode != 0:
        pytest.fail(f"docker compose up failed:\n{up.stderr}")

    # The dashboard /health endpoint doubles as the stack readiness probe.
    if not _wait_for_healthy("http://localhost:18000/health"):
        logs = _compose("logs")
        _compose("--profile", "ollama", "down", "-v")
        pytest.fail(f"Stack never became healthy:\n{logs.stdout}")

    # Pull the tiny test model into the Ollama container.
    if not _pull_model(TEST_MODEL):
        logs = _compose("logs", "ollama")
        _compose("--profile", "ollama", "down", "-v")
        pytest.fail(f"Failed to pull {TEST_MODEL}:\n{logs.stdout}")

    yield "http://localhost:18000"

    # Teardown — identical compose invocation to _compose(), 60 s cap.
    _compose("--profile", "ollama", "down", "-v", timeout=60)
|
|
|
|
|
|
# ── tests ────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestOllamaHealth:
    """Verify the dashboard can reach the Ollama container."""

    def test_health_reports_ollama_up(self, ollama_stack):
        """GET /health should report ollama as 'up'."""
        health = httpx.get(f"{ollama_stack}/health", timeout=10)
        assert health.status_code == 200
        # Service statuses live under the "services" key of the payload.
        services = health.json().get("services", {})
        assert services.get("ollama") == "up", f"Expected ollama=up, got: {services}"
|
|
|
|
|
|
class TestOllamaChat:
    """Send a real prompt through the dashboard and get an LLM response."""

    def test_chat_returns_llm_response(self, ollama_stack):
        """POST /agents/timmy/chat with a real message → non-error HTML."""
        chat_url = f"{ollama_stack}/agents/timmy/chat"
        resp = httpx.post(
            chat_url,
            data={"message": "Say hello in exactly three words."},
            timeout=120,  # first inference can be slow on CPU
        )
        assert resp.status_code == 200
        lowered = resp.text.lower()
        # Real content expected, not the dashboard's error fallback.
        got_real_reply = "error" not in lowered or "hello" in lowered
        assert got_real_reply, (
            f"Expected LLM response, got error:\n{resp.text[:500]}"
        )

    def test_chat_history_contains_response(self, ollama_stack):
        """After chatting, history should include both user and agent messages."""
        # Seed the conversation with one message.
        httpx.post(
            f"{ollama_stack}/agents/timmy/chat",
            data={"message": "What is 2+2?"},
            timeout=120,
        )
        # The history endpoint should now echo the user's prompt back.
        history = httpx.get(f"{ollama_stack}/agents/timmy/history", timeout=10)
        assert history.status_code == 200
        rendered = history.text.lower()
        assert "2+2" in rendered or "2 + 2" in rendered

    def test_multiple_turns(self, ollama_stack):
        """Verify the agent handles a second turn without crashing."""
        chat_url = f"{ollama_stack}/agents/timmy/chat"

        # Turn one.
        first = httpx.post(
            chat_url,
            data={"message": "Remember the word: pineapple"},
            timeout=120,
        )
        assert first.status_code == 200

        # Turn two.
        second = httpx.post(
            chat_url,
            data={"message": "What word did I just ask you to remember?"},
            timeout=120,
        )
        assert second.status_code == 200
        # We don't assert "pineapple" — tiny models have weak memory.
        # The point is it doesn't crash on multi-turn.
|
|
|
|
|
|
class TestOllamaDirectAPI:
    """Hit the Ollama container directly to verify the model is loaded."""

    def test_ollama_api_tags(self, ollama_stack):
        """Ollama /api/tags should list the pulled test model."""
        # Ollama isn't port-mapped, so curl from inside the container.
        tags_cmd = [
            "docker",
            "exec",
            "timmy-test-ollama",
            "curl",
            "-sf",
            "http://localhost:11434/api/tags",
        ]
        proc = subprocess.run(
            tags_cmd,
            capture_output=True,
            text=True,
            timeout=10,
        )
        assert proc.returncode == 0
        # Compare on the bare model name — tags may omit the ":0.5b" suffix.
        model_name = TEST_MODEL.split(":")[0]
        assert model_name in proc.stdout
|