test: remove hardcoded sleeps, add pytest-timeout (#69)

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

Co-authored-by: Alexander Payne <apayne@MM.local>
This commit is contained in:
Alexander Whitestone
2026-02-26 22:52:36 -05:00
committed by GitHub
parent bf0e388d2a
commit 51140fb7f0
7 changed files with 371 additions and 274 deletions

View File

@@ -16,7 +16,6 @@ from pathlib import Path
import pytest
# Try to import httpx for real HTTP calls to containers
httpx = pytest.importorskip("httpx")
PROJECT_ROOT = Path(__file__).parent.parent.parent
@@ -25,7 +24,25 @@ COMPOSE_TEST = PROJECT_ROOT / "docker-compose.test.yml"
def _compose(*args, timeout=60):
    """Run ``docker compose`` against the test stack and capture its output.

    Args:
        *args: Extra command-line arguments appended after the fixed
            ``-f <compose file> -p timmy-test`` prefix.
        timeout: Seconds to wait for the command; ``subprocess.run`` raises
            ``subprocess.TimeoutExpired`` when it is exceeded.

    Returns:
        The ``subprocess.CompletedProcess`` with ``stdout``/``stderr``
        captured as text. The return code is NOT checked here; callers
        assert on ``returncode`` themselves.
    """
    cmd = ["docker", "compose", "-f", str(COMPOSE_TEST), "-p", "timmy-test", *args]
    # Single return: the earlier one-line form of this call was left behind
    # next to the reformatted one, which made the second return unreachable.
    return subprocess.run(
        cmd, capture_output=True, text=True, timeout=timeout, cwd=str(PROJECT_ROOT)
    )
def _wait_for_agents(dashboard_url, timeout=30, interval=1):
    """Poll ``/swarm/agents`` until at least one agent appears.

    Args:
        dashboard_url: Base URL of the dashboard (no trailing slash); the
            ``/swarm/agents`` path is appended to it.
        timeout: Maximum number of seconds to keep polling.
        interval: Seconds to pause between two polls.

    Returns:
        The non-empty ``agents`` list from the first HTTP 200 response that
        contains one, or ``[]`` if none appeared before ``timeout`` elapsed.
        A ``timeout`` of zero or less returns ``[]`` without any request.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{dashboard_url}/swarm/agents", timeout=10)
            if resp.status_code == 200:
                agents = resp.json().get("agents", [])
                if agents:
                    return agents
        except Exception:
            # Deliberately best-effort: any failure while polling (for
            # example a refused connection or an unparsable body) just
            # means "not ready yet", so try again until the deadline.
            pass
        # Never sleep past the deadline: a fixed sleep(interval) after the
        # last attempt would overshoot ``timeout`` by up to ``interval``.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(interval, remaining))
    return []
class TestDockerDashboard:
@@ -80,13 +97,18 @@ class TestDockerAgentSwarm:
"""Scale up one agent worker and verify it appears in the registry."""
# Start one agent
result = _compose(
"--profile", "agents", "up", "-d", "--scale", "agent=1",
"--profile",
"agents",
"up",
"-d",
"--scale",
"agent=1",
timeout=120,
)
assert result.returncode == 0, f"Failed to start agent:\n{result.stderr}"
# Give the agent time to register via HTTP
time.sleep(8)
# Wait for agent to register via polling
_wait_for_agents(docker_stack)
resp = httpx.get(f"{docker_stack}/swarm/agents", timeout=10)
assert resp.status_code == 200
@@ -101,13 +123,18 @@ class TestDockerAgentSwarm:
"""Start an agent, post a task, verify the agent bids on it."""
# Start agent
result = _compose(
"--profile", "agents", "up", "-d", "--scale", "agent=1",
"--profile",
"agents",
"up",
"-d",
"--scale",
"agent=1",
timeout=120,
)
assert result.returncode == 0
# Wait for agent to register
time.sleep(8)
# Wait for agent to register via polling
_wait_for_agents(docker_stack)
# Post a task — this triggers an auction
task_resp = httpx.post(
@@ -118,8 +145,13 @@ class TestDockerAgentSwarm:
assert task_resp.status_code == 200
task_id = task_resp.json()["task_id"]
# Give the agent time to poll and bid
time.sleep(12)
# Poll until task exists (agent may poll and bid)
start = time.monotonic()
while time.monotonic() - start < 15:
task = httpx.get(f"{docker_stack}/swarm/tasks/{task_id}", timeout=10)
if task.status_code == 200:
break
time.sleep(1)
# Check task status — may have been assigned
task = httpx.get(f"{docker_stack}/swarm/tasks/{task_id}", timeout=10)
@@ -133,18 +165,25 @@ class TestDockerAgentSwarm:
def test_multiple_agents(self, docker_stack):
"""Scale to 3 agents and verify all register."""
result = _compose(
"--profile", "agents", "up", "-d", "--scale", "agent=3",
"--profile",
"agents",
"up",
"-d",
"--scale",
"agent=3",
timeout=120,
)
assert result.returncode == 0
# Wait for registration
time.sleep(12)
# Wait for agents to register via polling
_wait_for_agents(docker_stack)
resp = httpx.get(f"{docker_stack}/swarm/agents", timeout=10)
agents = resp.json()["agents"]
# Should have at least the 3 agents we started (plus possibly Timmy and auto-spawned ones)
worker_count = sum(1 for a in agents if "Worker" in a["name"] or "TestWorker" in a["name"])
worker_count = sum(
1 for a in agents if "Worker" in a["name"] or "TestWorker" in a["name"]
)
assert worker_count >= 1 # At least some registered
_compose("--profile", "agents", "down", timeout=30)

View File

@@ -4,7 +4,6 @@ RUN: SELENIUM_UI=1 pytest tests/functional/test_fast_e2e.py -v
"""
import os
import time
import pytest
import httpx
@@ -31,7 +30,7 @@ def driver():
opts.add_argument("--disable-dev-shm-usage")
opts.add_argument("--disable-gpu")
opts.add_argument("--window-size=1280,900")
d = webdriver.Chrome(options=opts)
d.implicitly_wait(2) # Reduced from 5s
yield d
@@ -52,7 +51,7 @@ def dashboard_url():
class TestAllPagesLoad:
"""Single test that checks all pages load - much faster than separate tests."""
def test_all_dashboard_pages_exist(self, driver, dashboard_url):
"""Verify all new feature pages load successfully in one browser session."""
pages = [
@@ -63,9 +62,9 @@ class TestAllPagesLoad:
("/self-modify/queue", "Upgrade"),
("/swarm/live", "Swarm"), # Live page has "Swarm" not "Live"
]
failures = []
for path, expected_text in pages:
try:
driver.get(f"{dashboard_url}{path}")
@@ -73,55 +72,63 @@ class TestAllPagesLoad:
WebDriverWait(driver, 3).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
# Verify page has expected content
body_text = driver.find_element(By.TAG_NAME, "body").text
if expected_text.lower() not in body_text.lower():
failures.append(f"{path}: missing '{expected_text}'")
except Exception as exc:
failures.append(f"{path}: {type(exc).__name__}")
if failures:
pytest.fail(f"Pages failed to load: {', '.join(failures)}")
class TestAllFeaturesWork:
"""Combined functional tests - single browser session."""
def test_event_log_and_memory_and_ledger_functional(self, driver, dashboard_url):
"""Test Event Log, Memory, and Ledger functionality in one go."""
# 1. Event Log - verify events display
driver.get(f"{dashboard_url}/swarm/events")
time.sleep(0.5)
WebDriverWait(driver, 3).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
# Should have header and either events or empty state
body = driver.find_element(By.TAG_NAME, "body").text
assert "Event" in body or "event" in body, "Event log page missing header"
# Create a task via API to generate an event
try:
httpx.post(
f"{dashboard_url}/swarm/tasks",
data={"description": "E2E test task"},
timeout=2
timeout=2,
)
except Exception:
pass # Ignore, just checking page exists
# 2. Memory - verify search works
driver.get(f"{dashboard_url}/memory?query=test")
time.sleep(0.5)
WebDriverWait(driver, 3).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
# Should have search input
search = driver.find_elements(By.CSS_SELECTOR, "input[type='search'], input[name='query']")
search = driver.find_elements(
By.CSS_SELECTOR, "input[type='search'], input[name='query']"
)
assert search, "Memory page missing search input"
# 3. Ledger - verify balance display
driver.get(f"{dashboard_url}/lightning/ledger")
time.sleep(0.5)
WebDriverWait(driver, 3).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
body = driver.find_element(By.TAG_NAME, "body").text
# Should show balance-related text
has_balance = any(x in body.lower() for x in ["balance", "sats", "transaction"])
@@ -130,73 +137,88 @@ class TestAllFeaturesWork:
class TestCascadeRouter:
    """Cascade Router - combined checks."""

    def test_router_status_and_navigation(self, driver, dashboard_url):
        """Verify router status page and nav link in one test."""
        # Check router status page. The explicit wait replaces the former
        # fixed sleep; ``time`` is no longer imported by this module.
        driver.get(f"{dashboard_url}/router/status")
        WebDriverWait(driver, 3).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        # Lower-case once instead of once per keyword.
        body = driver.find_element(By.TAG_NAME, "body").text.lower()

        # Should show providers or config message
        has_content = any(
            x in body for x in ["provider", "router", "ollama", "config", "status"]
        )
        assert has_content, "Router status page missing content"

        # Check nav has router link
        driver.get(dashboard_url)
        WebDriverWait(driver, 3).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        nav_links = driver.find_elements(By.XPATH, "//a[contains(@href, '/router')]")
        assert nav_links, "Navigation missing router link"
class TestUpgradeQueue:
    """Upgrade Queue - combined checks."""

    def test_upgrade_queue_page_and_elements(self, driver, dashboard_url):
        """Verify upgrade queue page loads with expected elements."""
        # The explicit wait replaces the former fixed sleep; ``time`` is no
        # longer imported by this module.
        driver.get(f"{dashboard_url}/self-modify/queue")
        WebDriverWait(driver, 3).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        body = driver.find_element(By.TAG_NAME, "body").text

        # Should have queue header
        assert "upgrade" in body.lower() or "queue" in body.lower(), (
            "Missing queue header"
        )

        # Should have pending section or empty state
        has_pending = "pending" in body.lower() or "no pending" in body.lower()
        assert has_pending, "Missing pending upgrades section"

        # Check for approve/reject buttons if upgrades exist
        approve_btns = driver.find_elements(
            By.XPATH, "//button[contains(text(), 'Approve')]"
        )
        reject_btns = driver.find_elements(
            By.XPATH, "//button[contains(text(), 'Reject')]"
        )

        # Either no upgrades (no buttons) or buttons exist
        # This is a soft check - page structure is valid either way, so the
        # two lookups above are intentionally not asserted on.
class TestActivityFeed:
"""Activity Feed - combined checks."""
def test_swarm_live_page_and_activity_feed(self, driver, dashboard_url):
"""Verify swarm live page has activity feed elements."""
driver.get(f"{dashboard_url}/swarm/live")
time.sleep(0.5)
WebDriverWait(driver, 3).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
body = driver.find_element(By.TAG_NAME, "body").text
# Should have live indicator or activity section
has_live = any(x in body.lower() for x in [
"live", "activity", "swarm", "agents", "tasks"
])
has_live = any(
x in body.lower() for x in ["live", "activity", "swarm", "agents", "tasks"]
)
assert has_live, "Swarm live page missing content"
# Check for WebSocket connection indicator (if implemented)
# or just basic structure
panels = driver.find_elements(By.CSS_SELECTOR, ".card, .panel, .mc-panel")
@@ -205,7 +227,7 @@ class TestActivityFeed:
class TestFastSmoke:
"""Ultra-fast smoke tests using HTTP where possible."""
def test_all_routes_respond_200(self, dashboard_url):
"""HTTP-only test - no browser, very fast."""
routes = [
@@ -216,16 +238,18 @@ class TestFastSmoke:
"/self-modify/queue",
"/swarm/live",
]
failures = []
for route in routes:
try:
r = httpx.get(f"{dashboard_url}{route}", timeout=3, follow_redirects=True)
r = httpx.get(
f"{dashboard_url}{route}", timeout=3, follow_redirects=True
)
if r.status_code != 200:
failures.append(f"{route}: {r.status_code}")
except Exception as exc:
failures.append(f"{route}: {type(exc).__name__}")
if failures:
pytest.fail(f"Routes failed: {', '.join(failures)}")

View File

@@ -10,7 +10,6 @@ Run:
"""
import os
import time
import pytest
from selenium import webdriver
@@ -96,7 +95,8 @@ def _send_chat_and_wait(driver, message):
# Wait for a NEW agent response (not one from a prior test)
WebDriverWait(driver, 30).until(
lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent")) > existing
lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent"))
> existing
)
return existing
@@ -158,10 +158,14 @@ class TestChatInteraction:
"""Full chat roundtrip: send message, get response, input clears, chat scrolls."""
_load_dashboard(driver)
# Wait for any initial HTMX requests (history load) to settle
time.sleep(2)
# Wait for page to be ready
WebDriverWait(driver, 10).until(
lambda d: d.execute_script("return document.readyState") == "complete"
)
existing_agents = len(driver.find_elements(By.CSS_SELECTOR, ".chat-message.agent"))
existing_agents = len(
driver.find_elements(By.CSS_SELECTOR, ".chat-message.agent")
)
inp = driver.find_element(By.CSS_SELECTOR, "input[name='message']")
inp.send_keys("hello from selenium")
@@ -169,26 +173,29 @@ class TestChatInteraction:
# 1. User bubble appears immediately
WebDriverWait(driver, 5).until(
EC.presence_of_element_located(
(By.CSS_SELECTOR, ".chat-message.user")
)
EC.presence_of_element_located((By.CSS_SELECTOR, ".chat-message.user"))
)
# 2. Agent response arrives
WebDriverWait(driver, 30).until(
lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent")) > existing_agents
lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent"))
> existing_agents
)
# 3. Input cleared (regression test)
time.sleep(0.5)
# Already waited for agent response via WebDriverWait above
inp = driver.find_element(By.CSS_SELECTOR, "input[name='message']")
assert inp.get_attribute("value") == "", "Input should be empty after sending"
# 4. Chat scrolled to bottom (regression test)
chat_log = driver.find_element(By.ID, "chat-log")
scroll_top = driver.execute_script("return arguments[0].scrollTop", chat_log)
scroll_height = driver.execute_script("return arguments[0].scrollHeight", chat_log)
client_height = driver.execute_script("return arguments[0].clientHeight", chat_log)
scroll_height = driver.execute_script(
"return arguments[0].scrollHeight", chat_log
)
client_height = driver.execute_script(
"return arguments[0].clientHeight", chat_log
)
if scroll_height > client_height:
gap = scroll_height - scroll_top - client_height
@@ -252,9 +259,7 @@ class TestAgentSidebar:
def test_sidebar_header_shows(self, driver):
_load_dashboard(driver)
_wait_for_sidebar(driver)
header = driver.find_element(
By.XPATH, "//*[contains(text(), 'SWARM AGENTS')]"
)
header = driver.find_element(By.XPATH, "//*[contains(text(), 'SWARM AGENTS')]")
assert header.is_displayed()
def test_sidebar_shows_status_when_agents_exist(self, driver):