test: remove hardcoded sleeps, add pytest-timeout (#69)

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fix test flakiness and improve test suite speed

Co-authored-by: Alexander Payne <apayne@MM.local>
2026-02-26 22:52:36 -05:00
parent bf0e388d2a
commit 51140fb7f0
7 changed files with 371 additions and 274 deletions
--- a/tests/functional/test_docker_swarm.py
+++ b/tests/functional/test_docker_swarm.py
@@ -16,7 +16,6 @@ from pathlib import Path

 import pytest

-# Try to import httpx for real HTTP calls to containers
 httpx = pytest.importorskip("httpx")

 PROJECT_ROOT = Path(__file__).parent.parent.parent
@@ -25,7 +24,25 @@ COMPOSE_TEST = PROJECT_ROOT / "docker-compose.test.yml"

 def _compose(*args, timeout=60):
    cmd = ["docker", "compose", "-f", str(COMPOSE_TEST), "-p", "timmy-test", *args]
-    return subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, cwd=str(PROJECT_ROOT))
+    return subprocess.run(
+        cmd, capture_output=True, text=True, timeout=timeout, cwd=str(PROJECT_ROOT)
+    )
+
+
+def _wait_for_agents(dashboard_url, timeout=30, interval=1):
+    """Poll /swarm/agents until at least one agent appears."""
+    start = time.monotonic()
+    while time.monotonic() - start < timeout:
+        try:
+            resp = httpx.get(f"{dashboard_url}/swarm/agents", timeout=10)
+            if resp.status_code == 200:
+                agents = resp.json().get("agents", [])
+                if agents:
+                    return agents
+        except Exception:
+            pass
+        time.sleep(interval)
+    return []


 class TestDockerDashboard:
@@ -80,13 +97,18 @@ class TestDockerAgentSwarm:
        """Scale up one agent worker and verify it appears in the registry."""
        # Start one agent
        result = _compose(
-            "--profile", "agents", "up", "-d", "--scale", "agent=1",
+            "--profile",
+            "agents",
+            "up",
+            "-d",
+            "--scale",
+            "agent=1",
            timeout=120,
        )
        assert result.returncode == 0, f"Failed to start agent:\n{result.stderr}"

-        # Give the agent time to register via HTTP
-        time.sleep(8)
+        # Wait for agent to register via polling
+        _wait_for_agents(docker_stack)

        resp = httpx.get(f"{docker_stack}/swarm/agents", timeout=10)
        assert resp.status_code == 200
@@ -101,13 +123,18 @@ class TestDockerAgentSwarm:
        """Start an agent, post a task, verify the agent bids on it."""
        # Start agent
        result = _compose(
-            "--profile", "agents", "up", "-d", "--scale", "agent=1",
+            "--profile",
+            "agents",
+            "up",
+            "-d",
+            "--scale",
+            "agent=1",
            timeout=120,
        )
        assert result.returncode == 0

-        # Wait for agent to register
-        time.sleep(8)
+        # Wait for agent to register via polling
+        _wait_for_agents(docker_stack)

        # Post a task — this triggers an auction
        task_resp = httpx.post(
@@ -118,8 +145,13 @@ class TestDockerAgentSwarm:
        assert task_resp.status_code == 200
        task_id = task_resp.json()["task_id"]

-        # Give the agent time to poll and bid
-        time.sleep(12)
+        # Poll until task exists (agent may poll and bid)
+        start = time.monotonic()
+        while time.monotonic() - start < 15:
+            task = httpx.get(f"{docker_stack}/swarm/tasks/{task_id}", timeout=10)
+            if task.status_code == 200:
+                break
+            time.sleep(1)

        # Check task status — may have been assigned
        task = httpx.get(f"{docker_stack}/swarm/tasks/{task_id}", timeout=10)
@@ -133,18 +165,25 @@ class TestDockerAgentSwarm:
    def test_multiple_agents(self, docker_stack):
        """Scale to 3 agents and verify all register."""
        result = _compose(
-            "--profile", "agents", "up", "-d", "--scale", "agent=3",
+            "--profile",
+            "agents",
+            "up",
+            "-d",
+            "--scale",
+            "agent=3",
            timeout=120,
        )
        assert result.returncode == 0

-        # Wait for registration
-        time.sleep(12)
+        # Wait for agents to register via polling
+        _wait_for_agents(docker_stack)

        resp = httpx.get(f"{docker_stack}/swarm/agents", timeout=10)
        agents = resp.json()["agents"]
        # Should have at least the 3 agents we started (plus possibly Timmy and auto-spawned ones)
-        worker_count = sum(1 for a in agents if "Worker" in a["name"] or "TestWorker" in a["name"])
+        worker_count = sum(
+            1 for a in agents if "Worker" in a["name"] or "TestWorker" in a["name"]
+        )
        assert worker_count >= 1  # At least some registered

        _compose("--profile", "agents", "down", timeout=30)
--- a/tests/functional/test_fast_e2e.py
+++ b/tests/functional/test_fast_e2e.py
@@ -4,7 +4,6 @@ RUN: SELENIUM_UI=1 pytest tests/functional/test_fast_e2e.py -v
 """

 import os
-import time

 import pytest
 import httpx
@@ -31,7 +30,7 @@ def driver():
    opts.add_argument("--disable-dev-shm-usage")
    opts.add_argument("--disable-gpu")
    opts.add_argument("--window-size=1280,900")
-    
+
    d = webdriver.Chrome(options=opts)
    d.implicitly_wait(2)  # Reduced from 5s
    yield d
@@ -52,7 +51,7 @@ def dashboard_url():

 class TestAllPagesLoad:
    """Single test that checks all pages load - much faster than separate tests."""
-    
+
    def test_all_dashboard_pages_exist(self, driver, dashboard_url):
        """Verify all new feature pages load successfully in one browser session."""
        pages = [
@@ -63,9 +62,9 @@ class TestAllPagesLoad:
            ("/self-modify/queue", "Upgrade"),
            ("/swarm/live", "Swarm"),  # Live page has "Swarm" not "Live"
        ]
-        
+
        failures = []
-        
+
        for path, expected_text in pages:
            try:
                driver.get(f"{dashboard_url}{path}")
@@ -73,55 +72,63 @@ class TestAllPagesLoad:
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.TAG_NAME, "body"))
                )
-                
+
                # Verify page has expected content
                body_text = driver.find_element(By.TAG_NAME, "body").text
                if expected_text.lower() not in body_text.lower():
                    failures.append(f"{path}: missing '{expected_text}'")
-                    
+
            except Exception as exc:
                failures.append(f"{path}: {type(exc).__name__}")
-        
+
        if failures:
            pytest.fail(f"Pages failed to load: {', '.join(failures)}")


 class TestAllFeaturesWork:
    """Combined functional tests - single browser session."""
-    
+
    def test_event_log_and_memory_and_ledger_functional(self, driver, dashboard_url):
        """Test Event Log, Memory, and Ledger functionality in one go."""
-        
+
        # 1. Event Log - verify events display
        driver.get(f"{dashboard_url}/swarm/events")
-        time.sleep(0.5)
-        
+        WebDriverWait(driver, 3).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+
        # Should have header and either events or empty state
        body = driver.find_element(By.TAG_NAME, "body").text
        assert "Event" in body or "event" in body, "Event log page missing header"
-        
+
        # Create a task via API to generate an event
        try:
            httpx.post(
                f"{dashboard_url}/swarm/tasks",
                data={"description": "E2E test task"},
-                timeout=2
+                timeout=2,
            )
        except Exception:
            pass  # Ignore, just checking page exists
-        
+
        # 2. Memory - verify search works
        driver.get(f"{dashboard_url}/memory?query=test")
-        time.sleep(0.5)
-        
+        WebDriverWait(driver, 3).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+
        # Should have search input
-        search = driver.find_elements(By.CSS_SELECTOR, "input[type='search'], input[name='query']")
+        search = driver.find_elements(
+            By.CSS_SELECTOR, "input[type='search'], input[name='query']"
+        )
        assert search, "Memory page missing search input"
-        
+
        # 3. Ledger - verify balance display
        driver.get(f"{dashboard_url}/lightning/ledger")
-        time.sleep(0.5)
-        
+        WebDriverWait(driver, 3).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+
        body = driver.find_element(By.TAG_NAME, "body").text
        # Should show balance-related text
        has_balance = any(x in body.lower() for x in ["balance", "sats", "transaction"])
@@ -130,73 +137,88 @@ class TestAllFeaturesWork:

 class TestCascadeRouter:
    """Cascade Router - combined checks."""
-    
+
    def test_router_status_and_navigation(self, driver, dashboard_url):
        """Verify router status page and nav link in one test."""
-        
+
        # Check router status page
        driver.get(f"{dashboard_url}/router/status")
-        time.sleep(0.5)
-        
+        WebDriverWait(driver, 3).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+
        body = driver.find_element(By.TAG_NAME, "body").text
-        
+
        # Should show providers or config message
-        has_content = any(x in body.lower() for x in [
-            "provider", "router", "ollama", "config", "status"
-        ])
+        has_content = any(
+            x in body.lower()
+            for x in ["provider", "router", "ollama", "config", "status"]
+        )
        assert has_content, "Router status page missing content"
-        
+
        # Check nav has router link
        driver.get(dashboard_url)
-        time.sleep(0.3)
-        
+        WebDriverWait(driver, 3).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+
        nav_links = driver.find_elements(By.XPATH, "//a[contains(@href, '/router')]")
        assert nav_links, "Navigation missing router link"


 class TestUpgradeQueue:
    """Upgrade Queue - combined checks."""
-    
+
    def test_upgrade_queue_page_and_elements(self, driver, dashboard_url):
        """Verify upgrade queue page loads with expected elements."""
-        
+
        driver.get(f"{dashboard_url}/self-modify/queue")
-        time.sleep(0.5)
-        
+        WebDriverWait(driver, 3).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+
        body = driver.find_element(By.TAG_NAME, "body").text
-        
+
        # Should have queue header
-        assert "upgrade" in body.lower() or "queue" in body.lower(), "Missing queue header"
-        
+        assert "upgrade" in body.lower() or "queue" in body.lower(), (
+            "Missing queue header"
+        )
+
        # Should have pending section or empty state
        has_pending = "pending" in body.lower() or "no pending" in body.lower()
        assert has_pending, "Missing pending upgrades section"
-        
+
        # Check for approve/reject buttons if upgrades exist
-        approve_btns = driver.find_elements(By.XPATH, "//button[contains(text(), 'Approve')]")
-        reject_btns = driver.find_elements(By.XPATH, "//button[contains(text(), 'Reject')]")
-        
+        approve_btns = driver.find_elements(
+            By.XPATH, "//button[contains(text(), 'Approve')]"
+        )
+        reject_btns = driver.find_elements(
+            By.XPATH, "//button[contains(text(), 'Reject')]"
+        )
+
        # Either no upgrades (no buttons) or buttons exist
        # This is a soft check - page structure is valid either way


 class TestActivityFeed:
    """Activity Feed - combined checks."""
-    
+
    def test_swarm_live_page_and_activity_feed(self, driver, dashboard_url):
        """Verify swarm live page has activity feed elements."""
-        
+
        driver.get(f"{dashboard_url}/swarm/live")
-        time.sleep(0.5)
-        
+        WebDriverWait(driver, 3).until(
+            EC.presence_of_element_located((By.TAG_NAME, "body"))
+        )
+
        body = driver.find_element(By.TAG_NAME, "body").text
-        
+
        # Should have live indicator or activity section
-        has_live = any(x in body.lower() for x in [
-            "live", "activity", "swarm", "agents", "tasks"
-        ])
+        has_live = any(
+            x in body.lower() for x in ["live", "activity", "swarm", "agents", "tasks"]
+        )
        assert has_live, "Swarm live page missing content"
-        
+
        # Check for WebSocket connection indicator (if implemented)
        # or just basic structure
        panels = driver.find_elements(By.CSS_SELECTOR, ".card, .panel, .mc-panel")
@@ -205,7 +227,7 @@ class TestActivityFeed:

 class TestFastSmoke:
    """Ultra-fast smoke tests using HTTP where possible."""
-    
+
    def test_all_routes_respond_200(self, dashboard_url):
        """HTTP-only test - no browser, very fast."""
        routes = [
@@ -216,16 +238,18 @@ class TestFastSmoke:
            "/self-modify/queue",
            "/swarm/live",
        ]
-        
+
        failures = []
-        
+
        for route in routes:
            try:
-                r = httpx.get(f"{dashboard_url}{route}", timeout=3, follow_redirects=True)
+                r = httpx.get(
+                    f"{dashboard_url}{route}", timeout=3, follow_redirects=True
+                )
                if r.status_code != 200:
                    failures.append(f"{route}: {r.status_code}")
            except Exception as exc:
                failures.append(f"{route}: {type(exc).__name__}")
-        
+
        if failures:
            pytest.fail(f"Routes failed: {', '.join(failures)}")
--- a/tests/functional/test_ui_selenium.py
+++ b/tests/functional/test_ui_selenium.py
@@ -10,7 +10,6 @@ Run:
 """

 import os
-import time

 import pytest
 from selenium import webdriver
@@ -96,7 +95,8 @@ def _send_chat_and_wait(driver, message):

    # Wait for a NEW agent response (not one from a prior test)
    WebDriverWait(driver, 30).until(
-        lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent")) > existing
+        lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent"))
+        > existing
    )

    return existing
@@ -158,10 +158,14 @@ class TestChatInteraction:
        """Full chat roundtrip: send message, get response, input clears, chat scrolls."""
        _load_dashboard(driver)

-        # Wait for any initial HTMX requests (history load) to settle
-        time.sleep(2)
+        # Wait for page to be ready
+        WebDriverWait(driver, 10).until(
+            lambda d: d.execute_script("return document.readyState") == "complete"
+        )

-        existing_agents = len(driver.find_elements(By.CSS_SELECTOR, ".chat-message.agent"))
+        existing_agents = len(
+            driver.find_elements(By.CSS_SELECTOR, ".chat-message.agent")
+        )

        inp = driver.find_element(By.CSS_SELECTOR, "input[name='message']")
        inp.send_keys("hello from selenium")
@@ -169,26 +173,29 @@ class TestChatInteraction:

        # 1. User bubble appears immediately
        WebDriverWait(driver, 5).until(
-            EC.presence_of_element_located(
-                (By.CSS_SELECTOR, ".chat-message.user")
-            )
+            EC.presence_of_element_located((By.CSS_SELECTOR, ".chat-message.user"))
        )

        # 2. Agent response arrives
        WebDriverWait(driver, 30).until(
-            lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent")) > existing_agents
+            lambda d: len(d.find_elements(By.CSS_SELECTOR, ".chat-message.agent"))
+            > existing_agents
        )

        # 3. Input cleared (regression test)
-        time.sleep(0.5)
+        # Already waited for agent response via WebDriverWait above
        inp = driver.find_element(By.CSS_SELECTOR, "input[name='message']")
        assert inp.get_attribute("value") == "", "Input should be empty after sending"

        # 4. Chat scrolled to bottom (regression test)
        chat_log = driver.find_element(By.ID, "chat-log")
        scroll_top = driver.execute_script("return arguments[0].scrollTop", chat_log)
-        scroll_height = driver.execute_script("return arguments[0].scrollHeight", chat_log)
-        client_height = driver.execute_script("return arguments[0].clientHeight", chat_log)
+        scroll_height = driver.execute_script(
+            "return arguments[0].scrollHeight", chat_log
+        )
+        client_height = driver.execute_script(
+            "return arguments[0].clientHeight", chat_log
+        )

        if scroll_height > client_height:
            gap = scroll_height - scroll_top - client_height
@@ -252,9 +259,7 @@ class TestAgentSidebar:
    def test_sidebar_header_shows(self, driver):
        _load_dashboard(driver)
        _wait_for_sidebar(driver)
-        header = driver.find_element(
-            By.XPATH, "//*[contains(text(), 'SWARM AGENTS')]"
-        )
+        header = driver.find_element(By.XPATH, "//*[contains(text(), 'SWARM AGENTS')]")
        assert header.is_displayed()

    def test_sidebar_shows_status_when_agents_exist(self, driver):