Files
the-nexus/tests/ws_load_test.py
Alexander Whitestone a70c18f20f
Some checks failed
CI / test (pull_request) Failing after 1m31s
CI / validate (pull_request) Failing after 1m36s
Review Approval Gate / verify-review (pull_request) Successful in 10s
test: WebSocket load testing infrastructure (#1505)
Add tests/ws_load_test.py — concurrent connection load test.

Measures:
- Connection success rate
- Message broadcast latency (P50/P95/P99)
- Throughput (msg/s)
- Memory usage delta
- Fan-out via broadcast receiver

Usage:
  python3 tests/ws_load_test.py --connections 50 --duration 30
  python3 tests/ws_load_test.py --connections 200 --duration 60 --messages 200

Pass/fail verdict based on:
- Connection success rate
- Error rate < 1%
- P95 latency < 500ms

Closes #1505
2026-04-14 22:45:14 -04:00

348 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
"""
WebSocket Load Test for The Nexus Gateway (#1505).
Simulates concurrent WebSocket connections to measure:
- Connection success rate
- Message broadcast latency
- Memory usage under load
- Throughput (messages/second)
Usage:
    python3 tests/ws_load_test.py [--connections 50] [--duration 30] [--messages 100]

Requirements:
    pip install websockets psutil  (psutil optional, for memory tracking)
"""
import argparse
import asyncio
import json
import os
import statistics
import sys
import time
from dataclasses import dataclass, field
from typing import List, Optional
try:
import websockets
except ImportError:
print("ERROR: websockets required. Install: pip install websockets")
sys.exit(1)
try:
import psutil
HAS_PSUTIL = True
except ImportError:
HAS_PSUTIL = False
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Defaults for the command-line options defined in main(); each can be
# overridden with the matching flag (--url, --connections, --duration,
# --messages).
# WebSocket endpoint the load test connects to.
DEFAULT_URL = "ws://localhost:8765"
# Number of concurrent worker connections to open.
DEFAULT_CONNECTIONS = 50
# How long the test runs before workers are told to stop.
DEFAULT_DURATION = 30 # seconds
# Number of messages each worker tries to send.
DEFAULT_MESSAGES = 100 # per connection
@dataclass
class ConnectionStats:
    """Counters collected by one load-test worker for its own connection.

    A fresh instance is handed to each worker, which mutates it in place;
    the aggregator reads the fields once all workers have finished.
    """

    # True once the connection was opened; reset to False if the worker's
    # outer error handler fires.
    connected: bool = False
    # Time taken to open the connection, in milliseconds.
    connect_time_ms: float = 0
    # Number of messages successfully handed to send().
    messages_sent: int = 0
    # Number of frames received in response to a send.
    messages_received: int = 0
    # Number of exceptions observed (connect failures and send/recv errors).
    errors: int = 0
    # One send-to-receive latency sample (milliseconds) per received frame.
    latencies_ms: List[float] = field(default_factory=list)
@dataclass
class LoadTestResult:
    """Summary of a whole load-test run, aggregated over all connections."""

    # --- Connections ---
    # Number of connections requested.
    total_connections: int = 0
    # Connections whose worker ended with connected == True.
    successful_connections: int = 0
    # Connections whose worker ended with connected == False.
    failed_connections: int = 0

    # --- Messages ---
    # Sums of the per-connection counters.
    total_messages_sent: int = 0
    total_messages_received: int = 0
    total_errors: int = 0

    # --- Timing / throughput ---
    # Wall-clock length of the run, in seconds.
    duration_seconds: float = 0
    # total_messages_sent divided by duration_seconds.
    messages_per_second: float = 0

    # --- Latency (milliseconds, over every collected sample) ---
    avg_latency_ms: float = 0
    p50_latency_ms: float = 0
    p95_latency_ms: float = 0
    p99_latency_ms: float = 0

    # --- Memory (megabytes; all zero when psutil is unavailable) ---
    memory_start_mb: float = 0
    memory_end_mb: float = 0
    # memory_end_mb minus memory_start_mb.
    memory_delta_mb: float = 0
# ---------------------------------------------------------------------------
# Single connection worker
# ---------------------------------------------------------------------------
async def connection_worker(
    worker_id: int,
    url: str,
    num_messages: int,
    stats: ConnectionStats,
    stop_event: asyncio.Event,
):
    """Open one WebSocket connection, send messages, and record statistics.

    Args:
        worker_id: Identifier embedded in every message this worker sends.
        url: WebSocket URL to connect to.
        num_messages: Maximum number of messages to send.
        stats: Per-connection counters, updated in place.
        stop_event: When set, the worker stops before sending its next message.

    The function never raises for ordinary failures: every ``Exception`` is
    counted in ``stats.errors`` instead. A failure caught by the outer handler
    (typically a failed connect) also marks the connection as not connected.
    """
    try:
        start = time.perf_counter()
        async with websockets.connect(url, open_timeout=5) as ws:
            stats.connect_time_ms = (time.perf_counter() - start) * 1000
            stats.connected = True

            for seq in range(num_messages):
                if stop_event.is_set():
                    break

                # The payload carries a wall-clock timestamp, but latency is
                # measured locally with perf_counter() below.
                payload = json.dumps({
                    "type": "load_test",
                    "worker_id": worker_id,
                    "seq": seq,
                    "timestamp": time.time(),
                })

                try:
                    send_time = time.perf_counter()
                    await ws.send(payload)
                    stats.messages_sent += 1

                    # Wait briefly for any frame to come back.
                    # NOTE(review): the frame is not matched against the
                    # message just sent, so on a broadcasting server this may
                    # be another worker's message -- confirm the server
                    # protocol before trusting these latency figures.
                    try:
                        await asyncio.wait_for(ws.recv(), timeout=2.0)
                    except asyncio.TimeoutError:
                        pass  # No response is OK for broadcast servers
                    else:
                        latency_ms = (time.perf_counter() - send_time) * 1000
                        stats.latencies_ms.append(latency_ms)
                        stats.messages_received += 1
                except websockets.ConnectionClosed:
                    # The socket is gone; further sends would fail the same
                    # way, so count the disconnect once and stop rather than
                    # logging one error per remaining message.
                    stats.errors += 1
                    break
                except Exception:
                    stats.errors += 1

                # Small delay between messages to avoid overwhelming the server
                await asyncio.sleep(0.01)
    except Exception:
        stats.errors += 1
        stats.connected = False
# ---------------------------------------------------------------------------
# Broadcast receiver (separate connection that only receives)
# ---------------------------------------------------------------------------
async def broadcast_receiver(
    url: str,
    received_count: dict,
    stop_event: asyncio.Event,
):
    """Hold a receive-only connection and count every frame that arrives.

    Args:
        url: WebSocket URL to connect to.
        received_count: Mutable dict whose ``"total"`` entry is incremented
            once per received frame.
        stop_event: When set, the receive loop ends after the current
            one-second receive timeout at the latest.

    This is best-effort: connection or receive failures never propagate, but
    they are reported on stderr so a zero count is not mistaken for a server
    that simply did not broadcast.
    """
    try:
        async with websockets.connect(url, open_timeout=5) as ws:
            while not stop_event.is_set():
                try:
                    await asyncio.wait_for(ws.recv(), timeout=1.0)
                except asyncio.TimeoutError:
                    # Nothing arrived; loop round to re-check stop_event.
                    continue
                received_count["total"] += 1
    except Exception as exc:
        print(
            f"WARNING: broadcast receiver stopped: {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
# ---------------------------------------------------------------------------
# Memory monitoring
# ---------------------------------------------------------------------------
def get_memory_mb() -> float:
    """Return this process's resident memory in megabytes.

    The value describes the load-test process itself. Returns 0.0 when
    psutil could not be imported.
    """
    if not HAS_PSUTIL:
        return 0.0
    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes / (1024 * 1024)
# ---------------------------------------------------------------------------
# Main load test
# ---------------------------------------------------------------------------
async def run_load_test(
    url: str,
    num_connections: int,
    duration: int,
    messages_per_connection: int,
) -> LoadTestResult:
    """Run the WebSocket load test and return the aggregated results.

    Starts ``num_connections`` concurrent workers plus one receive-only
    connection, lets them run until every worker has finished or
    ``duration`` seconds have elapsed (whichever comes first), then signals
    stop and aggregates the per-connection statistics.

    Args:
        url: WebSocket URL to test.
        num_connections: Number of concurrent worker connections.
        duration: Maximum run time in seconds.
        messages_per_connection: Messages each worker tries to send.

    Returns:
        A LoadTestResult with connection counts, message counts, throughput,
        latency statistics (left at zero if no latency samples were
        collected) and memory readings from get_memory_mb().
    """
    result = LoadTestResult()
    result.total_connections = num_connections
    result.duration_seconds = duration
    result.memory_start_mb = get_memory_mb()

    stats_list: List[ConnectionStats] = [ConnectionStats() for _ in range(num_connections)]
    stop_event = asyncio.Event()
    # Counter shared with the receive-only connection to measure fan-out
    broadcast_count = {"total": 0}

    print(f"\n{'='*60}")
    print("WebSocket Load Test")
    print(f"{'='*60}")
    print(f" URL: {url}")
    print(f" Connections: {num_connections}")
    print(f" Duration: {duration}s")
    print(f" Messages/connection: {messages_per_connection}")
    print(f"{'='*60}\n")

    test_start = time.perf_counter()

    tasks = [
        asyncio.create_task(
            connection_worker(i, url, messages_per_connection, stats, stop_event)
        )
        for i, stats in enumerate(stats_list)
    ]
    recv_task = asyncio.create_task(broadcast_receiver(url, broadcast_count, stop_event))

    print(f"Running load test for up to {duration} seconds...")
    # Stop as soon as every worker is done instead of always sleeping the
    # full duration: idle time after the last message would otherwise be
    # included in the throughput denominator. asyncio.wait() rejects an
    # empty collection, hence the guard.
    if tasks:
        await asyncio.wait(tasks, timeout=duration)
    stop_event.set()

    # Workers still running notice stop_event before their next send; the
    # receiver notices it within its one-second receive timeout.
    await asyncio.gather(*tasks, recv_task, return_exceptions=True)

    actual_duration = time.perf_counter() - test_start
    print(f"Broadcast receiver observed {broadcast_count['total']} messages")

    # Aggregate per-connection statistics
    all_latencies: List[float] = []
    for stats in stats_list:
        if stats.connected:
            result.successful_connections += 1
        else:
            result.failed_connections += 1
        result.total_messages_sent += stats.messages_sent
        result.total_messages_received += stats.messages_received
        result.total_errors += stats.errors
        all_latencies.extend(stats.latencies_ms)

    result.duration_seconds = actual_duration
    result.messages_per_second = (
        result.total_messages_sent / actual_duration if actual_duration > 0 else 0
    )

    if all_latencies:
        result.avg_latency_ms = statistics.mean(all_latencies)
        sorted_latencies = sorted(all_latencies)
        count = len(sorted_latencies)
        # Nearest-rank style percentiles; int(count * q) is always < count
        # for q < 1, so the index stays in range.
        result.p50_latency_ms = sorted_latencies[count // 2]
        result.p95_latency_ms = sorted_latencies[int(count * 0.95)]
        result.p99_latency_ms = sorted_latencies[int(count * 0.99)]

    result.memory_end_mb = get_memory_mb()
    result.memory_delta_mb = result.memory_end_mb - result.memory_start_mb
    return result
# ---------------------------------------------------------------------------
# Report
# ---------------------------------------------------------------------------
def print_report(result: "LoadTestResult") -> bool:
    """Print the load test results and a pass/fail verdict.

    Args:
        result: Aggregated results of a load-test run.

    Returns:
        True when the verdict is PASS, False when it is FAIL. The run fails
        if any connection failed, if errors exceed 1% of messages sent
        (including errors with no messages sent at all), or if the P95
        latency exceeds 500 ms.
    """
    print(f"\n{'='*60}")
    print("Load Test Results")
    print(f"{'='*60}")

    print("\n--- Connections ---")
    print(f" Total: {result.total_connections}")
    print(f" Successful: {result.successful_connections}")
    print(f" Failed: {result.failed_connections}")
    conn_rate = result.successful_connections / result.total_connections * 100 if result.total_connections else 0
    print(f" Success rate: {conn_rate:.1f}%")

    print("\n--- Messages ---")
    print(f" Sent: {result.total_messages_sent}")
    print(f" Received: {result.total_messages_received}")
    print(f" Errors: {result.total_errors}")
    print(f" Throughput: {result.messages_per_second:.1f} msg/s")

    print("\n--- Latency ---")
    if result.avg_latency_ms > 0:
        print(f" Average: {result.avg_latency_ms:.2f} ms")
        print(f" P50: {result.p50_latency_ms:.2f} ms")
        print(f" P95: {result.p95_latency_ms:.2f} ms")
        print(f" P99: {result.p99_latency_ms:.2f} ms")
    else:
        print(" No latency data (server may not echo)")

    print("\n--- Memory ---")
    if HAS_PSUTIL:
        print(f" Start: {result.memory_start_mb:.1f} MB")
        print(f" End: {result.memory_end_mb:.1f} MB")
        print(f" Delta: {result.memory_delta_mb:+.1f} MB")
    else:
        print(" psutil not installed — memory tracking disabled")

    print("\n--- Duration ---")
    print(f" {result.duration_seconds:.1f} seconds")
    print(f"\n{'='*60}")

    # Pass/fail verdict
    issues = []
    if result.failed_connections > 0:
        issues.append(f"{result.failed_connections} connections failed")
    if result.total_errors > result.total_messages_sent * 0.01:
        if result.total_messages_sent:
            error_rate = result.total_errors / result.total_messages_sent * 100
            issues.append(f"Error rate {error_rate:.1f}% exceeds 1%")
        else:
            # Errors with nothing sent (e.g. every connect failed): a
            # percentage is undefined, so report the raw count instead of
            # dividing by zero.
            issues.append(f"{result.total_errors} errors with no messages sent")
    if result.p95_latency_ms > 500:
        issues.append(f"P95 latency {result.p95_latency_ms:.0f}ms exceeds 500ms")

    if issues:
        print("ISSUES FOUND:")
        for issue in issues:
            print(f"{issue}")
        print("\nVERDICT: FAIL")
        return False

    print("VERDICT: PASS")
    return True
# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------
def main():
    """Parse command-line options, run the load test, and exit.

    The process exit status is 0 when the report verdict is PASS and 1
    when it is FAIL.
    """
    parser = argparse.ArgumentParser(description="WebSocket load test for The Nexus")

    # (flag, add_argument keyword arguments) for every supported option
    option_table = (
        ("--url", {
            "default": DEFAULT_URL,
            "help": f"WebSocket URL (default: {DEFAULT_URL})",
        }),
        ("--connections", {
            "type": int,
            "default": DEFAULT_CONNECTIONS,
            "help": f"Number of concurrent connections (default: {DEFAULT_CONNECTIONS})",
        }),
        ("--duration", {
            "type": int,
            "default": DEFAULT_DURATION,
            "help": f"Test duration in seconds (default: {DEFAULT_DURATION})",
        }),
        ("--messages", {
            "type": int,
            "default": DEFAULT_MESSAGES,
            "help": f"Messages per connection (default: {DEFAULT_MESSAGES})",
        }),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()

    load_test = run_load_test(
        url=args.url,
        num_connections=args.connections,
        duration=args.duration,
        messages_per_connection=args.messages,
    )
    result = asyncio.run(load_test)

    exit_status = 0 if print_report(result) else 1
    sys.exit(exit_status)


if __name__ == "__main__":
    main()