Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3fed634955 |
@@ -1,143 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Portal Health Check — Auto-disable broken portals in the Nexus.
|
||||
|
||||
Checks each portal's destination URL and updates status:
|
||||
- online: URL reachable
|
||||
- offline: URL unreachable → dim portal + tooltip "Offline"
|
||||
- online again: auto-re-enable
|
||||
|
||||
Usage:
|
||||
python3 scripts/portal-health-check.py # check and update
|
||||
python3 scripts/portal-health-check.py --dry-run # check only
|
||||
python3 scripts/portal-health-check.py --json # JSON output
|
||||
|
||||
Ref: #1539
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List
|
||||
|
||||
PORTALS_FILE = os.environ.get("PORTALS_FILE", "portals.json")
|
||||
CHECK_TIMEOUT = int(os.environ.get("PORTAL_CHECK_TIMEOUT", "5"))
|
||||
|
||||
|
||||
def load_portals(path: str = PORTALS_FILE) -> List[dict]:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
def save_portals(portals: List[dict], path: str = PORTALS_FILE):
|
||||
with open(path, "w") as f:
|
||||
json.dump(portals, f, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def check_portal_url(url: str, timeout: int = CHECK_TIMEOUT) -> dict:
|
||||
"""Check if a portal URL is reachable."""
|
||||
if not url or url.startswith("./") or url.startswith("../"):
|
||||
return {"reachable": True, "status": "local", "latency_ms": 0}
|
||||
|
||||
try:
|
||||
import time
|
||||
start = time.time()
|
||||
req = urllib.request.Request(url, method="HEAD")
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
latency = int((time.time() - start) * 1000)
|
||||
return {"reachable": True, "status": resp.status, "latency_ms": latency}
|
||||
except urllib.error.HTTPError as e:
|
||||
# 4xx/5xx means the server responded — portal is reachable
|
||||
return {"reachable": True, "status": e.code, "latency_ms": 0}
|
||||
except Exception as e:
|
||||
return {"reachable": False, "status": "unreachable", "error": str(e)[:100]}
|
||||
|
||||
|
||||
def check_all_portals(dry_run: bool = False) -> List[dict]:
|
||||
"""Check all portals and update status."""
|
||||
portals = load_portals()
|
||||
results = []
|
||||
changes = 0
|
||||
|
||||
for portal in portals:
|
||||
pid = portal["id"]
|
||||
dest = portal.get("destination") or {}
|
||||
url = dest.get("url")
|
||||
old_status = portal.get("status", "unknown")
|
||||
|
||||
# Check health
|
||||
health = check_portal_url(url) if url else {"reachable": True, "status": "no_url"}
|
||||
is_reachable = health.get("reachable", True)
|
||||
|
||||
# Determine new status
|
||||
if old_status == "offline" and is_reachable:
|
||||
new_status = "online"
|
||||
action = "RE-ENABLED"
|
||||
changes += 1
|
||||
elif old_status == "online" and not is_reachable:
|
||||
new_status = "offline"
|
||||
action = "DISABLED"
|
||||
changes += 1
|
||||
elif old_status == "offline" and not is_reachable:
|
||||
new_status = "offline"
|
||||
action = "still offline"
|
||||
else:
|
||||
new_status = old_status
|
||||
action = "ok"
|
||||
|
||||
if not dry_run and new_status != old_status:
|
||||
portal["status"] = new_status
|
||||
|
||||
results.append({
|
||||
"id": pid,
|
||||
"name": portal.get("name", pid),
|
||||
"old_status": old_status,
|
||||
"new_status": new_status,
|
||||
"action": action,
|
||||
"reachable": is_reachable,
|
||||
"latency_ms": health.get("latency_ms", 0),
|
||||
"url": url or "local",
|
||||
})
|
||||
|
||||
if not dry_run and changes > 0:
|
||||
save_portals(portals)
|
||||
|
||||
return results, changes
|
||||
|
||||
|
||||
def print_report(results: List[dict], changes: int):
|
||||
print(f"\n{'='*70}")
|
||||
print(f" PORTAL HEALTH CHECK")
|
||||
print(f" {datetime.now().isoformat()}")
|
||||
print(f"{'='*70}\n")
|
||||
|
||||
print(f" {'Portal':20} {'Status':10} {'Action':15} {'Latency':10} {'URL'}")
|
||||
print(f" {'-'*20} {'-'*10} {'-'*15} {'-'*10} {'-'*30}")
|
||||
|
||||
for r in results:
|
||||
latency = f"{r['latency_ms']}ms" if r['latency_ms'] else "—"
|
||||
url = (r["url"] or "local")[:30]
|
||||
print(f" {r['name']:20} {r['new_status']:10} {r['action']:15} {latency:10} {url}")
|
||||
|
||||
print(f"\n Changes: {changes} portal(s) updated")
|
||||
|
||||
|
||||
def main():
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description="Portal Health Check")
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument("--json", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
results, changes = check_all_portals(dry_run=args.dry_run)
|
||||
|
||||
if args.json:
|
||||
print(json.dumps({"results": results, "changes": changes}, indent=2))
|
||||
else:
|
||||
print_report(results, changes)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
193
tests/load/websocket_load_test.py
Normal file
193
tests/load/websocket_load_test.py
Normal file
@@ -0,0 +1,193 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
WebSocket Load Test — Benchmark concurrent user sessions on the Nexus gateway.
|
||||
|
||||
Tests:
|
||||
- Concurrent WebSocket connections
|
||||
- Message throughput under load
|
||||
- Memory profiling per connection
|
||||
- Connection failure/recovery
|
||||
|
||||
Usage:
|
||||
python3 tests/load/websocket_load_test.py # default (50 users)
|
||||
python3 tests/load/websocket_load_test.py --users 200 # 200 concurrent
|
||||
python3 tests/load/websocket_load_test.py --duration 60 # 60 second test
|
||||
python3 tests/load/websocket_load_test.py --json # JSON output
|
||||
|
||||
Ref: #1505
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import argparse
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
WS_URL = os.environ.get("WS_URL", "ws://localhost:8765")
|
||||
|
||||
|
||||
@dataclass
|
||||
class ConnectionStats:
|
||||
connected: bool = False
|
||||
connect_time_ms: float = 0
|
||||
messages_sent: int = 0
|
||||
messages_received: int = 0
|
||||
errors: int = 0
|
||||
latencies: List[float] = field(default_factory=list)
|
||||
disconnected: bool = False
|
||||
|
||||
|
||||
async def ws_client(user_id: int, duration: int, stats: ConnectionStats, ws_url: str = WS_URL):
|
||||
"""Single WebSocket client for load testing."""
|
||||
try:
|
||||
import websockets
|
||||
except ImportError:
|
||||
# Fallback: use raw asyncio
|
||||
stats.errors += 1
|
||||
return
|
||||
|
||||
try:
|
||||
start = time.time()
|
||||
async with websockets.connect(ws_url, open_timeout=5) as ws:
|
||||
stats.connect_time_ms = (time.time() - start) * 1000
|
||||
stats.connected = True
|
||||
|
||||
# Send periodic messages for the duration
|
||||
end_time = time.time() + duration
|
||||
msg_count = 0
|
||||
while time.time() < end_time:
|
||||
try:
|
||||
msg_start = time.time()
|
||||
message = json.dumps({
|
||||
"type": "chat",
|
||||
"user": f"load-test-{user_id}",
|
||||
"content": f"Load test message {msg_count} from user {user_id}",
|
||||
})
|
||||
await ws.send(message)
|
||||
stats.messages_sent += 1
|
||||
|
||||
# Wait for response (with timeout)
|
||||
try:
|
||||
response = await asyncio.wait_for(ws.recv(), timeout=5.0)
|
||||
stats.messages_received += 1
|
||||
latency = (time.time() - msg_start) * 1000
|
||||
stats.latencies.append(latency)
|
||||
except asyncio.TimeoutError:
|
||||
stats.errors += 1
|
||||
|
||||
msg_count += 1
|
||||
await asyncio.sleep(0.5) # 2 messages/sec per user
|
||||
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
stats.disconnected = True
|
||||
break
|
||||
except Exception:
|
||||
stats.errors += 1
|
||||
|
||||
except Exception as e:
|
||||
stats.errors += 1
|
||||
if "Connection refused" in str(e) or "connect" in str(e).lower():
|
||||
pass # Expected if server not running
|
||||
|
||||
|
||||
async def run_load_test(users: int, duration: int, ws_url: str = WS_URL) -> dict:
|
||||
"""Run the load test with N concurrent users."""
|
||||
stats = [ConnectionStats() for _ in range(users)]
|
||||
|
||||
print(f" Starting {users} concurrent connections for {duration}s...")
|
||||
start = time.time()
|
||||
|
||||
tasks = [ws_client(i, duration, stats[i], ws_url) for i in range(users)]
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
total_time = time.time() - start
|
||||
|
||||
# Aggregate results
|
||||
connected = sum(1 for s in stats if s.connected)
|
||||
total_sent = sum(s.messages_sent for s in stats)
|
||||
total_received = sum(s.messages_received for s in stats)
|
||||
total_errors = sum(s.errors for s in stats)
|
||||
disconnected = sum(1 for s in stats if s.disconnected)
|
||||
|
||||
all_latencies = []
|
||||
for s in stats:
|
||||
all_latencies.extend(s.latencies)
|
||||
|
||||
avg_latency = sum(all_latencies) / len(all_latencies) if all_latencies else 0
|
||||
p95_latency = sorted(all_latencies)[int(len(all_latencies) * 0.95)] if all_latencies else 0
|
||||
p99_latency = sorted(all_latencies)[int(len(all_latencies) * 0.99)] if all_latencies else 0
|
||||
|
||||
avg_connect_time = sum(s.connect_time_ms for s in stats if s.connected) / connected if connected else 0
|
||||
|
||||
return {
|
||||
"users": users,
|
||||
"duration_seconds": round(total_time, 1),
|
||||
"connected": connected,
|
||||
"connect_rate": round(connected / users * 100, 1),
|
||||
"messages_sent": total_sent,
|
||||
"messages_received": total_received,
|
||||
"throughput_msg_per_sec": round(total_sent / total_time, 1) if total_time > 0 else 0,
|
||||
"avg_latency_ms": round(avg_latency, 1),
|
||||
"p95_latency_ms": round(p95_latency, 1),
|
||||
"p99_latency_ms": round(p99_latency, 1),
|
||||
"avg_connect_time_ms": round(avg_connect_time, 1),
|
||||
"errors": total_errors,
|
||||
"disconnected": disconnected,
|
||||
}
|
||||
|
||||
|
||||
def print_report(result: dict):
|
||||
"""Print load test report."""
|
||||
print(f"\n{'='*60}")
|
||||
print(f" WEBSOCKET LOAD TEST REPORT")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
print(f" Connections: {result['connected']}/{result['users']} ({result['connect_rate']}%)")
|
||||
print(f" Duration: {result['duration_seconds']}s")
|
||||
print(f" Messages sent: {result['messages_sent']}")
|
||||
print(f" Messages recv: {result['messages_received']}")
|
||||
print(f" Throughput: {result['throughput_msg_per_sec']} msg/s")
|
||||
print(f" Avg connect: {result['avg_connect_time_ms']}ms")
|
||||
print()
|
||||
print(f" Latency:")
|
||||
print(f" Avg: {result['avg_latency_ms']}ms")
|
||||
print(f" P95: {result['p95_latency_ms']}ms")
|
||||
print(f" P99: {result['p99_latency_ms']}ms")
|
||||
print()
|
||||
print(f" Errors: {result['errors']}")
|
||||
print(f" Disconnected: {result['disconnected']}")
|
||||
|
||||
# Verdict
|
||||
if result['connect_rate'] >= 95 and result['errors'] == 0:
|
||||
print(f"\n ✅ PASS")
|
||||
elif result['connect_rate'] >= 80:
|
||||
print(f"\n ⚠️ DEGRADED")
|
||||
else:
|
||||
print(f"\n ❌ FAIL")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="WebSocket Load Test")
|
||||
parser.add_argument("--users", type=int, default=50, help="Concurrent users")
|
||||
parser.add_argument("--duration", type=int, default=30, help="Test duration in seconds")
|
||||
parser.add_argument("--json", action="store_true", help="JSON output")
|
||||
parser.add_argument("--url", default=WS_URL, help="WebSocket URL")
|
||||
args = parser.parse_args()
|
||||
|
||||
ws_url = args.url
|
||||
|
||||
print(f"\nWebSocket Load Test — {args.users} users, {args.duration}s\n")
|
||||
|
||||
result = asyncio.run(run_load_test(args.users, args.duration, ws_url))
|
||||
|
||||
if args.json:
|
||||
print(json.dumps(result, indent=2))
|
||||
else:
|
||||
print_report(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user