"""Comprehensive tests for spark.advisor module. Covers all advisory-generation helpers: - _check_failure_patterns (grouped agent failures) - _check_agent_performance (top / struggling agents) - _check_bid_patterns (spread + high average) - _check_prediction_accuracy (low / high accuracy) - _check_system_activity (idle / tasks-posted-but-no-completions) - generate_advisories (integration, sorting, min-events guard) """ import json from spark.advisor import ( _MIN_EVENTS, Advisory, _check_agent_performance, _check_bid_patterns, _check_failure_patterns, _check_prediction_accuracy, _check_system_activity, generate_advisories, ) from spark.memory import record_event # ── Advisory dataclass ───────────────────────────────────────────────────── class TestAdvisoryDataclass: def test_defaults(self): a = Advisory( category="test", priority=0.5, title="T", detail="D", suggested_action="A", ) assert a.subject is None assert a.evidence_count == 0 def test_all_fields(self): a = Advisory( category="c", priority=0.9, title="T", detail="D", suggested_action="A", subject="agent-1", evidence_count=7, ) assert a.subject == "agent-1" assert a.evidence_count == 7 # ── _check_failure_patterns ──────────────────────────────────────────────── class TestCheckFailurePatterns: def test_no_failures_returns_empty(self): assert _check_failure_patterns() == [] def test_single_failure_not_enough(self): record_event("task_failed", "once", agent_id="a1", task_id="t1") assert _check_failure_patterns() == [] def test_two_failures_triggers_advisory(self): for i in range(2): record_event("task_failed", f"fail {i}", agent_id="agent-abc", task_id=f"t{i}") results = _check_failure_patterns() assert len(results) == 1 assert results[0].category == "failure_prevention" assert results[0].subject == "agent-abc" assert results[0].evidence_count == 2 def test_priority_scales_with_count(self): for i in range(5): record_event("task_failed", f"fail {i}", agent_id="agent-x", task_id=f"f{i}") results = _check_failure_patterns() assert len(results) == 1 assert results[0].priority > 0.5 def test_priority_capped_at_one(self): for i in range(20): record_event("task_failed", f"fail {i}", agent_id="agent-y", task_id=f"ff{i}") results = _check_failure_patterns() assert results[0].priority <= 1.0 def test_multiple_agents_separate_advisories(self): for i in range(3): record_event("task_failed", f"a fail {i}", agent_id="agent-a", task_id=f"a{i}") record_event("task_failed", f"b fail {i}", agent_id="agent-b", task_id=f"b{i}") results = _check_failure_patterns() assert len(results) == 2 subjects = {r.subject for r in results} assert subjects == {"agent-a", "agent-b"} def test_events_without_agent_id_skipped(self): for i in range(3): record_event("task_failed", f"no-agent {i}", task_id=f"na{i}") assert _check_failure_patterns() == [] # ── _check_agent_performance ─────────────────────────────────────────────── class TestCheckAgentPerformance: def test_no_events_returns_empty(self): assert _check_agent_performance() == [] def test_too_few_tasks_skipped(self): record_event("task_completed", "done", agent_id="agent-1", task_id="t1") assert _check_agent_performance() == [] def test_high_performer_detected(self): for i in range(4): record_event("task_completed", f"done {i}", agent_id="agent-star", task_id=f"s{i}") results = _check_agent_performance() perf = [r for r in results if r.category == "agent_performance"] assert len(perf) == 1 assert "excels" in perf[0].title assert perf[0].subject == "agent-star" def test_struggling_agent_detected(self): # 1 
# ── _check_agent_performance ───────────────────────────────────────────────


class TestCheckAgentPerformance:
    def test_no_events_returns_empty(self):
        assert _check_agent_performance() == []

    def test_too_few_tasks_skipped(self):
        record_event("task_completed", "done", agent_id="agent-1", task_id="t1")
        assert _check_agent_performance() == []

    def test_high_performer_detected(self):
        for i in range(4):
            record_event("task_completed", f"done {i}", agent_id="agent-star", task_id=f"s{i}")
        results = _check_agent_performance()
        perf = [r for r in results if r.category == "agent_performance"]
        assert len(perf) == 1
        assert "excels" in perf[0].title
        assert perf[0].subject == "agent-star"

    def test_struggling_agent_detected(self):
        # 1 success, 4 failures = 20% success rate
        record_event("task_completed", "ok", agent_id="agent-bad", task_id="ok1")
        for i in range(4):
            record_event("task_failed", f"nope {i}", agent_id="agent-bad", task_id=f"bad{i}")
        results = _check_agent_performance()
        struggling = [r for r in results if "struggling" in r.title]
        assert len(struggling) == 1
        assert struggling[0].priority > 0.5

    def test_middling_agent_no_advisory(self):
        # 50% success rate — neither excelling nor struggling
        for i in range(3):
            record_event("task_completed", f"ok {i}", agent_id="agent-mid", task_id=f"m{i}")
        for i in range(3):
            record_event("task_failed", f"nope {i}", agent_id="agent-mid", task_id=f"mf{i}")
        results = _check_agent_performance()
        mid_advisories = [r for r in results if r.subject == "agent-mid"]
        assert mid_advisories == []

    def test_events_without_agent_id_skipped(self):
        for i in range(5):
            record_event("task_completed", f"done {i}", task_id=f"no-agent-{i}")
        assert _check_agent_performance() == []


# ── _check_bid_patterns ────────────────────────────────────────────────────


class TestCheckBidPatterns:
    def _record_bids(self, amounts):
        for i, sats in enumerate(amounts):
            record_event(
                "bid_submitted",
                f"bid {i}",
                agent_id=f"a{i}",
                task_id=f"bt{i}",
                data=json.dumps({"bid_sats": sats}),
            )

    def test_too_few_bids_returns_empty(self):
        self._record_bids([10, 20, 30])
        assert _check_bid_patterns() == []

    def test_wide_spread_detected(self):
        # avg=50, spread=90 > 50*1.5=75
        self._record_bids([5, 10, 50, 90, 95])
        results = _check_bid_patterns()
        spread_advisories = [r for r in results if "spread" in r.title.lower()]
        assert len(spread_advisories) == 1

    def test_high_average_detected(self):
        self._record_bids([80, 85, 90, 95, 100])
        results = _check_bid_patterns()
        high_avg = [r for r in results if "High average" in r.title]
        assert len(high_avg) == 1

    def test_normal_bids_no_advisory(self):
        # Tight spread, low average
        self._record_bids([30, 32, 28, 31, 29])
        results = _check_bid_patterns()
        assert results == []

    def test_invalid_json_data_skipped(self):
        for i in range(6):
            record_event(
                "bid_submitted",
                f"bid {i}",
                agent_id=f"a{i}",
                task_id=f"inv{i}",
                data="not-json",
            )
        results = _check_bid_patterns()
        assert results == []

    def test_zero_bid_sats_skipped(self):
        for i in range(6):
            record_event(
                "bid_submitted",
                f"bid {i}",
                data=json.dumps({"bid_sats": 0}),
            )
        assert _check_bid_patterns() == []

    def test_both_spread_and_high_avg(self):
        # Wide spread AND high average: avg=86, spread=150 > 86*1.5=129
        self._record_bids([5, 80, 90, 100, 155])
        results = _check_bid_patterns()
        assert len(results) == 2


# ── _check_prediction_accuracy ─────────────────────────────────────────────


class TestCheckPredictionAccuracy:
    def test_too_few_evaluations(self):
        assert _check_prediction_accuracy() == []

    def test_low_accuracy_advisory(self):
        from spark.eidos import evaluate_prediction, predict_task_outcome

        for i in range(4):
            predict_task_outcome(f"pa-{i}", "task", ["agent-a"])
            evaluate_prediction(f"pa-{i}", "agent-wrong", task_succeeded=False, winning_bid=999)
        results = _check_prediction_accuracy()
        low = [r for r in results if "Low prediction" in r.title]
        assert len(low) == 1
        assert low[0].priority > 0.5

    def test_high_accuracy_advisory(self):
        from spark.eidos import evaluate_prediction, predict_task_outcome

        for i in range(4):
            predict_task_outcome(f"ph-{i}", "task", ["agent-a"])
            evaluate_prediction(f"ph-{i}", "agent-a", task_succeeded=True, winning_bid=30)
        results = _check_prediction_accuracy()
        high = [r for r in results if "Strong prediction" in r.title]
        assert len(high) == 1
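
    # The two tests above repeat the same predict/evaluate loop with different
    # outcomes. A helper method like this sketch could consolidate them;
    # `_run_predictions` is hypothetical test plumbing, not part of spark.eidos —
    # it simply replays the same calls the tests already make.
    def _run_predictions(self, prefix: str, n: int, actual_winner: str, succeeded: bool) -> None:
        from spark.eidos import evaluate_prediction, predict_task_outcome

        for i in range(n):
            predict_task_outcome(f"{prefix}-{i}", "task", ["agent-a"])
            evaluate_prediction(
                f"{prefix}-{i}",
                actual_winner,
                task_succeeded=succeeded,
                winning_bid=30 if succeeded else 999,
            )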
    def test_middling_accuracy_no_advisory(self):
        from spark.eidos import evaluate_prediction, predict_task_outcome

        # Mix of correct and incorrect to get ~0.5 accuracy
        for i in range(3):
            predict_task_outcome(f"pm-{i}", "task", ["agent-a"])
            evaluate_prediction(f"pm-{i}", "agent-a", task_succeeded=True, winning_bid=30)
        for i in range(3):
            predict_task_outcome(f"pmx-{i}", "task", ["agent-a"])
            evaluate_prediction(f"pmx-{i}", "agent-wrong", task_succeeded=False, winning_bid=999)
        results = _check_prediction_accuracy()
        # avg should be middling — neither low nor high advisory
        low = [r for r in results if "Low" in r.title]
        high = [r for r in results if "Strong" in r.title]
        # At least one side should be empty (depends on exact accuracy)
        assert not (low and high)


# ── _check_system_activity ─────────────────────────────────────────────────


class TestCheckSystemActivity:
    def test_no_events_idle_advisory(self):
        results = _check_system_activity()
        assert len(results) == 1
        assert "No swarm activity" in results[0].title

    def test_has_events_no_idle_advisory(self):
        record_event("task_completed", "done", task_id="t1")
        results = _check_system_activity()
        idle = [r for r in results if "No swarm activity" in r.title]
        assert idle == []

    def test_tasks_posted_but_none_completing(self):
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"tp{i}")
        results = _check_system_activity()
        stalled = [r for r in results if "none completing" in r.title.lower()]
        assert len(stalled) == 1
        assert stalled[0].evidence_count >= 4

    def test_posts_with_completions_no_stalled_advisory(self):
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"tpx{i}")
        record_event("task_completed", "done", task_id="tpx0")
        results = _check_system_activity()
        stalled = [r for r in results if "none completing" in r.title.lower()]
        assert stalled == []


# ── generate_advisories (integration) ──────────────────────────────────────


class TestGenerateAdvisories:
    def test_below_min_events_returns_insufficient(self):
        advisories = generate_advisories()
        assert len(advisories) >= 1
        assert advisories[0].title == "Insufficient data"
        assert advisories[0].evidence_count == 0

    def test_exactly_at_min_events_proceeds(self):
        for i in range(_MIN_EVENTS):
            record_event("task_posted", f"ev {i}", task_id=f"min{i}")
        advisories = generate_advisories()
        insufficient = [a for a in advisories if a.title == "Insufficient data"]
        assert insufficient == []

    def test_results_sorted_by_priority_descending(self):
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"sp{i}")
        for i in range(3):
            record_event("task_failed", f"fail {i}", agent_id="agent-fail", task_id=f"sf{i}")
        advisories = generate_advisories()
        if len(advisories) >= 2:
            for i in range(len(advisories) - 1):
                assert advisories[i].priority >= advisories[i + 1].priority

    def test_multiple_categories_produced(self):
        # Create failures + posted-but-not-completed tasks
        for i in range(5):
            record_event("task_failed", f"fail {i}", agent_id="agent-bad", task_id=f"mf{i}")
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"mp{i}")
        advisories = generate_advisories()
        categories = {a.category for a in advisories}
        assert len(categories) >= 2
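

# For reference when reading the assertions above: a minimal sketch of how a
# caller might render the advisories under test, using only the Advisory
# fields these tests exercise (priority, category, subject, title,
# suggested_action). `render_advisories` is hypothetical, not part of
# spark.advisor's public API.
def render_advisories() -> str:
    lines = []
    for a in generate_advisories():  # sorted by descending priority, per the test above
        subject = f" [{a.subject}]" if a.subject else ""
        lines.append(f"({a.priority:.2f}) {a.category}{subject}: {a.title} -> {a.suggested_action}")
    return "\n".join(lines)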