# forked from Rockachopa/Timmy-time-dashboard
"""Comprehensive tests for spark.advisor module.

Covers all advisory-generation helpers:
- _check_failure_patterns (grouped agent failures)
- _check_agent_performance (top / struggling agents)
- _check_bid_patterns (spread + high average)
- _check_prediction_accuracy (low / high accuracy)
- _check_system_activity (idle / tasks-posted-but-no-completions)
- generate_advisories (integration, sorting, min-events guard)
"""

import json

from spark.advisor import (
    _MIN_EVENTS,
    Advisory,
    _check_agent_performance,
    _check_bid_patterns,
    _check_failure_patterns,
    _check_prediction_accuracy,
    _check_system_activity,
    generate_advisories,
)
from spark.memory import record_event

# ── Advisory dataclass ─────────────────────────────────────────────────────


class TestAdvisoryDataclass:
    """Construction of the Advisory dataclass: defaults and full field set."""

    def test_defaults(self):
        advisory = Advisory(
            category="test",
            priority=0.5,
            title="T",
            detail="D",
            suggested_action="A",
        )
        # Optional fields fall back to their declared defaults.
        assert advisory.subject is None
        assert advisory.evidence_count == 0

    def test_all_fields(self):
        advisory = Advisory(
            category="c",
            priority=0.9,
            title="T",
            detail="D",
            suggested_action="A",
            subject="agent-1",
            evidence_count=7,
        )
        assert advisory.subject == "agent-1"
        assert advisory.evidence_count == 7
# ── _check_failure_patterns ────────────────────────────────────────────────


class TestCheckFailurePatterns:
    """Grouped per-agent failure advisories from _check_failure_patterns."""

    def test_no_failures_returns_empty(self):
        assert _check_failure_patterns() == []

    def test_single_failure_not_enough(self):
        # One failure is below the reporting threshold.
        record_event("task_failed", "once", agent_id="a1", task_id="t1")
        assert _check_failure_patterns() == []

    def test_two_failures_triggers_advisory(self):
        for i in range(2):
            record_event("task_failed", f"fail {i}", agent_id="agent-abc", task_id=f"t{i}")
        results = _check_failure_patterns()
        assert len(results) == 1
        advisory = results[0]
        assert advisory.category == "failure_prevention"
        assert advisory.subject == "agent-abc"
        assert advisory.evidence_count == 2

    def test_priority_scales_with_count(self):
        for i in range(5):
            record_event("task_failed", f"fail {i}", agent_id="agent-x", task_id=f"f{i}")
        results = _check_failure_patterns()
        assert len(results) == 1
        assert results[0].priority > 0.5

    def test_priority_capped_at_one(self):
        for i in range(20):
            record_event("task_failed", f"fail {i}", agent_id="agent-y", task_id=f"ff{i}")
        # Even with many failures, priority must not exceed 1.0.
        assert _check_failure_patterns()[0].priority <= 1.0

    def test_multiple_agents_separate_advisories(self):
        # Interleave failures from two agents; each gets its own advisory.
        for i in range(3):
            record_event("task_failed", f"a fail {i}", agent_id="agent-a", task_id=f"a{i}")
            record_event("task_failed", f"b fail {i}", agent_id="agent-b", task_id=f"b{i}")
        results = _check_failure_patterns()
        assert len(results) == 2
        assert {r.subject for r in results} == {"agent-a", "agent-b"}

    def test_events_without_agent_id_skipped(self):
        for i in range(3):
            record_event("task_failed", f"no-agent {i}", task_id=f"na{i}")
        assert _check_failure_patterns() == []
# ── _check_agent_performance ───────────────────────────────────────────────


class TestCheckAgentPerformance:
    """High-performer and struggling-agent advisories."""

    def test_no_events_returns_empty(self):
        assert _check_agent_performance() == []

    def test_too_few_tasks_skipped(self):
        # A single completion is not enough history to rate an agent.
        record_event("task_completed", "done", agent_id="agent-1", task_id="t1")
        assert _check_agent_performance() == []

    def test_high_performer_detected(self):
        for i in range(4):
            record_event("task_completed", f"done {i}", agent_id="agent-star", task_id=f"s{i}")
        perf = [
            r for r in _check_agent_performance()
            if r.category == "agent_performance"
        ]
        assert len(perf) == 1
        assert "excels" in perf[0].title
        assert perf[0].subject == "agent-star"

    def test_struggling_agent_detected(self):
        # 1 success, 4 failures = 20% rate
        record_event("task_completed", "ok", agent_id="agent-bad", task_id="ok1")
        for i in range(4):
            record_event("task_failed", f"nope {i}", agent_id="agent-bad", task_id=f"bad{i}")
        struggling = [
            r for r in _check_agent_performance() if "struggling" in r.title
        ]
        assert len(struggling) == 1
        assert struggling[0].priority > 0.5

    def test_middling_agent_no_advisory(self):
        # 50% success rate — neither excelling nor struggling
        for i in range(3):
            record_event("task_completed", f"ok {i}", agent_id="agent-mid", task_id=f"m{i}")
        for i in range(3):
            record_event("task_failed", f"nope {i}", agent_id="agent-mid", task_id=f"mf{i}")
        mid_advisories = [
            r for r in _check_agent_performance() if r.subject == "agent-mid"
        ]
        assert mid_advisories == []

    def test_events_without_agent_id_skipped(self):
        for i in range(5):
            record_event("task_completed", f"done {i}", task_id=f"no-agent-{i}")
        assert _check_agent_performance() == []
# ── _check_bid_patterns ────────────────────────────────────────────────────


class TestCheckBidPatterns:
    """Bid-spread and high-average advisories from _check_bid_patterns."""

    def _record_bids(self, amounts):
        # Helper: one bid_submitted event per amount, each from a distinct agent.
        for i, sats in enumerate(amounts):
            record_event(
                "bid_submitted",
                f"bid {i}",
                agent_id=f"a{i}",
                task_id=f"bt{i}",
                data=json.dumps({"bid_sats": sats}),
            )

    def test_too_few_bids_returns_empty(self):
        self._record_bids([10, 20, 30])
        assert _check_bid_patterns() == []

    def test_wide_spread_detected(self):
        # avg=50, spread=90 > 50*1.5=75
        self._record_bids([5, 10, 50, 90, 95])
        spread_advisories = [
            r for r in _check_bid_patterns() if "spread" in r.title.lower()
        ]
        assert len(spread_advisories) == 1

    def test_high_average_detected(self):
        self._record_bids([80, 85, 90, 95, 100])
        high_avg = [
            r for r in _check_bid_patterns() if "High average" in r.title
        ]
        assert len(high_avg) == 1

    def test_normal_bids_no_advisory(self):
        # Tight spread, low average
        self._record_bids([30, 32, 28, 31, 29])
        assert _check_bid_patterns() == []

    def test_invalid_json_data_skipped(self):
        # Unparseable payloads must be ignored rather than raise.
        for i in range(6):
            record_event(
                "bid_submitted",
                f"bid {i}",
                agent_id=f"a{i}",
                task_id=f"inv{i}",
                data="not-json",
            )
        assert _check_bid_patterns() == []

    def test_zero_bid_sats_skipped(self):
        for i in range(6):
            record_event(
                "bid_submitted",
                f"bid {i}",
                data=json.dumps({"bid_sats": 0}),
            )
        assert _check_bid_patterns() == []

    def test_both_spread_and_high_avg(self):
        # Wide spread AND high average: avg=82, spread=150 > 82*1.5=123
        self._record_bids([5, 80, 90, 100, 155])
        assert len(_check_bid_patterns()) == 2
# ── _check_prediction_accuracy ─────────────────────────────────────────────


class TestCheckPredictionAccuracy:
    """Low / high prediction-accuracy advisories."""

    def test_too_few_evaluations(self):
        assert _check_prediction_accuracy() == []

    def test_low_accuracy_advisory(self):
        from spark.eidos import evaluate_prediction, predict_task_outcome

        # Every prediction resolves against the wrong agent → accuracy near 0.
        for i in range(4):
            predict_task_outcome(f"pa-{i}", "task", ["agent-a"])
            evaluate_prediction(f"pa-{i}", "agent-wrong", task_succeeded=False, winning_bid=999)
        low = [
            r for r in _check_prediction_accuracy() if "Low prediction" in r.title
        ]
        assert len(low) == 1
        assert low[0].priority > 0.5

    def test_high_accuracy_advisory(self):
        from spark.eidos import evaluate_prediction, predict_task_outcome

        # Every prediction resolves in favour of the predicted agent.
        for i in range(4):
            predict_task_outcome(f"ph-{i}", "task", ["agent-a"])
            evaluate_prediction(f"ph-{i}", "agent-a", task_succeeded=True, winning_bid=30)
        high = [
            r for r in _check_prediction_accuracy() if "Strong prediction" in r.title
        ]
        assert len(high) == 1

    def test_middling_accuracy_no_advisory(self):
        from spark.eidos import evaluate_prediction, predict_task_outcome

        # Mix of correct and incorrect to get ~0.5 accuracy
        for i in range(3):
            predict_task_outcome(f"pm-{i}", "task", ["agent-a"])
            evaluate_prediction(f"pm-{i}", "agent-a", task_succeeded=True, winning_bid=30)
        for i in range(3):
            predict_task_outcome(f"pmx-{i}", "task", ["agent-a"])
            evaluate_prediction(f"pmx-{i}", "agent-wrong", task_succeeded=False, winning_bid=999)
        results = _check_prediction_accuracy()
        # avg should be middling — neither low nor high advisory
        low = [r for r in results if "Low" in r.title]
        high = [r for r in results if "Strong" in r.title]
        # At least one side should be empty (depends on exact accuracy)
        assert not (low and high)
# ── _check_system_activity ─────────────────────────────────────────────────


class TestCheckSystemActivity:
    """Idle-swarm and posted-but-stalled advisories."""

    def test_no_events_idle_advisory(self):
        results = _check_system_activity()
        assert len(results) == 1
        assert "No swarm activity" in results[0].title

    def test_has_events_no_idle_advisory(self):
        record_event("task_completed", "done", task_id="t1")
        idle = [
            r for r in _check_system_activity() if "No swarm activity" in r.title
        ]
        assert idle == []

    def test_tasks_posted_but_none_completing(self):
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"tp{i}")
        stalled = [
            r for r in _check_system_activity()
            if "none completing" in r.title.lower()
        ]
        assert len(stalled) == 1
        assert stalled[0].evidence_count >= 4

    def test_posts_with_completions_no_stalled_advisory(self):
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"tpx{i}")
        # A single completion is enough to clear the stalled condition.
        record_event("task_completed", "done", task_id="tpx0")
        stalled = [
            r for r in _check_system_activity()
            if "none completing" in r.title.lower()
        ]
        assert stalled == []
# ── generate_advisories (integration) ──────────────────────────────────────


class TestGenerateAdvisories:
    """End-to-end behaviour of generate_advisories."""

    def test_below_min_events_returns_insufficient(self):
        advisories = generate_advisories()
        assert len(advisories) >= 1
        first = advisories[0]
        assert first.title == "Insufficient data"
        assert first.evidence_count == 0

    def test_exactly_at_min_events_proceeds(self):
        for i in range(_MIN_EVENTS):
            record_event("task_posted", f"ev {i}", task_id=f"min{i}")
        insufficient = [
            a for a in generate_advisories() if a.title == "Insufficient data"
        ]
        assert insufficient == []

    def test_results_sorted_by_priority_descending(self):
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"sp{i}")
        for i in range(3):
            record_event("task_failed", f"fail {i}", agent_id="agent-fail", task_id=f"sf{i}")
        advisories = generate_advisories()
        if len(advisories) >= 2:
            priorities = [a.priority for a in advisories]
            # Each adjacent pair must be non-increasing.
            assert all(
                earlier >= later
                for earlier, later in zip(priorities, priorities[1:])
            )

    def test_multiple_categories_produced(self):
        # Create failures + posted-no-completions
        for i in range(5):
            record_event("task_failed", f"fail {i}", agent_id="agent-bad", task_id=f"mf{i}")
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"mp{i}")
        assert len({a.category for a in generate_advisories()}) >= 2