This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/spark/test_advisor.py

328 lines
13 KiB
Python

"""Comprehensive tests for spark.advisor module.
Covers all advisory-generation helpers:
- _check_failure_patterns (grouped agent failures)
- _check_agent_performance (top / struggling agents)
- _check_bid_patterns (spread + high average)
- _check_prediction_accuracy (low / high accuracy)
- _check_system_activity (idle / tasks-posted-but-no-completions)
- generate_advisories (integration, sorting, min-events guard)
"""
import json
from spark.advisor import (
_MIN_EVENTS,
Advisory,
_check_agent_performance,
_check_bid_patterns,
_check_failure_patterns,
_check_prediction_accuracy,
_check_system_activity,
generate_advisories,
)
from spark.memory import record_event
# ── Advisory dataclass ─────────────────────────────────────────────────────
class TestAdvisoryDataclass:
    """Construction checks for the ``Advisory`` record."""

    def test_defaults(self):
        """``subject`` and ``evidence_count`` fall back to ``None`` and ``0``."""
        required = {
            "category": "test",
            "priority": 0.5,
            "title": "T",
            "detail": "D",
            "suggested_action": "A",
        }
        advisory = Advisory(**required)

        assert advisory.subject is None
        assert advisory.evidence_count == 0

    def test_all_fields(self):
        """Explicitly supplied optional fields are stored unchanged."""
        required = {
            "category": "c",
            "priority": 0.9,
            "title": "T",
            "detail": "D",
            "suggested_action": "A",
        }
        advisory = Advisory(subject="agent-1", evidence_count=7, **required)

        assert advisory.subject == "agent-1"
        assert advisory.evidence_count == 7
# ── _check_failure_patterns ────────────────────────────────────────────────
class TestCheckFailurePatterns:
    """Exercise ``_check_failure_patterns`` with recorded ``task_failed`` events.

    NOTE(review): every test assumes it starts from an empty event store,
    presumably reset by a fixture outside this file -- confirm in conftest.
    """

    def test_no_failures_returns_empty(self):
        """Nothing recorded: the check returns an empty list."""
        advisories = _check_failure_patterns()
        assert advisories == []

    def test_single_failure_not_enough(self):
        """One failure for an agent does not produce an advisory."""
        record_event("task_failed", "once", agent_id="a1", task_id="t1")

        advisories = _check_failure_patterns()
        assert advisories == []

    def test_two_failures_triggers_advisory(self):
        """Two failures for the same agent yield exactly one advisory."""
        for i in range(2):
            record_event("task_failed", f"fail {i}", agent_id="agent-abc", task_id=f"t{i}")

        advisories = _check_failure_patterns()

        assert len(advisories) == 1
        advisory = advisories[0]
        assert advisory.category == "failure_prevention"
        assert advisory.subject == "agent-abc"
        assert advisory.evidence_count == 2

    def test_priority_scales_with_count(self):
        """Five failures push the advisory priority above 0.5."""
        for i in range(5):
            record_event("task_failed", f"fail {i}", agent_id="agent-x", task_id=f"f{i}")

        advisories = _check_failure_patterns()

        assert len(advisories) == 1
        assert advisories[0].priority > 0.5

    def test_priority_capped_at_one(self):
        """Even with twenty failures the priority never exceeds 1.0."""
        for i in range(20):
            record_event("task_failed", f"fail {i}", agent_id="agent-y", task_id=f"ff{i}")

        advisories = _check_failure_patterns()

        assert advisories[0].priority <= 1.0

    def test_multiple_agents_separate_advisories(self):
        """Failures are grouped per agent: two agents, two advisories."""
        # Events for the two agents are interleaved on purpose.
        for i in range(3):
            record_event("task_failed", f"a fail {i}", agent_id="agent-a", task_id=f"a{i}")
            record_event("task_failed", f"b fail {i}", agent_id="agent-b", task_id=f"b{i}")

        advisories = _check_failure_patterns()

        assert len(advisories) == 2
        assert {advisory.subject for advisory in advisories} == {"agent-a", "agent-b"}

    def test_events_without_agent_id_skipped(self):
        """Failures recorded without an ``agent_id`` produce no advisory."""
        for i in range(3):
            record_event("task_failed", f"no-agent {i}", task_id=f"na{i}")

        advisories = _check_failure_patterns()
        assert advisories == []
# ── _check_agent_performance ───────────────────────────────────────────────
class TestCheckAgentPerformance:
    """Exercise ``_check_agent_performance`` (excelling / struggling agents).

    NOTE(review): every test assumes it starts from an empty event store,
    presumably reset by a fixture outside this file -- confirm in conftest.
    """

    def test_no_events_returns_empty(self):
        """Nothing recorded: the check returns an empty list."""
        advisories = _check_agent_performance()
        assert advisories == []

    def test_too_few_tasks_skipped(self):
        """A single completed task is not enough to judge an agent."""
        record_event("task_completed", "done", agent_id="agent-1", task_id="t1")

        advisories = _check_agent_performance()
        assert advisories == []

    def test_high_performer_detected(self):
        """Four completions and no failures flag the agent as excelling."""
        for i in range(4):
            record_event("task_completed", f"done {i}", agent_id="agent-star", task_id=f"s{i}")

        advisories = _check_agent_performance()
        performance = [adv for adv in advisories if adv.category == "agent_performance"]

        assert len(performance) == 1
        top = performance[0]
        assert "excels" in top.title
        assert top.subject == "agent-star"

    def test_struggling_agent_detected(self):
        """One success against four failures (20%) flags the agent as struggling."""
        record_event("task_completed", "ok", agent_id="agent-bad", task_id="ok1")
        for i in range(4):
            record_event("task_failed", f"nope {i}", agent_id="agent-bad", task_id=f"bad{i}")

        advisories = _check_agent_performance()
        flagged = [adv for adv in advisories if "struggling" in adv.title]

        assert len(flagged) == 1
        assert flagged[0].priority > 0.5

    def test_middling_agent_no_advisory(self):
        """A 50% success rate is neither excelling nor struggling."""
        # Three successes first, then three failures, for the same agent.
        for i in range(3):
            record_event("task_completed", f"ok {i}", agent_id="agent-mid", task_id=f"m{i}")
        for i in range(3):
            record_event("task_failed", f"nope {i}", agent_id="agent-mid", task_id=f"mf{i}")

        advisories = _check_agent_performance()
        about_mid = [adv for adv in advisories if adv.subject == "agent-mid"]

        assert about_mid == []

    def test_events_without_agent_id_skipped(self):
        """Completions recorded without an ``agent_id`` produce no advisory."""
        for i in range(5):
            record_event("task_completed", f"done {i}", task_id=f"no-agent-{i}")

        advisories = _check_agent_performance()
        assert advisories == []
# ── _check_bid_patterns ────────────────────────────────────────────────────
class TestCheckBidPatterns:
    """Exercise ``_check_bid_patterns`` (wide spread / high average bids).

    NOTE(review): every test assumes it starts from an empty event store,
    presumably reset by a fixture outside this file -- confirm in conftest.
    """

    def _record_bids(self, amounts):
        """Record one ``bid_submitted`` event per amount, in order.

        Each event gets its own agent/task id derived from its position, and
        the amount is stored as JSON under the ``bid_sats`` key.
        """
        for i, sats in enumerate(amounts):
            payload = json.dumps({"bid_sats": sats})
            record_event(
                "bid_submitted",
                f"bid {i}",
                agent_id=f"a{i}",
                task_id=f"bt{i}",
                data=payload,
            )

    def test_too_few_bids_returns_empty(self):
        """Three bids are not enough for any bid advisory."""
        self._record_bids([10, 20, 30])

        advisories = _check_bid_patterns()
        assert advisories == []

    def test_wide_spread_detected(self):
        """A spread well above the average produces one spread advisory."""
        # avg = 250 / 5 = 50; spread = 95 - 5 = 90, which exceeds 50 * 1.5 = 75.
        self._record_bids([5, 10, 50, 90, 95])

        advisories = _check_bid_patterns()
        about_spread = [adv for adv in advisories if "spread" in adv.title.lower()]

        assert len(about_spread) == 1

    def test_high_average_detected(self):
        """Uniformly high bids produce one high-average advisory."""
        self._record_bids([80, 85, 90, 95, 100])

        advisories = _check_bid_patterns()
        about_average = [adv for adv in advisories if "High average" in adv.title]

        assert len(about_average) == 1

    def test_normal_bids_no_advisory(self):
        """Tightly clustered, low bids produce nothing."""
        self._record_bids([30, 32, 28, 31, 29])

        advisories = _check_bid_patterns()
        assert advisories == []

    def test_invalid_json_data_skipped(self):
        """Bids whose ``data`` is not valid JSON are ignored."""
        for i in range(6):
            record_event(
                "bid_submitted",
                f"bid {i}",
                agent_id=f"a{i}",
                task_id=f"inv{i}",
                data="not-json",
            )

        advisories = _check_bid_patterns()
        assert advisories == []

    def test_zero_bid_sats_skipped(self):
        """Bids with ``bid_sats`` of zero are ignored."""
        zero_bid = json.dumps({"bid_sats": 0})
        for i in range(6):
            record_event("bid_submitted", f"bid {i}", data=zero_bid)

        advisories = _check_bid_patterns()
        assert advisories == []

    def test_both_spread_and_high_avg(self):
        """A wide spread together with a high average yields two advisories."""
        # avg = 430 / 5 = 86; spread = 155 - 5 = 150, which exceeds 86 * 1.5 = 129.
        self._record_bids([5, 80, 90, 100, 155])

        advisories = _check_bid_patterns()
        assert len(advisories) == 2
# ── _check_prediction_accuracy ─────────────────────────────────────────────
class TestCheckPredictionAccuracy:
    """Exercise ``_check_prediction_accuracy`` (low / high accuracy advisories).

    NOTE(review): every test assumes it starts with no stored predictions,
    presumably reset by a fixture outside this file -- confirm in conftest.
    """

    def _record_evaluations(self, prefix, count, *, correct):
        """Create ``count`` predictions and evaluate each one immediately.

        Args:
            prefix: Task-id prefix; ids are ``f"{prefix}-{i}"`` for ``i`` in
                ``range(count)``.
            count: Number of predict/evaluate pairs to record.
            correct: When true, evaluate with the predicted candidate
                (``agent-a``), a successful task and a bid of 30. Otherwise
                evaluate with ``agent-wrong``, a failed task and a bid of 999.
                These are the outcomes the tests use to drive accuracy up or
                down respectively.
        """
        # Kept as a function-local import, matching how the tests originally
        # pulled in spark.eidos.
        from spark.eidos import evaluate_prediction, predict_task_outcome

        for i in range(count):
            task_id = f"{prefix}-{i}"
            predict_task_outcome(task_id, "task", ["agent-a"])
            if correct:
                evaluate_prediction(task_id, "agent-a", task_succeeded=True, winning_bid=30)
            else:
                evaluate_prediction(
                    task_id, "agent-wrong", task_succeeded=False, winning_bid=999
                )

    def test_too_few_evaluations(self):
        """With no evaluated predictions the check returns an empty list."""
        assert _check_prediction_accuracy() == []

    def test_low_accuracy_advisory(self):
        """Four mismatching evaluations yield one low-accuracy advisory."""
        self._record_evaluations("pa", 4, correct=False)

        results = _check_prediction_accuracy()
        low = [r for r in results if "Low prediction" in r.title]

        assert len(low) == 1
        assert low[0].priority > 0.5

    def test_high_accuracy_advisory(self):
        """Four matching evaluations yield one strong-accuracy advisory."""
        self._record_evaluations("ph", 4, correct=True)

        results = _check_prediction_accuracy()
        high = [r for r in results if "Strong prediction" in r.title]

        assert len(high) == 1

    def test_middling_accuracy_no_advisory(self):
        """A half-right, half-wrong history must not trigger both advisories."""
        # Three matching evaluations followed by three mismatching ones.
        self._record_evaluations("pm", 3, correct=True)
        self._record_evaluations("pmx", 3, correct=False)

        results = _check_prediction_accuracy()
        low = [r for r in results if "Low" in r.title]
        high = [r for r in results if "Strong" in r.title]

        # NOTE(review): the exact average accuracy depends on spark.eidos
        # scoring, so this only asserts that the low and high advisories are
        # not emitted together. That is weaker than the test name suggests;
        # tighten it once the expected accuracy is confirmed.
        assert not (low and high)
# ── _check_system_activity ─────────────────────────────────────────────────
class TestCheckSystemActivity:
    """Exercise ``_check_system_activity`` (idle swarm / posted-but-not-completing).

    NOTE(review): every test assumes it starts from an empty event store,
    presumably reset by a fixture outside this file -- confirm in conftest.
    """

    def test_no_events_idle_advisory(self):
        """With no events at all, a single idle advisory is returned."""
        advisories = _check_system_activity()

        assert len(advisories) == 1
        assert "No swarm activity" in advisories[0].title

    def test_has_events_no_idle_advisory(self):
        """Any recorded event suppresses the idle advisory."""
        record_event("task_completed", "done", task_id="t1")

        advisories = _check_system_activity()
        idle = [adv for adv in advisories if "No swarm activity" in adv.title]

        assert idle == []

    def test_tasks_posted_but_none_completing(self):
        """Five posted tasks with no completions raise one stalled advisory."""
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"tp{i}")

        advisories = _check_system_activity()
        stalled = [adv for adv in advisories if "none completing" in adv.title.lower()]

        assert len(stalled) == 1
        assert stalled[0].evidence_count >= 4

    def test_posts_with_completions_no_stalled_advisory(self):
        """A single completion among the posted tasks clears the stalled advisory."""
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"tpx{i}")
        record_event("task_completed", "done", task_id="tpx0")

        advisories = _check_system_activity()
        stalled = [adv for adv in advisories if "none completing" in adv.title.lower()]

        assert stalled == []
# ── generate_advisories (integration) ──────────────────────────────────────
class TestGenerateAdvisories:
    """Integration tests for ``generate_advisories``.

    Covers the minimum-event guard, ordering by priority, and that several
    advisory categories can be produced in one call.

    NOTE(review): every test assumes it starts from an empty event store,
    presumably reset by a fixture outside this file -- confirm in conftest.
    """

    def test_below_min_events_returns_insufficient(self):
        """With no events the first advisory is the "Insufficient data" notice."""
        advisories = generate_advisories()

        assert len(advisories) >= 1
        first = advisories[0]
        assert first.title == "Insufficient data"
        assert first.evidence_count == 0

    def test_exactly_at_min_events_proceeds(self):
        """Recording exactly ``_MIN_EVENTS`` events lifts the insufficient-data guard."""
        for i in range(_MIN_EVENTS):
            record_event("task_posted", f"ev {i}", task_id=f"min{i}")

        advisories = generate_advisories()
        insufficient = [a for a in advisories if a.title == "Insufficient data"]

        assert insufficient == []

    def test_results_sorted_by_priority_descending(self):
        """Advisories come back ordered from highest to lowest priority."""
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"sp{i}")
        for i in range(3):
            record_event("task_failed", f"fail {i}", agent_id="agent-fail", task_id=f"sf{i}")

        advisories = generate_advisories()
        priorities = [a.priority for a in advisories]

        # A list equals its descending sort exactly when every element is >=
        # its successor, so this also holds trivially for 0 or 1 advisories
        # without a length guard or index arithmetic.
        assert priorities == sorted(priorities, reverse=True)

    def test_multiple_categories_produced(self):
        """Failures plus posted-but-uncompleted tasks span at least two categories."""
        for i in range(5):
            record_event("task_failed", f"fail {i}", agent_id="agent-bad", task_id=f"mf{i}")
        for i in range(5):
            record_event("task_posted", f"posted {i}", task_id=f"mp{i}")

        advisories = generate_advisories()
        categories = {a.category for a in advisories}

        assert len(categories) >= 2