"""Tests for agent scorecard functionality.""" from datetime import UTC, datetime, timedelta from unittest.mock import MagicMock, patch from dashboard.services.scorecard_service import ( AgentMetrics, PeriodType, ScorecardSummary, _aggregate_metrics, _detect_patterns, _extract_actor_from_event, _generate_narrative_bullets, _get_period_bounds, _is_tracked_agent, _query_token_transactions, generate_all_scorecards, generate_scorecard, get_tracked_agents, ) from infrastructure.events.bus import Event class TestPeriodBounds: """Test period boundary calculations.""" def test_daily_period_bounds(self): """Test daily period returns correct 24-hour window.""" reference = datetime(2026, 3, 21, 12, 30, 45, tzinfo=UTC) start, end = _get_period_bounds(PeriodType.daily, reference) assert end == datetime(2026, 3, 21, 0, 0, 0, tzinfo=UTC) assert start == datetime(2026, 3, 20, 0, 0, 0, tzinfo=UTC) assert (end - start) == timedelta(days=1) def test_weekly_period_bounds(self): """Test weekly period returns correct 7-day window.""" reference = datetime(2026, 3, 21, 12, 30, 45, tzinfo=UTC) start, end = _get_period_bounds(PeriodType.weekly, reference) assert end == datetime(2026, 3, 21, 0, 0, 0, tzinfo=UTC) assert start == datetime(2026, 3, 14, 0, 0, 0, tzinfo=UTC) assert (end - start) == timedelta(days=7) def test_default_reference_date(self): """Test default reference date uses current time.""" start, end = _get_period_bounds(PeriodType.daily) now = datetime.now(UTC) # End should be start of current day (midnight) expected_end = now.replace(hour=0, minute=0, second=0, microsecond=0) assert end == expected_end # Start should be 24 hours before end assert (end - start) == timedelta(days=1) class TestTrackedAgents: """Test agent tracking functions.""" def test_get_tracked_agents(self): """Test get_tracked_agents returns sorted list.""" agents = get_tracked_agents() assert isinstance(agents, list) assert "kimi" in agents assert "claude" in agents assert "gemini" in agents assert "hermes" in agents assert "manus" in agents assert agents == sorted(agents) def test_is_tracked_agent_true(self): """Test _is_tracked_agent returns True for tracked agents.""" assert _is_tracked_agent("kimi") is True assert _is_tracked_agent("KIMI") is True # case insensitive assert _is_tracked_agent("claude") is True assert _is_tracked_agent("hermes") is True def test_is_tracked_agent_false(self): """Test _is_tracked_agent returns False for untracked agents.""" assert _is_tracked_agent("unknown") is False assert _is_tracked_agent("rockachopa") is False assert _is_tracked_agent("") is False class TestExtractActor: """Test actor extraction from events.""" def test_extract_from_actor_field(self): """Test extraction from data.actor field.""" event = Event(type="test", source="system", data={"actor": "kimi"}) assert _extract_actor_from_event(event) == "kimi" def test_extract_from_agent_id_field(self): """Test extraction from data.agent_id field.""" event = Event(type="test", source="system", data={"agent_id": "claude"}) assert _extract_actor_from_event(event) == "claude" def test_extract_from_source_fallback(self): """Test fallback to event.source.""" event = Event(type="test", source="gemini", data={}) assert _extract_actor_from_event(event) == "gemini" def test_actor_priority_over_agent_id(self): """Test actor field takes priority over agent_id.""" event = Event(type="test", source="system", data={"actor": "kimi", "agent_id": "claude"}) assert _extract_actor_from_event(event) == "kimi" class TestAggregateMetrics: """Test metrics aggregation from events.""" def test_empty_events(self): """Test aggregation with no events returns empty dict.""" result = _aggregate_metrics([]) assert result == {} def test_push_event_aggregation(self): """Test push events aggregate commits correctly.""" events = [ Event(type="gitea.push", source="gitea", data={"actor": "kimi", "num_commits": 3}), Event(type="gitea.push", source="gitea", data={"actor": "kimi", "num_commits": 2}), ] result = _aggregate_metrics(events) assert "kimi" in result assert result["kimi"].commits == 5 def test_issue_opened_aggregation(self): """Test issue opened events aggregate correctly.""" events = [ Event( type="gitea.issue.opened", source="gitea", data={"actor": "claude", "issue_number": 100}, ), Event( type="gitea.issue.opened", source="gitea", data={"actor": "claude", "issue_number": 101}, ), ] result = _aggregate_metrics(events) assert "claude" in result assert len(result["claude"].issues_touched) == 2 assert 100 in result["claude"].issues_touched assert 101 in result["claude"].issues_touched def test_comment_aggregation(self): """Test comment events aggregate correctly.""" events = [ Event( type="gitea.issue.comment", source="gitea", data={"actor": "gemini", "issue_number": 100}, ), Event( type="gitea.issue.comment", source="gitea", data={"actor": "gemini", "issue_number": 101}, ), ] result = _aggregate_metrics(events) assert "gemini" in result assert result["gemini"].comments == 2 assert len(result["gemini"].issues_touched) == 2 # Comments touch issues too def test_pr_events_aggregation(self): """Test PR open and merge events aggregate correctly.""" events = [ Event( type="gitea.pull_request", source="gitea", data={"actor": "kimi", "pr_number": 50, "action": "opened"}, ), Event( type="gitea.pull_request", source="gitea", data={"actor": "kimi", "pr_number": 50, "action": "closed", "merged": True}, ), Event( type="gitea.pull_request", source="gitea", data={"actor": "kimi", "pr_number": 51, "action": "opened"}, ), ] result = _aggregate_metrics(events) assert "kimi" in result assert len(result["kimi"].prs_opened) == 2 assert len(result["kimi"].prs_merged) == 1 assert 50 in result["kimi"].prs_merged def test_untracked_agent_filtered(self): """Test events from untracked agents are filtered out.""" events = [ Event( type="gitea.push", source="gitea", data={"actor": "rockachopa", "num_commits": 5} ), ] result = _aggregate_metrics(events) assert "rockachopa" not in result def test_task_completion_aggregation(self): """Test task completion events aggregate test files.""" events = [ Event( type="agent.task.completed", source="gitea", data={ "agent_id": "kimi", "tests_affected": ["test_foo.py", "test_bar.py"], "token_reward": 10, }, ), ] result = _aggregate_metrics(events) assert "kimi" in result assert len(result["kimi"].tests_affected) == 2 assert "test_foo.py" in result["kimi"].tests_affected assert result["kimi"].tokens_earned == 10 class TestAgentMetrics: """Test AgentMetrics class.""" def test_merge_rate_zero_prs(self): """Test merge rate is 0 when no PRs opened.""" metrics = AgentMetrics(agent_id="kimi") assert metrics.pr_merge_rate == 0.0 def test_merge_rate_perfect(self): """Test 100% merge rate calculation.""" metrics = AgentMetrics(agent_id="kimi", prs_opened={1, 2, 3}, prs_merged={1, 2, 3}) assert metrics.pr_merge_rate == 1.0 def test_merge_rate_partial(self): """Test partial merge rate calculation.""" metrics = AgentMetrics(agent_id="kimi", prs_opened={1, 2, 3, 4}, prs_merged={1, 2}) assert metrics.pr_merge_rate == 0.5 class TestDetectPatterns: """Test pattern detection logic.""" def test_high_merge_rate_pattern(self): """Test detection of high merge rate pattern.""" metrics = AgentMetrics( agent_id="kimi", prs_opened={1, 2, 3, 4, 5}, prs_merged={1, 2, 3, 4}, # 80% merge rate ) patterns = _detect_patterns(metrics) assert any("High merge rate" in p for p in patterns) def test_low_merge_rate_pattern(self): """Test detection of low merge rate pattern.""" metrics = AgentMetrics( agent_id="kimi", prs_opened={1, 2, 3, 4, 5}, prs_merged={1}, # 20% merge rate ) patterns = _detect_patterns(metrics) assert any("low merge rate" in p for p in patterns) def test_high_commits_no_prs_pattern(self): """Test detection of direct-to-main commits pattern.""" metrics = AgentMetrics( agent_id="kimi", commits=15, prs_opened=set(), ) patterns = _detect_patterns(metrics) assert any("High commit volume without PRs" in p for p in patterns) def test_silent_worker_pattern(self): """Test detection of silent worker pattern.""" metrics = AgentMetrics( agent_id="kimi", issues_touched={1, 2, 3, 4, 5, 6}, comments=0, ) patterns = _detect_patterns(metrics) assert any("silent worker" in p for p in patterns) def test_communicative_pattern(self): """Test detection of highly communicative pattern.""" metrics = AgentMetrics( agent_id="kimi", issues_touched={1, 2}, # 2 issues comments=10, # 5x comments per issue ) patterns = _detect_patterns(metrics) assert any("Highly communicative" in p for p in patterns) def test_token_accumulation_pattern(self): """Test detection of token accumulation pattern.""" metrics = AgentMetrics( agent_id="kimi", tokens_earned=150, tokens_spent=10, ) patterns = _detect_patterns(metrics) assert any("Strong token accumulation" in p for p in patterns) def test_token_spend_pattern(self): """Test detection of high token spend pattern.""" metrics = AgentMetrics( agent_id="kimi", tokens_earned=10, tokens_spent=100, ) patterns = _detect_patterns(metrics) assert any("High token spend" in p for p in patterns) class TestGenerateNarrative: """Test narrative bullet generation.""" def test_empty_metrics_narrative(self): """Test narrative for empty metrics mentions no activity.""" metrics = AgentMetrics(agent_id="kimi") bullets = _generate_narrative_bullets(metrics, PeriodType.daily) assert len(bullets) == 1 assert "No recorded activity" in bullets[0] def test_activity_summary_narrative(self): """Test narrative includes activity summary.""" metrics = AgentMetrics( agent_id="kimi", commits=5, prs_opened={1, 2}, prs_merged={1}, ) bullets = _generate_narrative_bullets(metrics, PeriodType.daily) activity_bullet = next((b for b in bullets if "Active across" in b), None) assert activity_bullet is not None assert "5 commits" in activity_bullet assert "2 PRs opened" in activity_bullet assert "1 PR merged" in activity_bullet def test_tests_affected_narrative(self): """Test narrative includes tests affected.""" metrics = AgentMetrics( agent_id="kimi", tests_affected={"test_a.py", "test_b.py"}, ) bullets = _generate_narrative_bullets(metrics, PeriodType.daily) assert any("2 test files" in b for b in bullets) def test_tokens_earned_narrative(self): """Test narrative includes token earnings.""" metrics = AgentMetrics( agent_id="kimi", tokens_earned=100, tokens_spent=20, ) bullets = _generate_narrative_bullets(metrics, PeriodType.daily) assert any("Net earned 80 tokens" in b for b in bullets) def test_tokens_spent_narrative(self): """Test narrative includes token spending.""" metrics = AgentMetrics( agent_id="kimi", tokens_earned=20, tokens_spent=100, ) bullets = _generate_narrative_bullets(metrics, PeriodType.daily) assert any("Net spent 80 tokens" in b for b in bullets) def test_balanced_tokens_narrative(self): """Test narrative for balanced token flow.""" metrics = AgentMetrics( agent_id="kimi", tokens_earned=100, tokens_spent=100, ) bullets = _generate_narrative_bullets(metrics, PeriodType.daily) assert any("Balanced token flow" in b for b in bullets) class TestScorecardSummary: """Test ScorecardSummary dataclass.""" def test_to_dict_structure(self): """Test to_dict returns expected structure.""" metrics = AgentMetrics( agent_id="kimi", issues_touched={1, 2}, prs_opened={10, 11}, prs_merged={10}, tokens_earned=100, tokens_spent=20, ) summary = ScorecardSummary( agent_id="kimi", period_type=PeriodType.daily, period_start=datetime.now(UTC), period_end=datetime.now(UTC), metrics=metrics, narrative_bullets=["Test bullet"], patterns=["Test pattern"], ) data = summary.to_dict() assert data["agent_id"] == "kimi" assert data["period_type"] == "daily" assert "metrics" in data assert data["metrics"]["issues_touched"] == 2 assert data["metrics"]["prs_opened"] == 2 assert data["metrics"]["prs_merged"] == 1 assert data["metrics"]["pr_merge_rate"] == 0.5 assert data["metrics"]["tokens_earned"] == 100 assert data["metrics"]["token_net"] == 80 assert data["narrative_bullets"] == ["Test bullet"] assert data["patterns"] == ["Test pattern"] class TestQueryTokenTransactions: """Test token transaction querying.""" def test_empty_ledger(self): """Test empty ledger returns zero values.""" with patch("lightning.ledger.get_transactions", return_value=[]): earned, spent = _query_token_transactions("kimi", datetime.now(UTC), datetime.now(UTC)) assert earned == 0 assert spent == 0 def test_ledger_with_transactions(self): """Test ledger aggregation of transactions.""" now = datetime.now(UTC) mock_tx = [ MagicMock( agent_id="kimi", tx_type=MagicMock(value="incoming"), amount_sats=100, created_at=now.isoformat(), ), MagicMock( agent_id="kimi", tx_type=MagicMock(value="outgoing"), amount_sats=30, created_at=now.isoformat(), ), ] with patch("lightning.ledger.get_transactions", return_value=mock_tx): earned, spent = _query_token_transactions( "kimi", now - timedelta(hours=1), now + timedelta(hours=1) ) assert earned == 100 assert spent == 30 def test_ledger_filters_by_agent(self): """Test ledger filters transactions by agent_id.""" now = datetime.now(UTC) mock_tx = [ MagicMock( agent_id="claude", tx_type=MagicMock(value="incoming"), amount_sats=100, created_at=now.isoformat(), ), ] with patch("lightning.ledger.get_transactions", return_value=mock_tx): earned, spent = _query_token_transactions( "kimi", now - timedelta(hours=1), now + timedelta(hours=1) ) assert earned == 0 # Transaction was for claude, not kimi def test_ledger_filters_by_time(self): """Test ledger filters transactions by time range.""" now = datetime.now(UTC) old_time = now - timedelta(days=2) mock_tx = [ MagicMock( agent_id="kimi", tx_type=MagicMock(value="incoming"), amount_sats=100, created_at=old_time.isoformat(), ), ] with patch("lightning.ledger.get_transactions", return_value=mock_tx): # Query for today only earned, spent = _query_token_transactions( "kimi", now - timedelta(hours=1), now + timedelta(hours=1) ) assert earned == 0 # Transaction was 2 days ago class TestGenerateScorecard: """Test scorecard generation.""" def test_generate_scorecard_no_activity(self): """Test scorecard generation for agent with no activity.""" with patch( "dashboard.services.scorecard_service._collect_events_for_period", return_value=[] ): with patch( "dashboard.services.scorecard_service._query_token_transactions", return_value=(0, 0), ): scorecard = generate_scorecard("kimi", PeriodType.daily) assert scorecard is not None assert scorecard.agent_id == "kimi" assert scorecard.period_type == PeriodType.daily assert len(scorecard.narrative_bullets) == 1 assert "No recorded activity" in scorecard.narrative_bullets[0] def test_generate_scorecard_with_activity(self): """Test scorecard generation includes activity.""" events = [ Event(type="gitea.push", source="gitea", data={"actor": "kimi", "num_commits": 5}), ] with patch( "dashboard.services.scorecard_service._collect_events_for_period", return_value=events ): with patch( "dashboard.services.scorecard_service._query_token_transactions", return_value=(100, 20), ): scorecard = generate_scorecard("kimi", PeriodType.daily) assert scorecard is not None assert scorecard.metrics.commits == 5 assert scorecard.metrics.tokens_earned == 100 assert scorecard.metrics.tokens_spent == 20 class TestGenerateAllScorecards: """Test generating scorecards for all agents.""" def test_generates_for_all_tracked_agents(self): """Test all tracked agents get scorecards even with no activity.""" with patch( "dashboard.services.scorecard_service._collect_events_for_period", return_value=[] ): with patch( "dashboard.services.scorecard_service._query_token_transactions", return_value=(0, 0), ): scorecards = generate_all_scorecards(PeriodType.daily) agent_ids = {s.agent_id for s in scorecards} expected = {"kimi", "claude", "gemini", "hermes", "manus"} assert expected.issubset(agent_ids) def test_scorecards_sorted(self): """Test scorecards are sorted by agent_id.""" with patch( "dashboard.services.scorecard_service._collect_events_for_period", return_value=[] ): with patch( "dashboard.services.scorecard_service._query_token_transactions", return_value=(0, 0), ): scorecards = generate_all_scorecards(PeriodType.daily) agent_ids = [s.agent_id for s in scorecards] assert agent_ids == sorted(agent_ids) class TestScorecardRoutes: """Test scorecard API routes.""" def test_list_agents_endpoint(self, client): """Test GET /scorecards/api/agents returns tracked agents.""" response = client.get("/scorecards/api/agents") assert response.status_code == 200 data = response.json() assert "agents" in data assert "kimi" in data["agents"] assert "claude" in data["agents"] def test_get_scorecard_endpoint(self, client): """Test GET /scorecards/api/{agent_id} returns scorecard.""" with patch("dashboard.routes.scorecards.generate_scorecard") as mock_generate: mock_generate.return_value = ScorecardSummary( agent_id="kimi", period_type=PeriodType.daily, period_start=datetime.now(UTC), period_end=datetime.now(UTC), metrics=AgentMetrics(agent_id="kimi"), narrative_bullets=["Test bullet"], patterns=[], ) response = client.get("/scorecards/api/kimi?period=daily") assert response.status_code == 200 data = response.json() assert data["agent_id"] == "kimi" assert data["period_type"] == "daily" def test_get_scorecard_invalid_period(self, client): """Test GET with invalid period returns 400.""" response = client.get("/scorecards/api/kimi?period=invalid") assert response.status_code == 400 assert "error" in response.json() def test_get_all_scorecards_endpoint(self, client): """Test GET /scorecards/api returns all scorecards.""" with patch("dashboard.routes.scorecards.generate_all_scorecards") as mock_generate: mock_generate.return_value = [ ScorecardSummary( agent_id="kimi", period_type=PeriodType.daily, period_start=datetime.now(UTC), period_end=datetime.now(UTC), metrics=AgentMetrics(agent_id="kimi"), narrative_bullets=[], patterns=[], ), ] response = client.get("/scorecards/api?period=daily") assert response.status_code == 200 data = response.json() assert data["period"] == "daily" assert "scorecards" in data assert len(data["scorecards"]) == 1 def test_scorecards_page_renders(self, client): """Test GET /scorecards returns HTML page.""" response = client.get("/scorecards") assert response.status_code == 200 assert "text/html" in response.headers.get("content-type", "") assert "AGENT SCORECARDS" in response.text def test_scorecard_panel_renders(self, client): """Test GET /scorecards/panel/{agent_id} returns HTML.""" with patch("dashboard.routes.scorecards.generate_scorecard") as mock_generate: mock_generate.return_value = ScorecardSummary( agent_id="kimi", period_type=PeriodType.daily, period_start=datetime.now(UTC), period_end=datetime.now(UTC), metrics=AgentMetrics(agent_id="kimi", commits=5), narrative_bullets=["Active across 5 commits this day."], patterns=["High activity"], ) response = client.get("/scorecards/panel/kimi?period=daily") assert response.status_code == 200 assert "text/html" in response.headers.get("content-type", "") assert "Kimi" in response.text def test_all_panels_renders(self, client): """Test GET /scorecards/all/panels returns HTML with all panels.""" with patch("dashboard.routes.scorecards.generate_all_scorecards") as mock_generate: mock_generate.return_value = [ ScorecardSummary( agent_id="kimi", period_type=PeriodType.daily, period_start=datetime.now(UTC), period_end=datetime.now(UTC), metrics=AgentMetrics(agent_id="kimi"), narrative_bullets=[], patterns=[], ), ] response = client.get("/scorecards/all/panels?period=daily") assert response.status_code == 200 assert "text/html" in response.headers.get("content-type", "")