# Source: tests/dashboard/test_scorecard_service.py, introduced by commit
# af162f1a8013a4e68c3301e38546d5765dd8636c
# ("[claude] Add unit tests for scorecard_service.py (#1139) (#1320)").
"""Unit tests for dashboard/services/scorecard_service.py.

Focuses on edge cases and scenarios not covered in test_scorecards.py:
- _aggregate_metrics: test.execution events, PR-closed-without-merge,
  push default commit count, untracked agent with agent_id passthrough
- _detect_patterns: boundary conditions (< 3 PRs, exactly 3, exactly 80%)
- _generate_narrative_bullets: singular/plural forms
- generate_scorecard: token augmentation max() logic
- ScorecardSummary.to_dict(): ISO timestamp format, tests_affected count
"""

from __future__ import annotations

from datetime import UTC, datetime
from unittest.mock import MagicMock, patch

import pytest

from dashboard.services.scorecard_service import (
    AgentMetrics,
    PeriodType,
    ScorecardSummary,
    _aggregate_metrics,
    _detect_patterns,
    _generate_narrative_bullets,
    generate_scorecard,
)
from infrastructure.events.bus import Event

# Every test in this module is a unit test; declared after the imports so the
# import block stays contiguous.
pytestmark = pytest.mark.unit


def _activity_bullet(bullets: list[str]) -> str:
    """Return the "Active across" summary bullet, failing the test if absent."""
    activity = next((b for b in bullets if "Active across" in b), None)
    assert activity is not None
    return activity


# ---------------------------------------------------------------------------
# _aggregate_metrics — edge cases
# ---------------------------------------------------------------------------


class TestAggregateMetricsEdgeCases:
    """Edge cases for _aggregate_metrics not covered in test_scorecards.py."""

    def test_push_event_defaults_to_one_commit(self):
        """Push event with no num_commits key should count as 1 commit."""
        events = [
            Event(type="gitea.push", source="gitea", data={"actor": "claude"}),
        ]
        result = _aggregate_metrics(events)

        assert result["claude"].commits == 1

    def test_pr_closed_without_merge_not_counted(self):
        """PR closed without merge should not appear in prs_merged."""
        events = [
            Event(
                type="gitea.pull_request",
                source="gitea",
                data={"actor": "kimi", "pr_number": 99, "action": "closed", "merged": False},
            ),
        ]
        result = _aggregate_metrics(events)

        # PR was not merged — should not be in prs_merged
        assert "kimi" in result
        assert 99 not in result["kimi"].prs_merged
        # Also not counted as opened (action != "opened")
        assert 99 not in result["kimi"].prs_opened
        # Not touched (only merged PRs add to issues_touched)
        assert 99 not in result["kimi"].issues_touched

    def test_test_execution_event_aggregation(self):
        """test.execution events should populate tests_affected."""
        events = [
            Event(
                type="test.execution",
                source="ci",
                data={"actor": "gemini", "test_files": ["tests/test_alpha.py", "tests/test_beta.py"]},
            ),
        ]
        result = _aggregate_metrics(events)

        assert "gemini" in result
        assert "tests/test_alpha.py" in result["gemini"].tests_affected
        assert "tests/test_beta.py" in result["gemini"].tests_affected

    def test_untracked_agent_with_agent_id_field_included(self):
        """An event naming its agent only via agent_id (no actor key) is included.

        NOTE(review): despite the test name, "kimi" is a tracked agent; what
        this exercises is attribution through ``agent_id`` when the event
        data carries no ``actor`` key.
        """
        events = [
            Event(
                type="agent.task.completed",
                source="system",
                data={"agent_id": "kimi", "tests_affected": [], "token_reward": 5},
            ),
        ]
        result = _aggregate_metrics(events)

        # kimi is tracked and agent_id is present in data
        assert "kimi" in result
        assert result["kimi"].tokens_earned == 5

    def test_untracked_actor_without_agent_id_excluded(self):
        """Actor that is not tracked and has no agent_id in data is skipped."""
        events = [
            Event(
                type="gitea.push",
                source="gitea",
                data={"actor": "anon-bot", "num_commits": 10},
            ),
        ]
        result = _aggregate_metrics(events)

        assert "anon-bot" not in result

    def test_issue_opened_with_no_issue_number_ignored(self):
        """Issue opened event with issue_number=0 should not add to issues_touched."""
        events = [
            Event(
                type="gitea.issue.opened",
                source="gitea",
                data={"actor": "hermes", "issue_number": 0},
            ),
        ]
        result = _aggregate_metrics(events)

        assert "hermes" in result
        assert len(result["hermes"].issues_touched) == 0

    def test_comment_with_no_issue_number_still_increments_counter(self):
        """Comment event with issue_number=0 increments comment count but not issues_touched."""
        events = [
            Event(
                type="gitea.issue.comment",
                source="gitea",
                data={"actor": "manus", "issue_number": 0},
            ),
        ]
        result = _aggregate_metrics(events)

        assert "manus" in result
        assert result["manus"].comments == 1
        assert len(result["manus"].issues_touched) == 0

    def test_task_completion_no_tests_affected(self):
        """Task completion with empty tests_affected list should work fine."""
        events = [
            Event(
                type="agent.task.completed",
                source="system",
                data={"agent_id": "claude", "tests_affected": [], "token_reward": 20},
            ),
        ]
        result = _aggregate_metrics(events)

        assert "claude" in result
        assert len(result["claude"].tests_affected) == 0
        assert result["claude"].tokens_earned == 20

    def test_multiple_agents_independent_metrics(self):
        """Events from multiple agents are tracked independently."""
        events = [
            Event(type="gitea.push", source="gitea", data={"actor": "claude", "num_commits": 3}),
            Event(type="gitea.push", source="gitea", data={"actor": "gemini", "num_commits": 7}),
        ]
        result = _aggregate_metrics(events)

        assert result["claude"].commits == 3
        assert result["gemini"].commits == 7

    def test_pr_with_no_pr_number_not_recorded(self):
        """PR event with pr_number=0 should not add to prs_opened."""
        events = [
            Event(
                type="gitea.pull_request",
                source="gitea",
                data={"actor": "kimi", "pr_number": 0, "action": "opened"},
            ),
        ]
        result = _aggregate_metrics(events)

        assert "kimi" in result
        assert len(result["kimi"].prs_opened) == 0


# ---------------------------------------------------------------------------
# _detect_patterns — boundary conditions
# ---------------------------------------------------------------------------


class TestDetectPatternsBoundaries:
    """Boundary conditions for _detect_patterns."""

    def test_no_patterns_with_empty_metrics(self):
        """Empty metrics should not trigger any patterns."""
        metrics = AgentMetrics(agent_id="kimi")
        patterns = _detect_patterns(metrics)

        assert patterns == []

    def test_merge_rate_requires_three_or_more_prs(self):
        """Merge rate pattern requires at least 3 PRs opened."""
        metrics = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2},
            prs_merged={1, 2},  # 100% rate but only 2 PRs
        )
        patterns = _detect_patterns(metrics)

        # Should NOT trigger high-merge-rate pattern (< 3 PRs)
        assert not any("High merge rate" in p for p in patterns)
        assert not any("low merge rate" in p for p in patterns)

    def test_merge_rate_exactly_3_prs_triggers_pattern(self):
        """Exactly 3 PRs opened triggers merge rate evaluation."""
        metrics = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2, 3},
            prs_merged={1, 2, 3},  # 100% rate, 3 PRs
        )
        patterns = _detect_patterns(metrics)

        assert any("High merge rate" in p for p in patterns)

    def test_merge_rate_80_percent_is_high(self):
        """Exactly 80% merge rate triggers high merge rate pattern."""
        metrics = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2, 3, 4, 5},
            prs_merged={1, 2, 3, 4},  # 80%
        )
        patterns = _detect_patterns(metrics)

        assert any("High merge rate" in p for p in patterns)

    def test_merge_rate_below_80_not_high(self):
        """A merge rate below 80% (5 of 7, ~71.4%) should NOT trigger high merge rate."""
        metrics = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2, 3, 4, 5, 6, 7},  # 7 PRs
            prs_merged={1, 2, 3, 4, 5},  # ~71.4% — below 80%
        )
        patterns = _detect_patterns(metrics)

        assert not any("High merge rate" in p for p in patterns)

    def test_commit_pattern_requires_over_10_commits(self):
        """Exactly 10 commits does NOT trigger the high-commit/no-PR pattern."""
        metrics = AgentMetrics(
            agent_id="kimi",
            commits=10,
            prs_opened=set(),
        )
        patterns = _detect_patterns(metrics)

        assert not any("High commit volume" in p for p in patterns)

    def test_commit_pattern_triggered_at_11_commits(self):
        """11 commits with no PRs triggers the high-commit pattern."""
        metrics = AgentMetrics(
            agent_id="kimi",
            commits=11,
            prs_opened=set(),
        )
        patterns = _detect_patterns(metrics)

        assert any("High commit volume without PRs" in p for p in patterns)

    def test_token_accumulation_exact_boundary(self):
        """Net tokens = 100 does NOT trigger accumulation pattern (must be > 100)."""
        metrics = AgentMetrics(agent_id="kimi", tokens_earned=100, tokens_spent=0)
        patterns = _detect_patterns(metrics)

        assert not any("Strong token accumulation" in p for p in patterns)

    def test_token_spend_exact_boundary(self):
        """Net tokens = -50 does NOT trigger high spend pattern (must be < -50)."""
        metrics = AgentMetrics(agent_id="kimi", tokens_earned=0, tokens_spent=50)
        patterns = _detect_patterns(metrics)

        assert not any("High token spend" in p for p in patterns)


# ---------------------------------------------------------------------------
# _generate_narrative_bullets — singular/plural
# ---------------------------------------------------------------------------


class TestGenerateNarrativeSingularPlural:
    """Test singular and plural forms in narrative bullets."""

    def test_singular_commit(self):
        """One commit should use singular form."""
        metrics = AgentMetrics(agent_id="kimi", commits=1)
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)

        activity = _activity_bullet(bullets)
        assert "1 commit" in activity
        assert "1 commits" not in activity

    def test_singular_pr_opened(self):
        """One opened PR should use singular form."""
        metrics = AgentMetrics(agent_id="kimi", prs_opened={1})
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)

        activity = _activity_bullet(bullets)
        assert "1 PR opened" in activity

    def test_singular_pr_merged(self):
        """One merged PR should use singular form."""
        metrics = AgentMetrics(agent_id="kimi", prs_merged={1})
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)

        activity = _activity_bullet(bullets)
        assert "1 PR merged" in activity

    def test_singular_issue_touched(self):
        """One issue touched should use singular form."""
        metrics = AgentMetrics(agent_id="kimi", issues_touched={42})
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)

        activity = _activity_bullet(bullets)
        assert "1 issue touched" in activity

    def test_singular_comment(self):
        """One comment should use singular form."""
        metrics = AgentMetrics(agent_id="kimi", comments=1)
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)

        activity = _activity_bullet(bullets)
        assert "1 comment" in activity
        # "1 comment" is a prefix of "1 comments", so the positive check alone
        # would also accept the plural; rule it out explicitly.
        assert "1 comments" not in activity

    def test_singular_test_file(self):
        """One test file should use singular form."""
        metrics = AgentMetrics(agent_id="kimi", tests_affected={"test_foo.py"})
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)

        assert any("1 test file." in b for b in bullets)

    def test_weekly_period_label(self):
        """Weekly period uses 'week' label in no-activity message."""
        metrics = AgentMetrics(agent_id="kimi")
        bullets = _generate_narrative_bullets(metrics, PeriodType.weekly)

        assert any("this week" in b for b in bullets)


# ---------------------------------------------------------------------------
# generate_scorecard — token augmentation (max logic)
# ---------------------------------------------------------------------------


class TestGenerateScorecardTokenAugmentation:
    """Test the max() token augmentation logic in generate_scorecard."""

    def test_event_tokens_win_over_ledger_when_higher(self):
        """When event tokens > ledger tokens, event tokens are preserved."""
        events = [
            Event(
                type="agent.task.completed",
                source="system",
                data={"agent_id": "kimi", "tests_affected": [], "token_reward": 200},
            ),
        ]
        with (
            patch(
                "dashboard.services.scorecard_service._collect_events_for_period",
                return_value=events,
            ),
            patch(
                "dashboard.services.scorecard_service._query_token_transactions",
                return_value=(50, 0),  # ledger says 50 earned
            ),
        ):
            scorecard = generate_scorecard("kimi", PeriodType.daily)

        # max(200, 50) = 200 should win
        assert scorecard.metrics.tokens_earned == 200

    def test_ledger_tokens_win_when_higher(self):
        """When ledger tokens > event tokens, ledger tokens are used."""
        events = [
            Event(
                type="agent.task.completed",
                source="system",
                data={"agent_id": "kimi", "tests_affected": [], "token_reward": 10},
            ),
        ]
        with (
            patch(
                "dashboard.services.scorecard_service._collect_events_for_period",
                return_value=events,
            ),
            patch(
                "dashboard.services.scorecard_service._query_token_transactions",
                return_value=(500, 100),  # ledger says 500 earned, 100 spent
            ),
        ):
            scorecard = generate_scorecard("kimi", PeriodType.daily)

        # max(10, 500) = 500
        assert scorecard.metrics.tokens_earned == 500
        # max(0, 100) = 100
        assert scorecard.metrics.tokens_spent == 100


# ---------------------------------------------------------------------------
# ScorecardSummary.to_dict — timestamp format and tests_affected
# ---------------------------------------------------------------------------


class TestScorecardSummaryToDict:
    """Additional to_dict tests."""

    def test_timestamps_are_iso_strings(self):
        """period_start and period_end should be ISO format strings."""
        start = datetime(2026, 3, 20, 0, 0, 0, tzinfo=UTC)
        end = datetime(2026, 3, 21, 0, 0, 0, tzinfo=UTC)
        summary = ScorecardSummary(
            agent_id="kimi",
            period_type=PeriodType.daily,
            period_start=start,
            period_end=end,
            metrics=AgentMetrics(agent_id="kimi"),
        )
        data = summary.to_dict()

        assert data["period_start"] == start.isoformat()
        assert data["period_end"] == end.isoformat()

    def test_tests_affected_count_in_dict(self):
        """to_dict metrics.tests_affected should be a count (int)."""
        metrics = AgentMetrics(
            agent_id="kimi",
            tests_affected={"test_a.py", "test_b.py", "test_c.py"},
        )
        summary = ScorecardSummary(
            agent_id="kimi",
            period_type=PeriodType.daily,
            period_start=datetime.now(UTC),
            period_end=datetime.now(UTC),
            metrics=metrics,
        )
        data = summary.to_dict()

        assert data["metrics"]["tests_affected"] == 3

    def test_empty_narrative_and_patterns(self):
        """to_dict with default empty lists should serialize correctly."""
        summary = ScorecardSummary(
            agent_id="claude",
            period_type=PeriodType.weekly,
            period_start=datetime.now(UTC),
            period_end=datetime.now(UTC),
            metrics=AgentMetrics(agent_id="claude"),
        )
        data = summary.to_dict()

        assert data["narrative_bullets"] == []
        assert data["patterns"] == []
        assert data["period_type"] == "weekly"