Co-authored-by: Claude (Opus 4.6) <claude@hermes.local> Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
This commit was merged in pull request #1320.
This commit is contained in:
458
tests/dashboard/test_scorecard_service.py
Normal file
458
tests/dashboard/test_scorecard_service.py
Normal file
@@ -0,0 +1,458 @@
|
|||||||
|
"""Unit tests for dashboard/services/scorecard_service.py.
|
||||||
|
|
||||||
|
Focuses on edge cases and scenarios not covered in test_scorecards.py:
|
||||||
|
- _aggregate_metrics: test.execution events, PR-closed-without-merge,
|
||||||
|
push default commit count, untracked agent with agent_id passthrough
|
||||||
|
- _detect_patterns: boundary conditions (< 3 PRs, exactly 3, exactly 80%)
|
||||||
|
- _generate_narrative_bullets: singular/plural forms
|
||||||
|
- generate_scorecard: token augmentation max() logic
|
||||||
|
- ScorecardSummary.to_dict(): ISO timestamp format, tests_affected count
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Apply the ``unit`` marker to every test collected from this module.
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
from dashboard.services.scorecard_service import (
|
||||||
|
AgentMetrics,
|
||||||
|
PeriodType,
|
||||||
|
ScorecardSummary,
|
||||||
|
_aggregate_metrics,
|
||||||
|
_detect_patterns,
|
||||||
|
_generate_narrative_bullets,
|
||||||
|
generate_scorecard,
|
||||||
|
)
|
||||||
|
from infrastructure.events.bus import Event
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _aggregate_metrics — edge cases
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestAggregateMetricsEdgeCases:
    """Corner cases of ``_aggregate_metrics`` that test_scorecards.py leaves out."""

    def test_push_event_defaults_to_one_commit(self):
        """A push whose payload omits ``num_commits`` is counted as one commit."""
        push = Event(type="gitea.push", source="gitea", data={"actor": "claude"})

        by_agent = _aggregate_metrics([push])

        assert by_agent["claude"].commits == 1

    def test_pr_closed_without_merge_not_counted(self):
        """A PR closed with ``merged=False`` is recorded nowhere for the actor."""
        closed_pr = Event(
            type="gitea.pull_request",
            source="gitea",
            data={"actor": "kimi", "pr_number": 99, "action": "closed", "merged": False},
        )

        by_agent = _aggregate_metrics([closed_pr])

        # The actor still gets a metrics entry ...
        assert "kimi" in by_agent
        kimi = by_agent["kimi"]
        # ... but the PR was never merged,
        assert 99 not in kimi.prs_merged
        # was not an "opened" action,
        assert 99 not in kimi.prs_opened
        # and is expected not to count as a touched issue either.
        assert 99 not in kimi.issues_touched

    def test_test_execution_event_aggregation(self):
        """Every file listed in a ``test.execution`` event lands in ``tests_affected``."""
        expected_paths = ("tests/test_alpha.py", "tests/test_beta.py")
        run = Event(
            type="test.execution",
            source="ci",
            data={"actor": "gemini", "test_files": list(expected_paths)},
        )

        by_agent = _aggregate_metrics([run])

        assert "gemini" in by_agent
        for path in expected_paths:
            assert path in by_agent["gemini"].tests_affected

    def test_untracked_agent_with_agent_id_field_included(self):
        """An event carrying only ``agent_id`` (no ``actor`` key) is attributed to that agent.

        NOTE(review): despite the test name, the agent used here ("kimi") is
        described by the original author as a tracked agent -- confirm whether
        a genuinely untracked id should be exercised instead.
        """
        completed = Event(
            type="agent.task.completed",
            source="system",
            data={"agent_id": "kimi", "tests_affected": [], "token_reward": 5},
        )

        by_agent = _aggregate_metrics([completed])

        assert "kimi" in by_agent
        assert by_agent["kimi"].tokens_earned == 5

    def test_untracked_actor_without_agent_id_excluded(self):
        """An unknown actor whose payload has no ``agent_id`` yields no entry."""
        anonymous_push = Event(
            type="gitea.push",
            source="gitea",
            data={"actor": "anon-bot", "num_commits": 10},
        )

        by_agent = _aggregate_metrics([anonymous_push])

        assert "anon-bot" not in by_agent

    def test_issue_opened_with_no_issue_number_ignored(self):
        """An issue-opened event with ``issue_number=0`` leaves ``issues_touched`` empty."""
        opened = Event(
            type="gitea.issue.opened",
            source="gitea",
            data={"actor": "hermes", "issue_number": 0},
        )

        by_agent = _aggregate_metrics([opened])

        assert "hermes" in by_agent
        assert len(by_agent["hermes"].issues_touched) == 0

    def test_comment_with_no_issue_number_still_increments_counter(self):
        """A comment with ``issue_number=0`` bumps ``comments`` but touches no issue."""
        comment = Event(
            type="gitea.issue.comment",
            source="gitea",
            data={"actor": "manus", "issue_number": 0},
        )

        by_agent = _aggregate_metrics([comment])

        assert "manus" in by_agent
        manus = by_agent["manus"]
        assert manus.comments == 1
        assert len(manus.issues_touched) == 0

    def test_task_completion_no_tests_affected(self):
        """A completed task with an empty ``tests_affected`` list still credits tokens."""
        completed = Event(
            type="agent.task.completed",
            source="system",
            data={"agent_id": "claude", "tests_affected": [], "token_reward": 20},
        )

        by_agent = _aggregate_metrics([completed])

        assert "claude" in by_agent
        claude = by_agent["claude"]
        assert len(claude.tests_affected) == 0
        assert claude.tokens_earned == 20

    def test_multiple_agents_independent_metrics(self):
        """Commit counts of different agents are kept apart."""
        pushes = [
            Event(type="gitea.push", source="gitea", data={"actor": agent, "num_commits": count})
            for agent, count in (("claude", 3), ("gemini", 7))
        ]

        by_agent = _aggregate_metrics(pushes)

        assert by_agent["claude"].commits == 3
        assert by_agent["gemini"].commits == 7

    def test_pr_with_no_pr_number_not_recorded(self):
        """An opened-PR event with ``pr_number=0`` adds nothing to ``prs_opened``."""
        numberless_pr = Event(
            type="gitea.pull_request",
            source="gitea",
            data={"actor": "kimi", "pr_number": 0, "action": "opened"},
        )

        by_agent = _aggregate_metrics([numberless_pr])

        assert "kimi" in by_agent
        assert len(by_agent["kimi"].prs_opened) == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _detect_patterns — boundary conditions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestDetectPatternsBoundaries:
    """Threshold behaviour of ``_detect_patterns`` at and around its cut-offs."""

    def test_no_patterns_with_empty_metrics(self):
        """Default (all-empty) metrics produce an empty pattern list."""
        found = _detect_patterns(AgentMetrics(agent_id="kimi"))

        assert found == []

    def test_merge_rate_requires_three_or_more_prs(self):
        """With only two opened PRs no merge-rate pattern is emitted at all."""
        subject = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2},
            prs_merged={1, 2},  # everything merged, yet fewer than 3 PRs
        )

        found = _detect_patterns(subject)

        # Neither the high nor the low merge-rate message may appear.
        assert all("High merge rate" not in text for text in found)
        assert all("low merge rate" not in text for text in found)

    def test_merge_rate_exactly_3_prs_triggers_pattern(self):
        """Three opened PRs, all merged, are enough to report a high merge rate."""
        subject = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2, 3},
            prs_merged={1, 2, 3},  # 3 of 3 merged
        )

        found = _detect_patterns(subject)

        assert any("High merge rate" in text for text in found)

    def test_merge_rate_80_percent_is_high(self):
        """A merge rate of exactly 80% (4 of 5) counts as high."""
        subject = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2, 3, 4, 5},
            prs_merged={1, 2, 3, 4},  # 4 / 5 = 80%
        )

        found = _detect_patterns(subject)

        assert any("High merge rate" in text for text in found)

    def test_merge_rate_below_80_not_high(self):
        """A 5-of-7 (~71.4%) merge rate must not be reported as high."""
        subject = AgentMetrics(
            agent_id="kimi",
            prs_opened={1, 2, 3, 4, 5, 6, 7},  # 7 opened
            prs_merged={1, 2, 3, 4, 5},  # 5 merged -> below 80%
        )

        found = _detect_patterns(subject)

        assert all("High merge rate" not in text for text in found)

    def test_commit_pattern_requires_over_10_commits(self):
        """Exactly 10 commits with no PRs does not raise the high-commit pattern."""
        subject = AgentMetrics(
            agent_id="kimi",
            commits=10,
            prs_opened=set(),
        )

        found = _detect_patterns(subject)

        assert all("High commit volume" not in text for text in found)

    def test_commit_pattern_triggered_at_11_commits(self):
        """Eleven commits with no PRs raises the high-commit pattern."""
        subject = AgentMetrics(
            agent_id="kimi",
            commits=11,
            prs_opened=set(),
        )

        found = _detect_patterns(subject)

        assert any("High commit volume without PRs" in text for text in found)

    def test_token_accumulation_exact_boundary(self):
        """A net balance of exactly +100 tokens is not yet 'strong accumulation'."""
        subject = AgentMetrics(agent_id="kimi", tokens_earned=100, tokens_spent=0)

        found = _detect_patterns(subject)

        assert all("Strong token accumulation" not in text for text in found)

    def test_token_spend_exact_boundary(self):
        """A net balance of exactly -50 tokens is not yet 'high token spend'."""
        subject = AgentMetrics(agent_id="kimi", tokens_earned=0, tokens_spent=50)

        found = _detect_patterns(subject)

        assert all("High token spend" not in text for text in found)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _generate_narrative_bullets — singular/plural
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGenerateNarrativeSingularPlural:
    """Check singular/plural wording in ``_generate_narrative_bullets`` output."""

    @staticmethod
    def _activity_bullet(metrics):
        """Return the daily "Active across" bullet for *metrics*, or ``None`` if absent."""
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)
        return next((b for b in bullets if "Active across" in b), None)

    def test_singular_commit(self):
        """One commit should use singular form."""
        activity = self._activity_bullet(AgentMetrics(agent_id="kimi", commits=1))

        assert activity is not None
        assert "1 commit" in activity
        # "1 commit" is a prefix of "1 commits", so rule the plural out explicitly.
        assert "1 commits" not in activity

    def test_singular_pr_opened(self):
        """One opened PR should use singular form."""
        activity = self._activity_bullet(AgentMetrics(agent_id="kimi", prs_opened={1}))

        assert activity is not None
        # The trailing word makes this substring impossible for the plural "1 PRs opened".
        assert "1 PR opened" in activity

    def test_singular_pr_merged(self):
        """One merged PR should use singular form."""
        activity = self._activity_bullet(AgentMetrics(agent_id="kimi", prs_merged={1}))

        assert activity is not None
        assert "1 PR merged" in activity

    def test_singular_issue_touched(self):
        """One issue touched should use singular form."""
        activity = self._activity_bullet(AgentMetrics(agent_id="kimi", issues_touched={42}))

        assert activity is not None
        assert "1 issue touched" in activity

    def test_singular_comment(self):
        """One comment should use singular form."""
        activity = self._activity_bullet(AgentMetrics(agent_id="kimi", comments=1))

        assert activity is not None
        assert "1 comment" in activity
        # Without this check the test would also pass for the plural "1 comments".
        assert "1 comments" not in activity

    def test_singular_test_file(self):
        """One test file should use singular form."""
        metrics = AgentMetrics(agent_id="kimi", tests_affected={"test_foo.py"})
        bullets = _generate_narrative_bullets(metrics, PeriodType.daily)

        # The trailing period excludes the plural "1 test files.".
        assert any("1 test file." in b for b in bullets)

    def test_weekly_period_label(self):
        """Weekly period uses 'week' label in no-activity message."""
        metrics = AgentMetrics(agent_id="kimi")
        bullets = _generate_narrative_bullets(metrics, PeriodType.weekly)

        assert any("this week" in b for b in bullets)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# generate_scorecard — token augmentation (max logic)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestGenerateScorecardTokenAugmentation:
    """How ``generate_scorecard`` reconciles event-derived and ledger token totals."""

    def test_event_tokens_win_over_ledger_when_higher(self):
        """If events report more earned tokens than the ledger, the event figure stays."""
        task_done = Event(
            type="agent.task.completed",
            source="system",
            data={"agent_id": "kimi", "tests_affected": [], "token_reward": 200},
        )

        # Ledger stub reports 50 earned / 0 spent.
        with (
            patch(
                "dashboard.services.scorecard_service._collect_events_for_period",
                return_value=[task_done],
            ),
            patch(
                "dashboard.services.scorecard_service._query_token_transactions",
                return_value=(50, 0),
            ),
        ):
            card = generate_scorecard("kimi", PeriodType.daily)

        # Larger of 200 (events) and 50 (ledger).
        assert card.metrics.tokens_earned == 200

    def test_ledger_tokens_win_when_higher(self):
        """If the ledger reports more tokens than the events, the ledger figures are used."""
        task_done = Event(
            type="agent.task.completed",
            source="system",
            data={"agent_id": "kimi", "tests_affected": [], "token_reward": 10},
        )

        # Ledger stub reports 500 earned / 100 spent.
        with (
            patch(
                "dashboard.services.scorecard_service._collect_events_for_period",
                return_value=[task_done],
            ),
            patch(
                "dashboard.services.scorecard_service._query_token_transactions",
                return_value=(500, 100),
            ),
        ):
            card = generate_scorecard("kimi", PeriodType.daily)

        # Larger of 10 (events) and 500 (ledger).
        assert card.metrics.tokens_earned == 500
        # Larger of 0 (events) and 100 (ledger).
        assert card.metrics.tokens_spent == 100
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ScorecardSummary.to_dict — timestamp format and tests_affected
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestScorecardSummaryToDict:
    """Serialization details of ``ScorecardSummary.to_dict``."""

    def test_timestamps_are_iso_strings(self):
        """``period_start`` / ``period_end`` are emitted as ``isoformat()`` strings."""
        window_open = datetime(2026, 3, 20, 0, 0, 0, tzinfo=UTC)
        window_close = datetime(2026, 3, 21, 0, 0, 0, tzinfo=UTC)

        payload = ScorecardSummary(
            agent_id="kimi",
            period_type=PeriodType.daily,
            period_start=window_open,
            period_end=window_close,
            metrics=AgentMetrics(agent_id="kimi"),
        ).to_dict()

        assert payload["period_start"] == window_open.isoformat()
        assert payload["period_end"] == window_close.isoformat()

    def test_tests_affected_count_in_dict(self):
        """``metrics.tests_affected`` is serialized as the number of affected files."""
        three_files = AgentMetrics(
            agent_id="kimi",
            tests_affected={"test_a.py", "test_b.py", "test_c.py"},
        )

        payload = ScorecardSummary(
            agent_id="kimi",
            period_type=PeriodType.daily,
            period_start=datetime.now(UTC),
            period_end=datetime.now(UTC),
            metrics=three_files,
        ).to_dict()

        assert payload["metrics"]["tests_affected"] == 3

    def test_empty_narrative_and_patterns(self):
        """A summary built without bullets or patterns serializes them as empty lists."""
        payload = ScorecardSummary(
            agent_id="claude",
            period_type=PeriodType.weekly,
            period_start=datetime.now(UTC),
            period_end=datetime.now(UTC),
            metrics=AgentMetrics(agent_id="claude"),
        ).to_dict()

        assert payload["narrative_bullets"] == []
        assert payload["patterns"] == []
        assert payload["period_type"] == "weekly"
|
||||||
Reference in New Issue
Block a user