[claude] Add unit tests for backlog_triage.py (#1293) (#1307)
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled

This commit was merged in pull request #1307.
This commit is contained in:
2026-03-24 01:57:44 +00:00
parent 16c4cc0f9f
commit 8304cf50da

View File

@@ -0,0 +1,696 @@
"""Unit tests for timmy.backlog_triage — scoring, prioritization, and decision logic."""
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from timmy.backlog_triage import (
AGENT_CLAUDE,
AGENT_KIMI,
KIMI_READY_LABEL,
OWNER_LOGIN,
READY_THRESHOLD,
BacklogTriageLoop,
ScoredIssue,
TriageCycleResult,
TriageDecision,
_build_audit_comment,
_extract_tags,
_score_acceptance,
_score_alignment,
_score_scope,
decide,
execute_decision,
score_issue,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_raw_issue(
number: int = 1,
title: str = "Fix something broken in src/foo.py",
body: str = "## Problem\nThis crashes. Expected: no crash. Steps: run it.",
labels: list[str] | None = None,
assignees: list[str] | None = None,
created_at: str | None = None,
) -> dict:
if labels is None:
labels = []
if assignees is None:
assignees = []
if created_at is None:
created_at = datetime.now(UTC).isoformat()
return {
"number": number,
"title": title,
"body": body,
"labels": [{"name": lbl} for lbl in labels],
"assignees": [{"login": a} for a in assignees],
"created_at": created_at,
}
def _make_scored(
    number: int = 1,
    title: str = "Fix a bug",
    issue_type: str = "bug",
    score: int = 6,
    ready: bool = True,
    assignees: list[str] | None = None,
    tags: set[str] | None = None,
    is_p0: bool = False,
    is_blocked: bool = False,
) -> ScoredIssue:
    """Construct a ``ScoredIssue`` directly, bypassing ``score_issue``.

    Only the fields exposed as parameters vary; the body is empty, labels are
    empty, each score component is fixed at 2, the age is 5 days and the
    creation time is "now" in UTC. A falsy ``tags`` / ``assignees`` argument
    is replaced by a fresh empty set / list.
    """
    fields = {
        "number": number,
        "title": title,
        "body": "",
        "labels": [],
        "tags": tags if tags else set(),
        "assignees": assignees if assignees else [],
        "created_at": datetime.now(UTC),
        "issue_type": issue_type,
        "score": score,
        # The component scores are fixed; ``score`` is passed independently.
        "scope": 2,
        "acceptance": 2,
        "alignment": 2,
        "ready": ready,
        "age_days": 5,
        "is_p0": is_p0,
        "is_blocked": is_blocked,
    }
    return ScoredIssue(**fields)
# ---------------------------------------------------------------------------
# _extract_tags
# ---------------------------------------------------------------------------
class TestExtractTags:
    """Checks for ``_extract_tags(title, label_names)``."""

    def test_bracket_tags_from_title(self):
        """Every ``[tag]`` in the title appears in the result."""
        found = _extract_tags("[feat][bug] do something", [])
        for expected in ("feat", "bug"):
            assert expected in found

    def test_label_names_included(self):
        """Label names are included even when the title has no brackets."""
        found = _extract_tags("Normal title", ["kimi-ready", "enhancement"])
        for expected in ("kimi-ready", "enhancement"):
            assert expected in found

    def test_combined(self):
        """Title tags and label names are merged into one collection."""
        found = _extract_tags("[fix] crash in module", ["p0"])
        for expected in ("fix", "p0"):
            assert expected in found

    def test_empty_inputs(self):
        """An empty title with no labels yields an empty set."""
        found = _extract_tags("", [])
        assert found == set()

    def test_tags_are_lowercased(self):
        """Mixed-case title tags and labels are reported in lowercase."""
        found = _extract_tags("[BUG][Refactor] title", ["Enhancement"])
        for expected in ("bug", "refactor", "enhancement"):
            assert expected in found
# ---------------------------------------------------------------------------
# _score_scope
# ---------------------------------------------------------------------------
class TestScoreScope:
    """Checks for ``_score_scope(title, body, tags)``."""

    def test_file_reference_adds_point(self):
        """A file path mentioned in the body scores at least 1."""
        body = "See src/auth/login.py for details"
        assert _score_scope("Fix login", body, set()) >= 1

    def test_function_reference_adds_point(self):
        """A back-ticked function call in the body scores at least 1."""
        body = "In the `handle_login()` method"
        assert _score_scope("Fix login", body, set()) >= 1

    def test_short_title_adds_point(self):
        """A short title with an empty body scores at least 1."""
        assert _score_scope("Short clear title", "", set()) >= 1

    def test_long_title_no_bonus(self):
        """A 90-character title never outscores a short one."""
        short_score = _score_scope("Short title", "", set())
        long_score = _score_scope("A" * 90, "", set())
        assert short_score >= long_score

    def test_meta_tags_reduce_score(self):
        """The ``philosophy`` tag scores strictly lower than an untagged issue."""
        plain = _score_scope("Fix src/foo.py bug", "def func()", set())
        meta = _score_scope(
            "Discuss src/foo.py philosophy", "def func()", {"philosophy"}
        )
        assert meta < plain

    def test_max_is_three(self):
        """The scope score never exceeds 3."""
        body = "See src/foo.py and `def bar()` method here"
        assert _score_scope("Fix it", body, set()) <= 3
# ---------------------------------------------------------------------------
# _score_acceptance
# ---------------------------------------------------------------------------
class TestScoreAcceptance:
    """Checks for ``_score_acceptance(title, body, tags)``."""

    def test_accept_keywords_add_points(self):
        """Several acceptance keywords in the body score at least 2."""
        text = "Should return 200. Must pass validation. Assert no errors."
        assert _score_acceptance("", text, set()) >= 2

    def test_test_reference_adds_point(self):
        """Mentioning pytest in the body scores at least 1."""
        assert _score_acceptance("", "Run pytest to verify", set()) >= 1

    def test_structured_headers_add_point(self):
        """Markdown section headers in the body score at least 1."""
        text = "## Problem\nit breaks\n## Expected\nsuccess"
        assert _score_acceptance("", text, set()) >= 1

    def test_meta_tags_reduce_score(self):
        """The same body scores strictly lower when tagged ``philosophy``."""
        text = "Should pass and must verify assert test_foo"
        plain = _score_acceptance("", text, set())
        meta = _score_acceptance("", text, {"philosophy"})
        assert meta < plain

    def test_max_is_three(self):
        """The acceptance score never exceeds 3."""
        text = (
            "Should pass. Must return. Expected: success. Assert no error. "
            "pytest test_foo. ## Problem\ndef. ## Expected\nok"
        )
        assert _score_acceptance("", text, set()) <= 3
# ---------------------------------------------------------------------------
# _score_alignment
# ---------------------------------------------------------------------------
class TestScoreAlignment:
    """Checks for ``_score_alignment(title, body, tags)``."""

    def test_bug_tags_return_max(self):
        """Each of ``bug``, ``crash`` and ``hotfix`` alone scores exactly 3."""
        for tag in ("bug", "crash", "hotfix"):
            assert _score_alignment("", "", {tag}) == 3

    def test_refactor_tags_give_high_score(self):
        """The ``refactor`` tag scores at least 2."""
        assert _score_alignment("", "", {"refactor"}) >= 2

    def test_feature_tags_give_high_score(self):
        """The ``feature`` tag scores at least 2."""
        assert _score_alignment("", "", {"feature"}) >= 2

    def test_loop_generated_adds_bonus(self):
        """Adding ``loop-generated`` never lowers a feature's score."""
        baseline = _score_alignment("", "", {"feature"})
        boosted = _score_alignment("", "", {"feature", "loop-generated"})
        assert boosted >= baseline

    def test_meta_tags_zero_out_score(self):
        """``philosophy`` forces the score to 0 even next to ``refactor``."""
        assert _score_alignment("", "", {"philosophy", "refactor"}) == 0

    def test_max_is_three(self):
        """The alignment score never exceeds 3."""
        stacked = {"feature", "loop-generated", "enhancement"}
        assert _score_alignment("", "", stacked) <= 3
# ---------------------------------------------------------------------------
# score_issue
# ---------------------------------------------------------------------------
class TestScoreIssue:
    """Checks for ``score_issue(raw_issue_dict)``."""

    def test_basic_bug_issue_classified(self):
        """A ``[bug]`` title is typed as a bug and flagged P0."""
        scored = score_issue(
            _make_raw_issue(
                title="[bug] fix crash in src/timmy/agent.py",
                body="## Problem\nCrashes on startup. Expected: runs. Steps: python -m timmy",
            )
        )
        assert scored.issue_type == "bug"
        assert scored.is_p0 is True

    def test_feature_issue_classified(self):
        """A ``[feat]`` title with a ``feature`` label is typed as a feature."""
        scored = score_issue(
            _make_raw_issue(
                title="[feat] add dark mode to dashboard",
                body="Add a toggle button. Should switch CSS vars.",
                labels=["feature"],
            )
        )
        assert scored.issue_type == "feature"

    def test_research_issue_classified(self):
        """``kimi-ready`` + ``research`` labels give a research issue needing Kimi."""
        scored = score_issue(
            _make_raw_issue(
                title="Investigate MCP performance",
                labels=["kimi-ready", "research"],
            )
        )
        assert scored.issue_type == "research"
        assert scored.needs_kimi is True

    def test_philosophy_issue_classified(self):
        """A ``philosophy`` label yields the philosophy issue type."""
        scored = score_issue(
            _make_raw_issue(
                title="Discussion: soul and identity",
                labels=["philosophy"],
            )
        )
        assert scored.issue_type == "philosophy"

    def test_score_totals_components(self):
        """``score`` equals scope + acceptance + alignment."""
        scored = score_issue(_make_raw_issue())
        component_sum = scored.scope + scored.acceptance + scored.alignment
        assert scored.score == component_sum

    def test_ready_flag_set_when_score_meets_threshold(self):
        """``ready`` agrees with ``score >= READY_THRESHOLD`` for a detailed bug."""
        detailed_body = (
            "## Problem\nCrashes when running `run()`. "
            "Expected: should return 200. Must pass pytest assert."
        )
        scored = score_issue(
            _make_raw_issue(
                title="[bug] crash in src/core.py",
                body=detailed_body,
                labels=["bug"],
            )
        )
        meets_threshold = scored.score >= READY_THRESHOLD
        assert scored.ready == meets_threshold

    def test_assigned_issue_reports_assignees(self):
        """Assignee logins are carried over and the issue is not unassigned."""
        scored = score_issue(_make_raw_issue(assignees=["claude", "kimi"]))
        assert "claude" in scored.assignees
        assert scored.is_unassigned is False

    def test_unassigned_issue(self):
        """An empty assignee list means ``is_unassigned`` is True."""
        scored = score_issue(_make_raw_issue(assignees=[]))
        assert scored.is_unassigned is True

    def test_blocked_issue_detected(self):
        """An issue whose title and body mention "blocked" is flagged as blocked."""
        scored = score_issue(
            _make_raw_issue(
                title="Fix blocked deployment", body="Blocked by infra team."
            )
        )
        assert scored.is_blocked is True

    def test_age_days_computed(self):
        """An issue created 30 days ago reports an age of at least 29 days."""
        thirty_days_ago = datetime.now(UTC) - timedelta(days=30)
        scored = score_issue(_make_raw_issue(created_at=thirty_days_ago.isoformat()))
        assert scored.age_days >= 29

    def test_invalid_created_at_defaults_to_now(self):
        """An unparseable ``created_at`` results in an age of 0 days."""
        scored = score_issue(_make_raw_issue(created_at="not-a-date"))
        assert scored.age_days == 0

    def test_title_bracket_tags_stripped(self):
        """Bracket tags do not survive into the scored title."""
        scored = score_issue(_make_raw_issue(title="[bug][p0] crash in login"))
        assert "[" not in scored.title

    def test_missing_body_defaults_to_empty(self):
        """A ``None`` body in the payload becomes an empty string."""
        payload = _make_raw_issue()
        payload["body"] = None
        scored = score_issue(payload)
        assert scored.body == ""

    def test_kimi_label_triggers_needs_kimi(self):
        """The ``KIMI_READY_LABEL`` label alone sets ``needs_kimi``."""
        scored = score_issue(_make_raw_issue(labels=[KIMI_READY_LABEL]))
        assert scored.needs_kimi is True
# ---------------------------------------------------------------------------
# decide
# ---------------------------------------------------------------------------
class TestDecide:
    """Checks for ``decide(scored_issue)`` and the decision it returns."""

    def test_philosophy_is_skipped(self):
        """Philosophy issues are skipped with a reason naming philosophy or meta."""
        decision = decide(_make_scored(issue_type="philosophy"))
        assert decision.action == "skip"
        reason = decision.reason.lower()
        assert any(word in reason for word in ("philosophy", "meta"))

    def test_already_assigned_is_skipped(self):
        """An issue that already has an assignee is skipped."""
        decision = decide(_make_scored(assignees=["claude"]))
        assert decision.action == "skip"
        assert "assigned" in decision.reason.lower()

    def test_low_score_is_skipped(self):
        """A not-ready issue is skipped and the reason cites the threshold."""
        below = _make_scored(score=READY_THRESHOLD - 1, ready=False)
        decision = decide(below)
        assert decision.action == "skip"
        assert str(READY_THRESHOLD) in decision.reason

    def test_blocked_is_flagged_for_alex(self):
        """A blocked issue is flagged to the repository owner."""
        decision = decide(_make_scored(is_blocked=True))
        assert decision.action == "flag_alex"
        assert decision.agent == OWNER_LOGIN

    def test_kimi_ready_assigned_to_kimi(self):
        """A ready, unassigned, unblocked research issue tagged kimi-ready goes to Kimi."""
        candidate = _make_scored(tags={"kimi-ready"})
        # Pin the preconditions explicitly so the test does not rely on helper defaults.
        candidate.assignees = []
        candidate.ready = True
        candidate.is_blocked = False
        candidate.issue_type = "research"
        decision = decide(candidate)
        assert decision.action == "assign_kimi"
        assert decision.agent == AGENT_KIMI

    def test_research_type_assigned_to_kimi(self):
        """A research issue carrying the ``research`` tag goes to Kimi."""
        decision = decide(_make_scored(issue_type="research", tags={"research"}))
        assert decision.action == "assign_kimi"
        assert decision.agent == AGENT_KIMI

    def test_p0_bug_assigned_to_claude(self):
        """A P0 bug is assigned to Claude."""
        decision = decide(_make_scored(issue_type="bug", is_p0=True))
        assert decision.action == "assign_claude"
        assert decision.agent == AGENT_CLAUDE

    def test_ready_feature_assigned_to_claude(self):
        """A ready feature is assigned to Claude."""
        decision = decide(_make_scored(issue_type="feature", score=6, ready=True))
        assert decision.action == "assign_claude"
        assert decision.agent == AGENT_CLAUDE

    def test_ready_refactor_assigned_to_claude(self):
        """A ready refactor is assigned to Claude."""
        decision = decide(_make_scored(issue_type="refactor", score=6, ready=True))
        assert decision.action == "assign_claude"
        assert decision.agent == AGENT_CLAUDE

    def test_decision_has_issue_number(self):
        """The decision carries the number of the issue it was made for."""
        decision = decide(_make_scored(number=42))
        assert decision.issue_number == 42
# ---------------------------------------------------------------------------
# _build_audit_comment
# ---------------------------------------------------------------------------
class TestBuildAuditComment:
    """Checks for the audit comment text produced by ``_build_audit_comment``."""

    def test_assign_claude_comment(self):
        """A Claude assignment names the agent, the triage banner and the reason."""
        decision = TriageDecision(
            issue_number=1, action="assign_claude", agent=AGENT_CLAUDE, reason="Ready bug"
        )
        comment = _build_audit_comment(decision)
        assert AGENT_CLAUDE in comment
        assert "Timmy Triage" in comment
        assert "Ready bug" in comment

    def test_assign_kimi_comment(self):
        """A Kimi assignment mentions the kimi-ready label."""
        decision = TriageDecision(
            issue_number=2, action="assign_kimi", agent=AGENT_KIMI, reason="Research spike"
        )
        comment = _build_audit_comment(decision)
        assert KIMI_READY_LABEL in comment

    def test_flag_alex_comment(self):
        """A flag-for-owner decision mentions the owner login."""
        decision = TriageDecision(
            issue_number=3, action="flag_alex", agent=OWNER_LOGIN, reason="Blocked"
        )
        comment = _build_audit_comment(decision)
        assert OWNER_LOGIN in comment

    def test_comment_contains_autonomous_triage_note(self):
        """The comment states, in any letter case, that the triage was autonomous."""
        decision = TriageDecision(
            issue_number=1, action="assign_claude", agent=AGENT_CLAUDE, reason="x"
        )
        comment = _build_audit_comment(decision)
        # A case-insensitive match already covers the exact phrase
        # "Autonomous triage", so a separate case-sensitive check adds nothing.
        assert "autonomous" in comment.lower()
# ---------------------------------------------------------------------------
# execute_decision (dry_run)
# ---------------------------------------------------------------------------
class TestExecuteDecisionDryRun:
    """Checks for ``execute_decision(..., dry_run=True)`` against a mock client."""

    @pytest.mark.asyncio
    async def test_skip_action_marks_executed(self):
        """A skip decision is reported as executed without any POST."""
        client = AsyncMock()
        decision = TriageDecision(issue_number=1, action="skip", reason="Already assigned")
        outcome = await execute_decision(client, decision, dry_run=True)
        assert outcome.executed is True
        client.post.assert_not_called()

    @pytest.mark.asyncio
    async def test_dry_run_does_not_call_api(self):
        """A dry-run Claude assignment issues neither POST nor PATCH."""
        client = AsyncMock()
        decision = TriageDecision(
            issue_number=5, action="assign_claude", agent=AGENT_CLAUDE, reason="Ready"
        )
        outcome = await execute_decision(client, decision, dry_run=True)
        assert outcome.executed is True
        client.post.assert_not_called()
        client.patch.assert_not_called()

    @pytest.mark.asyncio
    async def test_dry_run_kimi_does_not_call_api(self):
        """A dry-run Kimi assignment issues no POST."""
        client = AsyncMock()
        decision = TriageDecision(
            issue_number=6, action="assign_kimi", agent=AGENT_KIMI, reason="Research"
        )
        outcome = await execute_decision(client, decision, dry_run=True)
        assert outcome.executed is True
        client.post.assert_not_called()
# ---------------------------------------------------------------------------
# execute_decision (live — mocked HTTP)
# ---------------------------------------------------------------------------
class TestExecuteDecisionLive:
    """Checks for ``execute_decision(..., dry_run=False)`` with mocked HTTP responses."""

    @pytest.mark.asyncio
    async def test_assign_claude_posts_comment_then_patches(self):
        """A successful Claude assignment makes exactly one POST and one PATCH."""
        client = AsyncMock()
        client.post.return_value = MagicMock(status_code=201)
        client.patch.return_value = MagicMock(status_code=200)
        decision = TriageDecision(
            issue_number=10, action="assign_claude", agent=AGENT_CLAUDE, reason="Bug ready"
        )
        with patch("timmy.backlog_triage.settings") as fake_settings:
            fake_settings.gitea_token = "tok"
            fake_settings.gitea_repo = "owner/repo"
            fake_settings.gitea_url = "http://localhost:3000"
            outcome = await execute_decision(client, decision, dry_run=False)
        assert outcome.executed is True
        assert outcome.error == ""
        client.post.assert_called_once()
        client.patch.assert_called_once()

    @pytest.mark.asyncio
    async def test_comment_failure_sets_error(self):
        """A 500 response to the POST leaves the decision unexecuted with an error."""
        client = AsyncMock()
        client.post.return_value = MagicMock(status_code=500)
        decision = TriageDecision(
            issue_number=11, action="assign_claude", agent=AGENT_CLAUDE, reason="Bug"
        )
        with patch("timmy.backlog_triage.settings") as fake_settings:
            fake_settings.gitea_token = "tok"
            fake_settings.gitea_repo = "owner/repo"
            fake_settings.gitea_url = "http://localhost:3000"
            outcome = await execute_decision(client, decision, dry_run=False)
        assert outcome.executed is False
        assert outcome.error != ""

    @pytest.mark.asyncio
    async def test_flag_alex_only_posts_comment(self):
        """Flagging the owner executes without any PATCH."""
        client = AsyncMock()
        client.post.return_value = MagicMock(status_code=201)
        decision = TriageDecision(
            issue_number=12, action="flag_alex", agent=OWNER_LOGIN, reason="Blocked"
        )
        with patch("timmy.backlog_triage.settings") as fake_settings:
            fake_settings.gitea_token = "tok"
            fake_settings.gitea_repo = "owner/repo"
            fake_settings.gitea_url = "http://localhost:3000"
            outcome = await execute_decision(client, decision, dry_run=False)
        assert outcome.executed is True
        client.patch.assert_not_called()
# ---------------------------------------------------------------------------
# BacklogTriageLoop
# ---------------------------------------------------------------------------
class TestBacklogTriageLoop:
    """Checks for ``BacklogTriageLoop`` state handling and ``run_once``.

    Every test patches ``timmy.backlog_triage.settings``; the shared attribute
    values are applied through ``_configure`` so each test only spells out
    what differs.
    """

    @staticmethod
    def _configure(mock_settings, **overrides):
        """Set the triage settings shared by these tests on a patched settings mock.

        Args:
            mock_settings: The mock yielded by ``patch("timmy.backlog_triage.settings")``.
            **overrides: Extra attribute values (e.g. ``gitea_enabled=False``)
                applied after the shared ones.
        """
        mock_settings.backlog_triage_interval_seconds = 900
        mock_settings.backlog_triage_dry_run = True
        mock_settings.backlog_triage_daily_summary = False
        for attr, value in overrides.items():
            setattr(mock_settings, attr, value)

    def test_default_state(self):
        """A new loop is stopped, has run zero cycles and has no history."""
        with patch("timmy.backlog_triage.settings") as mock_settings:
            self._configure(mock_settings)
            loop = BacklogTriageLoop()
        assert loop.is_running is False
        assert loop.cycle_count == 0
        assert loop.history == []

    def test_custom_interval_overrides_settings(self):
        """An explicit ``interval`` argument wins over the settings value."""
        with patch("timmy.backlog_triage.settings") as mock_settings:
            self._configure(mock_settings)
            loop = BacklogTriageLoop(interval=60)
        assert loop._interval == 60.0

    def test_stop_sets_running_false(self):
        """``stop()`` clears the running flag."""
        with patch("timmy.backlog_triage.settings") as mock_settings:
            self._configure(mock_settings)
            loop = BacklogTriageLoop()
            loop._running = True
            loop.stop()
        assert loop.is_running is False

    @pytest.mark.asyncio
    async def test_run_once_skips_when_gitea_disabled(self):
        """With Gitea disabled, a cycle reports zero open and zero scored issues."""
        with patch("timmy.backlog_triage.settings") as mock_settings:
            self._configure(mock_settings, gitea_enabled=False, gitea_token="")
            loop = BacklogTriageLoop(dry_run=True, daily_summary=False)
            result = await loop.run_once()
        assert result.total_open == 0
        assert result.scored == 0

    @pytest.mark.asyncio
    async def test_run_once_increments_cycle_count(self):
        """Two ``run_once`` calls leave ``cycle_count`` at 2."""
        with patch("timmy.backlog_triage.settings") as mock_settings:
            self._configure(mock_settings, gitea_enabled=False, gitea_token="")
            loop = BacklogTriageLoop(dry_run=True, daily_summary=False)
            await loop.run_once()
            await loop.run_once()
        assert loop.cycle_count == 2

    @pytest.mark.asyncio
    async def test_run_once_full_cycle_with_mocked_gitea(self):
        """One mocked open issue is fetched, scored and recorded in history."""
        raw_issues = [
            _make_raw_issue(
                number=100,
                title="[bug] crash in src/timmy/agent.py",
                body=(
                    "## Problem\nCrashes. Expected: runs. "
                    "Must pass pytest. Should return 200."
                ),
                labels=["bug"],
                assignees=[],
            )
        ]
        issues_resp = MagicMock()
        issues_resp.status_code = 200
        # First ``json()`` call returns the single page of issues, the second an
        # empty page.
        issues_resp.json.side_effect = [raw_issues, []]
        mock_client = AsyncMock()
        mock_client.get.return_value = issues_resp
        with patch("timmy.backlog_triage.settings") as mock_settings:
            self._configure(
                mock_settings,
                gitea_enabled=True,
                gitea_token="tok",
                gitea_repo="owner/repo",
                gitea_url="http://localhost:3000",
            )
            with patch("timmy.backlog_triage.httpx.AsyncClient") as mock_cls:
                # ``async with httpx.AsyncClient(...) as client`` must yield our mock.
                mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
                mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
                loop = BacklogTriageLoop(dry_run=True, daily_summary=False)
                result = await loop.run_once()
        assert result.total_open == 1
        assert result.scored == 1
        assert loop.cycle_count == 1
        assert len(loop.history) == 1
# ---------------------------------------------------------------------------
# ScoredIssue properties
# ---------------------------------------------------------------------------
class TestScoredIssueProperties:
    """Checks for the ``is_unassigned`` and ``needs_kimi`` attributes of ``ScoredIssue``."""

    def test_is_unassigned_true_when_no_assignees(self):
        """No assignees means ``is_unassigned`` is True."""
        assert _make_scored(assignees=[]).is_unassigned is True

    def test_is_unassigned_false_when_assigned(self):
        """Any assignee means ``is_unassigned`` is False."""
        assert _make_scored(assignees=["claude"]).is_unassigned is False

    def test_needs_kimi_from_research_tag(self):
        """The ``research`` tag sets ``needs_kimi``."""
        assert _make_scored(tags={"research"}).needs_kimi is True

    def test_needs_kimi_from_kimi_ready_label(self):
        """The kimi-ready label, set after construction, sets ``needs_kimi``."""
        labelled = _make_scored()
        labelled.labels = [KIMI_READY_LABEL]
        assert labelled.needs_kimi is True

    def test_needs_kimi_false_for_plain_bug(self):
        """A plain bug with only the ``bug`` tag does not need Kimi."""
        plain_bug = _make_scored(tags={"bug"}, issue_type="bug")
        assert plain_bug.needs_kimi is False
# ---------------------------------------------------------------------------
# TriageCycleResult
# ---------------------------------------------------------------------------
class TestTriageCycleResult:
    """Checks for the default field values of ``TriageCycleResult``."""

    def test_default_decisions_list_is_empty(self):
        """Omitted ``decisions``/``errors`` are empty lists and ``duration_ms`` is 0."""
        required = {
            "timestamp": "2026-01-01T00:00:00",
            "total_open": 10,
            "scored": 8,
            "ready": 3,
        }
        cycle = TriageCycleResult(**required)
        assert cycle.decisions == []
        assert cycle.errors == []
        assert cycle.duration_ms == 0