# Co-authored-by: Claude (Opus 4.6) <claude@hermes.local>
# Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
# File viewer metadata: 621 lines, 21 KiB, Python
"""Unit tests for timmy.backlog_triage — autonomous backlog triage loop."""
|
|
|
|
from datetime import UTC, datetime
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from timmy.backlog_triage import (
|
|
AGENT_CLAUDE,
|
|
AGENT_KIMI,
|
|
KIMI_READY_LABEL,
|
|
OWNER_LOGIN,
|
|
READY_THRESHOLD,
|
|
BacklogTriageLoop,
|
|
ScoredIssue,
|
|
TriageCycleResult,
|
|
TriageDecision,
|
|
_build_audit_comment,
|
|
_build_daily_summary,
|
|
_extract_tags,
|
|
_score_acceptance,
|
|
_score_alignment,
|
|
_score_scope,
|
|
decide,
|
|
score_issue,
|
|
)
|
|
|
|
# ── Fixtures ─────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _make_raw_issue(
|
|
number: int = 1,
|
|
title: str = "Fix the login bug",
|
|
body: str = "## Problem\nLogin fails on empty password.\n\n## Steps\nassert response == 200",
|
|
labels: list | None = None,
|
|
assignees: list | None = None,
|
|
created_at: str = "2026-03-20T10:00:00Z",
|
|
) -> dict:
|
|
return {
|
|
"number": number,
|
|
"title": title,
|
|
"body": body,
|
|
"labels": [{"name": lbl} for lbl in (labels or [])],
|
|
"assignees": [{"login": a} for a in (assignees or [])],
|
|
"created_at": created_at,
|
|
}
|
|
|
|
|
|
def _make_scored_issue(
    number: int = 1,
    title: str = "Fix login bug",
    issue_type: str = "bug",
    score: int = 7,
    ready: bool = True,
    is_p0: bool = True,
    is_blocked: bool = False,
    assignees: list[str] | None = None,
    tags: set[str] | None = None,
    labels: list[str] | None = None,
    age_days: int = 3,
) -> ScoredIssue:
    """Build a ``ScoredIssue`` with test-friendly defaults.

    The sub-scores are fixed (``scope=2``, ``acceptance=2``, ``alignment=3``),
    the body is empty and ``created_at`` is the current UTC time.

    Args:
        number: Issue number.
        title: Issue title.
        issue_type: Value for ``ScoredIssue.issue_type``.
        score: Total score.
        ready: Value of the ``ready`` flag.
        is_p0: Value of the ``is_p0`` flag.
        is_blocked: Value of the ``is_blocked`` flag.
        assignees: Assignee logins; ``None`` means no assignees.
        tags: Tag set. ``None`` means the default ``{"bug"}``; an explicit
            empty set is kept as-is.
        labels: Label names; ``None`` means no labels.
        age_days: Value for ``ScoredIssue.age_days``.

    Returns:
        The constructed ``ScoredIssue``.
    """
    # Only fall back to the default tag set when the caller passed nothing:
    # ``tags or {"bug"}`` would also swallow an explicit ``set()``, which
    # callers use to build an issue that carries no tags at all.
    effective_tags = {"bug"} if tags is None else tags
    return ScoredIssue(
        number=number,
        title=title,
        body="",
        labels=labels or [],
        tags=effective_tags,
        assignees=assignees or [],
        created_at=datetime.now(UTC),
        issue_type=issue_type,
        score=score,
        scope=2,
        acceptance=2,
        alignment=3,
        ready=ready,
        age_days=age_days,
        is_p0=is_p0,
        is_blocked=is_blocked,
    )
|
|
|
|
|
|
# ── _extract_tags ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestExtractTags:
    """Checks for ``_extract_tags(title, labels)``."""

    def test_bracket_tags_in_title(self):
        """A ``[Bug]`` prefix in the title is expected to produce the tag ``bug``."""
        assert "bug" in _extract_tags("[Bug] Login fails", [])

    def test_multiple_brackets(self):
        """Each of several bracketed prefixes is expected to produce a tag."""
        found = _extract_tags("[Bug][P0] Crash on startup", [])
        for expected in ("bug", "p0"):
            assert expected in found

    def test_label_names(self):
        """Label names are expected to show up among the tags."""
        found = _extract_tags("Fix thing", ["security", "hotfix"])
        for expected in ("security", "hotfix"):
            assert expected in found

    def test_labels_lowercased(self):
        """Mixed-case label names are expected to appear lowercased."""
        found = _extract_tags("Title", ["Bug", "FEATURE"])
        for expected in ("bug", "feature"):
            assert expected in found

    def test_empty_inputs(self):
        """An empty title with no labels is expected to give an empty set."""
        assert _extract_tags("", []) == set()
|
|
|
|
|
|
# ── Scoring functions ─────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestScoreScope:
    """Checks for ``_score_scope(title, body, tags)``."""

    def test_file_reference_adds_point(self):
        """A file path in the body should score at least one point."""
        points = _score_scope("Fix auth", "Edit src/timmy/auth.py", set())
        assert points >= 1

    def test_function_reference_adds_point(self):
        """A ``def name()`` reference in the body should score at least one point."""
        points = _score_scope("Fix auth", "def validate_token()", set())
        assert points >= 1

    def test_short_title_adds_point(self):
        """A short title with an empty body should score at least one point."""
        points = _score_scope("Short title", "", set())
        assert points >= 1

    def test_meta_tag_penalizes(self):
        """With the ``philosophy`` tag the score should not exceed one."""
        filler_body = "long body " * 5
        points = _score_scope("Discussion about philosophy", filler_body, {"philosophy"})
        assert points <= 1

    def test_max_score_3(self):
        """The score should never exceed three."""
        points = _score_scope("Fix auth", "src/auth.py\ndef login()", set())
        assert points <= 3
|
|
|
|
|
|
class TestScoreAcceptance:
    """Checks for ``_score_acceptance(title, body, tags)``."""

    def test_acceptance_keywords(self):
        """A body with should/must/expect phrasing should score at least two."""
        text = "should return 200\nmust pass tests\nexpect response"
        assert _score_acceptance("Title", text, set()) >= 2

    def test_test_reference_adds_point(self):
        """Mentioning a test command should score at least one."""
        assert _score_acceptance("Title", "Run tox -e unit", set()) >= 1

    def test_structured_sections(self):
        """Markdown section headings should score at least one."""
        text = "## Problem\nX\n## Solution\nY"
        assert _score_acceptance("Title", text, set()) >= 1

    def test_meta_tag_penalizes(self):
        """With the ``philosophy`` tag the score should not exceed two."""
        points = _score_acceptance("Title", "should do something", {"philosophy"})
        # still counts but penalized
        assert points <= 2

    def test_empty_body(self):
        """An empty body should score zero."""
        assert _score_acceptance("Title", "", set()) == 0
|
|
|
|
|
|
class TestScoreAlignment:
    """Checks for ``_score_alignment(title, body, tags)`` driven by tags only."""

    def test_bug_tags_score_max(self):
        """The ``bug`` tag should score exactly three."""
        points = _score_alignment("", "", {"bug"})
        assert points == 3

    def test_hotfix_tag_max(self):
        """The ``hotfix`` tag should score exactly three."""
        points = _score_alignment("", "", {"hotfix"})
        assert points == 3

    def test_refactor_tag(self):
        """The ``refactor`` tag should score at least two."""
        assert _score_alignment("", "", {"refactor"}) >= 2

    def test_feature_tag(self):
        """The ``feature`` tag should score at least two."""
        assert _score_alignment("", "", {"feature"}) >= 2

    def test_meta_tags_zero(self):
        """The ``philosophy`` tag should score zero."""
        points = _score_alignment("", "", {"philosophy"})
        assert points == 0

    def test_loop_generated_bonus(self):
        """The ``loop-generated`` tag should score at least one."""
        assert _score_alignment("", "", {"loop-generated"}) >= 1
|
|
|
|
|
|
# ── score_issue ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestScoreIssue:
    """Checks for ``score_issue`` applied to raw issue dicts."""

    def test_bug_issue_classified_correctly(self):
        """A bug-labelled issue should be typed ``bug`` and flagged P0."""
        result = score_issue(_make_raw_issue(labels=["bug"], title="[Bug] Crash on startup"))
        assert result.issue_type == "bug"
        assert result.is_p0 is True

    def test_feature_issue_classified(self):
        """A feature-labelled issue should be typed ``feature``."""
        result = score_issue(_make_raw_issue(labels=["feature"], title="Add voice support"))
        assert result.issue_type == "feature"

    def test_philosophy_issue_classified(self):
        """A philosophy-labelled issue should be typed ``philosophy``."""
        payload = _make_raw_issue(
            labels=["philosophy"],
            title="[Philosophy] Should Timmy sleep?",
        )
        assert score_issue(payload).issue_type == "philosophy"

    def test_research_issue_classified(self):
        """A research-labelled issue should be typed ``research``."""
        payload = _make_raw_issue(labels=["research"], title="Investigate model options")
        assert score_issue(payload).issue_type == "research"

    def test_ready_flag_set_when_score_high(self):
        """A detailed bug report should reach the threshold and be marked ready."""
        detailed_body = (
            "## Problem\nX breaks.\n## Solution\nFix src/timmy/agent.py def run()\n"
            "should return True\nmust pass tox -e unit"
        )
        result = score_issue(_make_raw_issue(labels=["bug"], body=detailed_body))
        assert result.score >= READY_THRESHOLD
        assert result.ready is True

    def test_is_blocked_detected_in_body(self):
        """A "blocked by" phrase in the body should set ``is_blocked``."""
        payload = _make_raw_issue(body="This is blocked by issue #50")
        assert score_issue(payload).is_blocked is True

    def test_is_blocked_detected_in_title(self):
        """A ``[blocking]`` title prefix should set ``is_blocked``."""
        payload = _make_raw_issue(title="[blocking] Cannot proceed")
        result = score_issue(payload)
        # "blocking" in brackets becomes a tag
        assert result.is_blocked is True

    def test_unassigned_when_no_assignees(self):
        """No assignees should mean ``is_unassigned`` is true."""
        result = score_issue(_make_raw_issue(assignees=[]))
        assert result.is_unassigned is True

    def test_assigned_when_has_assignee(self):
        """One assignee should mean ``is_unassigned`` is false."""
        result = score_issue(_make_raw_issue(assignees=["claude"]))
        assert result.is_unassigned is False

    def test_age_days_computed(self):
        """An issue created on 2026-01-01 should have a positive age."""
        created = "2026-01-01T00:00:00Z"
        result = score_issue(_make_raw_issue(created_at=created))
        assert result.age_days > 0

    def test_needs_kimi_for_research_label(self):
        """A ``kimi-ready`` label should set ``needs_kimi``."""
        result = score_issue(_make_raw_issue(labels=["kimi-ready"]))
        assert result.needs_kimi is True
|
|
|
|
|
|
# ── decide ────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestDecide:
    """Checks for the action and agent chosen by ``decide``."""

    def test_philosophy_skipped(self):
        """Philosophy issues should be skipped with a reason that says so."""
        candidate = _make_scored_issue(issue_type="philosophy", tags={"philosophy"})
        verdict = decide(candidate)
        assert verdict.action == "skip"
        assert "philosophy" in verdict.reason.lower()

    def test_assigned_issue_skipped(self):
        """Issues that already have an assignee should be skipped."""
        verdict = decide(_make_scored_issue(assignees=["perplexity"]))
        assert verdict.action == "skip"
        assert "assigned" in verdict.reason.lower()

    def test_low_score_skipped(self):
        """Issues that are not ready should be skipped, citing the threshold."""
        verdict = decide(_make_scored_issue(score=2, ready=False))
        assert verdict.action == "skip"
        assert "threshold" in verdict.reason.lower()

    def test_blocked_issue_flagged_for_alex(self):
        """Blocked issues should be flagged for the owner."""
        verdict = decide(_make_scored_issue(is_blocked=True))
        assert verdict.action == "flag_alex"
        assert verdict.agent == OWNER_LOGIN

    def test_research_issue_assigned_kimi(self):
        """Research issues that are neither P0 nor blocked should go to Kimi."""
        candidate = _make_scored_issue(
            issue_type="research",
            tags={"research"},
            is_p0=False,
            is_blocked=False,
        )
        verdict = decide(candidate)
        assert verdict.action == "assign_kimi"
        assert verdict.agent == AGENT_KIMI

    def test_kimi_ready_label_assigns_kimi(self):
        """A ``kimi-ready`` label should route the issue to Kimi."""
        candidate = _make_scored_issue(
            issue_type="unknown",
            tags={"kimi-ready"},
            labels=["kimi-ready"],
            is_p0=False,
            is_blocked=False,
        )
        assert decide(candidate).action == "assign_kimi"

    def test_p0_bug_assigns_claude(self):
        """An unblocked P0 bug should be assigned to Claude."""
        candidate = _make_scored_issue(issue_type="bug", is_p0=True, is_blocked=False)
        verdict = decide(candidate)
        assert verdict.action == "assign_claude"
        assert verdict.agent == AGENT_CLAUDE

    def test_ready_feature_assigns_claude(self):
        """A ready, unblocked feature should be assigned to Claude."""
        candidate = _make_scored_issue(
            issue_type="feature",
            is_p0=False,
            is_blocked=False,
            tags={"feature"},
        )
        verdict = decide(candidate)
        assert verdict.action == "assign_claude"
        assert verdict.agent == AGENT_CLAUDE

    def test_decision_has_reason(self):
        """Every decision should carry a reason longer than ten characters."""
        verdict = decide(_make_scored_issue())
        assert len(verdict.reason) > 10
|
|
|
|
|
|
# ── _build_audit_comment ──────────────────────────────────────────────────────
|
|
|
|
|
|
class TestBuildAuditComment:
    """Checks for the text produced by ``_build_audit_comment``."""

    @staticmethod
    def _render(action: str, reason: str, agent: str) -> str:
        """Build a decision for issue 42 and return its audit comment."""
        decision = TriageDecision(42, action, reason, agent=agent)
        return _build_audit_comment(decision)

    def test_contains_timmy_triage_header(self):
        """The comment should contain the "Timmy Triage" header text."""
        text = self._render("assign_claude", "High priority bug", AGENT_CLAUDE)
        assert "Timmy Triage" in text

    def test_contains_issue_reason(self):
        """The decision's reason should appear verbatim in the comment."""
        text = self._render("assign_claude", "Urgent P0 bug", AGENT_CLAUDE)
        assert "Urgent P0 bug" in text

    def test_assign_claude_mentions_agent(self):
        """An ``assign_claude`` comment should name the Claude agent."""
        text = self._render("assign_claude", "reason", AGENT_CLAUDE)
        assert AGENT_CLAUDE in text

    def test_assign_kimi_mentions_label(self):
        """An ``assign_kimi`` comment should mention the Kimi-ready label."""
        text = self._render("assign_kimi", "reason", AGENT_KIMI)
        assert KIMI_READY_LABEL in text

    def test_flag_alex_mentions_owner(self):
        """A ``flag_alex`` comment should mention the owner login."""
        text = self._render("flag_alex", "blocked", OWNER_LOGIN)
        assert OWNER_LOGIN in text

    def test_contains_override_note(self):
        """The comment should mention "override" (case-insensitive)."""
        text = self._render("assign_claude", "reason", AGENT_CLAUDE)
        assert "override" in text.lower()
|
|
|
|
|
|
# ── _build_daily_summary ──────────────────────────────────────────────────────
|
|
|
|
|
|
class TestBuildDailySummary:
    """Checks for the text produced by ``_build_daily_summary``."""

    def _make_result(self, decisions=None) -> TriageCycleResult:
        """Return a cycle result with 10 open, 8 scored and 5 ready issues."""
        stamp = datetime.now(UTC).isoformat()
        return TriageCycleResult(
            timestamp=stamp,
            total_open=10,
            scored=8,
            ready=5,
            decisions=decisions or [],
        )

    def test_contains_open_count(self):
        """The summary should contain the total-open figure."""
        cycle = self._make_result()
        issues = [_make_scored_issue(number=n, ready=True, score=6) for n in range(1, 4)]
        text = _build_daily_summary(cycle, issues)
        assert "10" in text  # total_open

    def test_contains_ready_count(self):
        """The summary should contain the ready figure."""
        text = _build_daily_summary(self._make_result(), [])
        assert "5" in text

    def test_actions_taken_section(self):
        """An executed decision should be listed under "Actions Taken"."""
        executed = TriageDecision(1, "assign_claude", "P0 bug", agent="claude", executed=True)
        cycle = self._make_result(decisions=[executed])
        text = _build_daily_summary(cycle, [])
        assert "Actions Taken" in text
        assert "#1" in text

    def test_top_issues_listed(self):
        """A scored issue passed in should be referenced by number."""
        issues = [_make_scored_issue(number=99, ready=True, score=8)]
        cycle = self._make_result()
        text = _build_daily_summary(cycle, issues)
        assert "#99" in text

    def test_footer_present(self):
        """The summary should contain the "Auto-generated" footer text."""
        text = _build_daily_summary(self._make_result(), [])
        assert "Auto-generated" in text
|
|
|
|
|
|
# ── BacklogTriageLoop ─────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestBacklogTriageLoop:
    """Checks for ``BacklogTriageLoop`` construction and ``run_once``."""

    # Detailed bug body shared by the dry-run and assignment tests.
    _READY_BUG_BODY = (
        "## Problem\nCrash on login.\n## Solution\nFix src/auth.py "
        "def login()\nshould return 200\nmust pass tox tests"
    )

    @staticmethod
    def _base_settings(*, token: str, dry_run: bool) -> MagicMock:
        """Return mock settings with Gitea enabled and the triage options set."""
        fake = MagicMock()
        fake.gitea_enabled = True
        fake.gitea_token = token
        fake.backlog_triage_interval_seconds = 900
        fake.backlog_triage_dry_run = dry_run
        fake.backlog_triage_daily_summary = False
        return fake

    @classmethod
    def _gitea_settings(cls, *, token: str, dry_run: bool) -> MagicMock:
        """Return the base mock settings plus a repo name and server URL."""
        fake = cls._base_settings(token=token, dry_run=dry_run)
        fake.gitea_repo = "owner/repo"
        fake.gitea_url = "http://gitea.local"
        return fake

    @staticmethod
    def _response(status: int, payload) -> MagicMock:
        """Return a mock response with ``status_code`` and a ``json()`` payload."""
        reply = MagicMock(status_code=status)
        reply.json.return_value = payload
        return reply

    @staticmethod
    def _client_context(client: AsyncMock) -> AsyncMock:
        """Wrap *client* in a mock whose ``async with`` entry yields it."""
        ctx = AsyncMock()
        ctx.__aenter__.return_value = client
        ctx.__aexit__.return_value = False
        return ctx

    def test_default_interval_from_settings(self):
        """Without an explicit interval the loop should use the settings value."""
        loop = BacklogTriageLoop()
        from config import settings

        expected = float(settings.backlog_triage_interval_seconds)
        assert loop._interval == expected

    def test_custom_interval(self):
        """An explicit interval should be stored as a float."""
        assert BacklogTriageLoop(interval=300)._interval == 300.0

    def test_dry_run_default(self):
        """Passing ``dry_run=True`` should be stored on the loop."""
        assert BacklogTriageLoop(dry_run=True)._dry_run is True

    def test_not_running_initially(self):
        """A new loop should report that it is not running."""
        assert BacklogTriageLoop().is_running is False

    def test_stop_sets_running_false(self):
        """``stop()`` should clear the running flag."""
        loop = BacklogTriageLoop()
        loop._running = True
        loop.stop()
        assert loop._running is False

    def test_cycle_count_starts_zero(self):
        """A new loop should have a cycle count of zero."""
        assert BacklogTriageLoop().cycle_count == 0

    @pytest.mark.asyncio
    async def test_run_once_skips_when_no_gitea_token(self):
        """With an empty token the cycle should report zero open issues."""
        loop = BacklogTriageLoop()
        fake_settings = self._base_settings(token="", dry_run=False)

        with patch("timmy.backlog_triage.settings", fake_settings):
            outcome = await loop.run_once()

        assert outcome.total_open == 0

    @pytest.mark.asyncio
    async def test_run_once_dry_run_no_api_writes(self):
        """In dry_run mode, decisions are made but no Gitea API writes happen."""
        loop = BacklogTriageLoop(dry_run=True, daily_summary=False)

        issues = [
            _make_raw_issue(
                number=10,
                title="Fix crash",
                labels=["bug"],
                body=self._READY_BUG_BODY,
            )
        ]
        fake_settings = self._gitea_settings(token="fake-token", dry_run=True)

        client = AsyncMock()
        client.get.return_value = self._response(200, issues)
        ctx = self._client_context(client)

        with (
            patch("timmy.backlog_triage.settings", fake_settings),
            patch("httpx.AsyncClient", return_value=ctx),
        ):
            outcome = await loop.run_once()

        # No POST/PATCH calls in dry run
        client.post.assert_not_called()
        client.patch.assert_not_called()

        assert outcome.total_open == 1
        assert loop.cycle_count == 1
        assert len(loop.history) == 1

    @pytest.mark.asyncio
    async def test_run_once_assigns_unassigned_bug(self):
        """Unassigned ready bug should be assigned to Claude with audit comment."""
        loop = BacklogTriageLoop(dry_run=False, daily_summary=False)

        issues = [
            _make_raw_issue(
                number=5,
                title="Fix crash",
                labels=["bug"],
                body=self._READY_BUG_BODY,
            )
        ]
        fake_settings = self._gitea_settings(token="fake-token", dry_run=False)

        client = AsyncMock()
        # GET /issues returns our issue
        client.get.return_value = self._response(200, issues)
        # POST /comments returns success
        client.post.return_value = self._response(201, {"id": 1})
        # PATCH /issues/{n} (assign) returns success
        client.patch.return_value = self._response(200, {"number": 5})
        ctx = self._client_context(client)

        with (
            patch("timmy.backlog_triage.settings", fake_settings),
            patch("httpx.AsyncClient", return_value=ctx),
            patch("asyncio.sleep", new_callable=AsyncMock),
        ):
            outcome = await loop.run_once()

        assert outcome.total_open == 1
        # Comment should have been posted
        client.post.assert_called()
        # Assign should have been called (PATCH)
        client.patch.assert_called()

    @pytest.mark.asyncio
    async def test_run_once_skips_already_assigned(self):
        """Issues already assigned should not be acted upon."""
        loop = BacklogTriageLoop(dry_run=False, daily_summary=False)

        issues = [
            _make_raw_issue(
                number=3,
                labels=["bug"],
                assignees=["perplexity"],
                body="## Problem\nX\nmust pass tox\nshould return 200 at least 3 times",
            )
        ]
        fake_settings = self._gitea_settings(token="tok", dry_run=False)

        client = AsyncMock()
        client.get.return_value = self._response(200, issues)
        ctx = self._client_context(client)

        with (
            patch("timmy.backlog_triage.settings", fake_settings),
            patch("httpx.AsyncClient", return_value=ctx),
        ):
            outcome = await loop.run_once()

        # No writes for already-assigned issue
        client.post.assert_not_called()
        client.patch.assert_not_called()
        assert outcome.decisions[0].action == "skip"
|
|
|
|
|
|
# ── ScoredIssue properties ────────────────────────────────────────────────────
|
|
|
|
|
|
class TestScoredIssueProperties:
    """Checks for the ``is_unassigned`` and ``needs_kimi`` properties."""

    def test_is_unassigned_true_when_no_assignees(self):
        """No assignees should mean ``is_unassigned`` is true."""
        subject = _make_scored_issue(assignees=[])
        assert subject.is_unassigned is True

    def test_is_unassigned_false_when_assigned(self):
        """One assignee should mean ``is_unassigned`` is false."""
        subject = _make_scored_issue(assignees=["claude"])
        assert subject.is_unassigned is False

    def test_needs_kimi_for_research_tag(self):
        """The ``research`` tag should set ``needs_kimi``."""
        assert _make_scored_issue(tags={"research"}).needs_kimi is True

    def test_needs_kimi_for_kimi_ready_label(self):
        """The ``kimi-ready`` label should set ``needs_kimi``."""
        subject = _make_scored_issue(labels=["kimi-ready"], tags=set())
        assert subject.needs_kimi is True

    def test_needs_kimi_false_for_bug(self):
        """A bug-tagged issue without labels should not need Kimi."""
        subject = _make_scored_issue(tags={"bug"}, labels=[])
        assert subject.needs_kimi is False
|