diff --git a/src/timmy/research_triage.py b/src/timmy/research_triage.py
new file mode 100644
index 00000000..2af0df32
--- /dev/null
+++ b/src/timmy/research_triage.py
@@ -0,0 +1,369 @@
"""Research triage — extract action items from research reports and file Gitea issues.

Closes the loop: research → knowledge → actionable engineering work.

The LLM extracts action items during synthesis (not post-processed), then
each item is filed as a Gitea issue with appropriate labels, source links,
and evidence from the original research.

Usage::

    from timmy.research_triage import triage_research_report

    results = await triage_research_report(
        report="## Findings\\n...",
        source_issue=946,
    )
"""

from __future__ import annotations

import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any

import httpx

from config import settings

logger = logging.getLogger(__name__)

# Regex to strip markdown code fences from LLM output.  Matches both the
# opening fence (``` or ```json) and the bare closing ``` — each anchored
# at line start via MULTILINE — so a single .sub() removes both.
_FENCE_RE = re.compile(r"^```(?:json)?\s*\n?", re.MULTILINE)


@dataclass
class ActionItem:
    """A single actionable item extracted from a research report."""

    title: str
    body: str
    labels: list[str] = field(default_factory=list)
    priority: str = "medium"
    source_urls: list[str] = field(default_factory=list)

    def to_issue_body(self, source_issue: int | None = None) -> str:
        """Format for a Gitea issue body with source attribution."""
        parts = [self.body]

        if self.source_urls:
            parts.append("\n### Source Evidence")
            for url in self.source_urls:
                parts.append(f"- {url}")

        if source_issue:
            parts.append(
                f"\n### Origin\nExtracted from research in #{source_issue}"
            )

        parts.append("\n---\n*Auto-triaged from research findings by Timmy*")
        return "\n".join(parts)


def _build_extraction_prompt(report: str) -> str:
    """Build the LLM prompt for extracting action items from a research report."""
    return (
        "You are triaging a research report for actionable engineering work.\n"
        "Extract 0-5 CONCRETE action items — bugs to fix, features to build,\n"
        "infrastructure to set up, or investigations to run.\n\n"
        "Rules:\n"
        "- Only include items that map to real engineering tasks\n"
        "- Skip vague recommendations or philosophical observations\n"
        "- Each item should be specific enough to become a Gitea issue\n"
        "- Include evidence/URLs from the report in source_urls\n"
        "- Priority: high (blocking or critical), medium (important), low (nice-to-have)\n"
        "- Labels: pick from [actionable, research, bug, feature, infrastructure, "
        "performance, security, kimi-ready]\n"
        "  - 'kimi-ready' means a well-scoped task suitable for an AI agent\n"
        "  - 'actionable' should be on every item (these are all actionable)\n\n"
        "For each item return:\n"
        '- "title": Clear, specific title with area prefix '
        '(e.g. "[MCP] Restore tool server with FastMCP")\n'
        '- "body": Detailed markdown body with:\n'
        "  **What:** What needs to be done\n"
        "  **Why:** Why this matters (link to research finding)\n"
        "  **Suggested approach:** How to implement\n"
        "  **Acceptance criteria:** How to verify\n"
        '- "labels": Array of label strings\n'
        '- "priority": One of high, medium, low\n'
        '- "source_urls": Array of URLs referenced in the research\n\n'
        "Return ONLY a JSON array of objects. Return [] if nothing is actionable.\n\n"
        f"Research report:\n{report}\n\nJSON array:"
    )


def _parse_llm_response(raw: str) -> list[dict[str, Any]]:
    """Parse LLM JSON response, stripping code fences if present.

    Raises json.JSONDecodeError on unparseable input; callers catch it.
    """
    cleaned = raw.strip()

    # Strip markdown code fences using the module-level regex: one pass
    # removes both the opening ```/```json line and the closing ``` line.
    if cleaned.startswith("```"):
        cleaned = _FENCE_RE.sub("", cleaned).strip()

    items = json.loads(cleaned)
    if not isinstance(items, list):
        # A bare object (or scalar) is not a valid item list — treat as empty.
        return []
    return items


def _validate_action_item(raw_item: dict[str, Any]) -> ActionItem | None:
    """Validate and convert a raw dict to an ActionItem, or None if invalid.

    Defensive against malformed LLM output: wrong-typed fields are coerced
    or rejected rather than raising — an uncaught exception here would
    abort the whole extraction batch in extract_action_items().
    """
    if not isinstance(raw_item, dict):
        return None

    title = raw_item.get("title", "")
    body = raw_item.get("body", "")
    # Reject non-string title/body outright — calling .strip() on e.g. an
    # int or None coming back from the LLM would raise AttributeError.
    if not isinstance(title, str) or not isinstance(body, str):
        return None
    title = title.strip()
    body = body.strip()

    # Minimum-length gates filter out vague one-word stubs.
    if not title or len(title) < 10:
        return None
    if not body or len(body) < 20:
        return None

    labels = raw_item.get("labels", [])
    if isinstance(labels, str):
        labels = [part.strip() for part in labels.split(",") if part.strip()]
    if not isinstance(labels, list):
        labels = []
    # Drop any non-string entries the LLM may have produced.
    labels = [lbl for lbl in labels if isinstance(lbl, str)]

    # Ensure 'actionable' label is always present
    if "actionable" not in labels:
        labels.insert(0, "actionable")

    priority = raw_item.get("priority", "medium")
    if not isinstance(priority, str):
        priority = "medium"
    priority = priority.strip().lower()
    if priority not in ("high", "medium", "low"):
        priority = "medium"

    source_urls = raw_item.get("source_urls", [])
    if not isinstance(source_urls, list):
        source_urls = []
    # Keep only string URLs so to_issue_body() can format them safely.
    source_urls = [url for url in source_urls if isinstance(url, str)]

    return ActionItem(
        title=title,
        body=body,
        labels=labels,
        priority=priority,
        source_urls=source_urls,
    )


async def extract_action_items(
    report: str,
    llm_caller: Any | None = None,
) -> list[ActionItem]:
    """Extract actionable engineering items from a research report.

    Uses the LLM to identify concrete tasks, bugs, features, and
    infrastructure work from structured research output.

    Args:
        report: The research report text (markdown).
        llm_caller: Optional async callable(prompt) -> str for LLM.
            Falls back to the cascade router.

    Returns:
        List of validated ActionItem objects (0-5 items).
    """
    if not report or not report.strip():
        return []

    prompt = _build_extraction_prompt(report)

    try:
        if llm_caller is not None:
            raw = await llm_caller(prompt)
        else:
            raw = await _call_llm(prompt)
    except Exception as exc:
        # Best-effort: an LLM outage degrades to "no items", never a crash.
        logger.warning("LLM extraction failed: %s", exc)
        return []

    if not raw or not raw.strip():
        return []

    try:
        raw_items = _parse_llm_response(raw)
    except (json.JSONDecodeError, ValueError) as exc:
        logger.warning("Failed to parse LLM action items: %s", exc)
        return []

    items = []
    for raw_item in raw_items[:5]:  # Safety cap
        item = _validate_action_item(raw_item)
        if item is not None:
            items.append(item)

    logger.info("Extracted %d action items from research report", len(items))
    return items


async def _call_llm(prompt: str) -> str:
    """Call the cascade router for LLM completion.

    Falls back gracefully if the router is unavailable.
    """
    # Imported lazily so the module loads even when the router package
    # is unavailable (callers may always inject llm_caller instead).
    from infrastructure.router import get_router

    router = get_router()
    messages = [{"role": "user", "content": prompt}]
    result = await router.complete(messages=messages, temperature=0.1)
    return result.get("content", "") if isinstance(result, dict) else str(result)


async def create_gitea_issue(
    item: ActionItem,
    source_issue: int | None = None,
) -> dict[str, Any] | None:
    """Create a Gitea issue from an ActionItem via the REST API.

    Args:
        item: The action item to file.
        source_issue: Parent research issue number to link back to.

    Returns:
        The created issue dict from Gitea API, or None on failure.
    """
    if not settings.gitea_enabled or not settings.gitea_token:
        logger.debug("Gitea not configured — skipping issue creation")
        return None

    # Guard against a malformed "owner/repo" setting — unpacking a split
    # with no '/' would otherwise raise an uncaught ValueError.
    try:
        owner, repo = settings.gitea_repo.split("/", 1)
    except (ValueError, AttributeError):
        logger.warning("Invalid gitea_repo setting: %r", settings.gitea_repo)
        return None

    api_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/issues"

    body = item.to_issue_body(source_issue=source_issue)

    payload: dict[str, Any] = {
        "title": item.title,
        "body": body,
    }

    # Resolve label names to IDs (Gitea's issue API takes numeric label IDs).
    label_ids = await _resolve_label_ids(item.labels, owner, repo)
    if label_ids:
        payload["labels"] = label_ids

    try:
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.post(
                api_url,
                headers={
                    "Authorization": f"token {settings.gitea_token}",
                    "Content-Type": "application/json",
                },
                json=payload,
            )

        if resp.status_code in (200, 201):
            issue_data = resp.json()
            logger.info(
                "Created Gitea issue #%s: %s",
                issue_data.get("number", "?"),
                item.title[:60],
            )
            return issue_data

        logger.warning(
            "Gitea issue creation failed (HTTP %s): %s",
            resp.status_code,
            resp.text[:200],
        )
        return None

    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        logger.warning("Gitea connection failed: %s", exc)
        return None
    except Exception as exc:
        logger.error("Unexpected error creating Gitea issue: %s", exc)
        return None


async def _resolve_label_ids(
    label_names: list[str],
    owner: str,
    repo: str,
) -> list[int]:
    """Resolve label names to Gitea label IDs, creating missing labels.

    Returns a list of integer label IDs for the issue payload.
    """
    if not label_names:
        return []

    labels_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/labels"
    headers = {
        "Authorization": f"token {settings.gitea_token}",
        "Content-Type": "application/json",
    }

    try:
        async with httpx.AsyncClient(timeout=10) as client:
            # Fetch existing labels
            resp = await client.get(labels_url, headers=headers)
            if resp.status_code != 200:
                return []

            existing = {label["name"]: label["id"] for label in resp.json()}
            label_ids = []

            for name in label_names:
                if name in existing:
                    label_ids.append(existing[name])
                else:
                    # Auto-create missing labels with a default color
                    create_resp = await client.post(
                        labels_url,
                        headers=headers,
                        json={"name": name, "color": "#0075ca"},
                    )
                    if create_resp.status_code in (200, 201):
                        label_ids.append(create_resp.json()["id"])

            return label_ids

    except Exception as exc:
        # Labels are cosmetic — failure here must not block issue creation.
        logger.debug("Label resolution failed: %s", exc)
        return []


async def triage_research_report(
    report: str,
    source_issue: int | None = None,
    llm_caller: Any | None = None,
    dry_run: bool = False,
) -> list[dict[str, Any]]:
    """End-to-end: extract action items from research and file Gitea issues.

    This is the main entry point that closes the research → backlog loop.

    Args:
        report: Research report text (markdown).
        source_issue: The Gitea issue number that produced this research.
        llm_caller: Optional async callable(prompt) -> str for LLM calls.
        dry_run: If True, extract items but don't create issues.

    Returns:
        List of dicts with 'action_item' and 'gitea_issue' (or None) keys.
    """
    items = await extract_action_items(report, llm_caller=llm_caller)

    if not items:
        logger.info("No action items extracted from research report")
        return []

    results = []
    for item in items:
        if dry_run:
            results.append({"action_item": item, "gitea_issue": None})
            continue

        issue_data = await create_gitea_issue(item, source_issue=source_issue)
        results.append({"action_item": item, "gitea_issue": issue_data})

    created_count = sum(1 for r in results if r["gitea_issue"] is not None)
    logger.info(
        "Research triage complete: %d items extracted, %d issues created",
        len(results),
        created_count,
    )
    return results
diff --git a/tests/timmy/test_research_triage.py b/tests/timmy/test_research_triage.py
new file mode 100644
index 00000000..66407373
--- /dev/null
+++ b/tests/timmy/test_research_triage.py
@@ -0,0 +1,348 @@
"""Tests for research triage — action item extraction and Gitea issue filing."""

import json
from unittest.mock import AsyncMock, MagicMock, patch

import httpx
import pytest

from timmy.research_triage import (
    ActionItem,
    _parse_llm_response,
    _validate_action_item,
    create_gitea_issue,
    extract_action_items,
    triage_research_report,
)

# ---------------------------------------------------------------------------
# ActionItem
# ---------------------------------------------------------------------------

SAMPLE_REPORT = """
## Research: MCP Abstraction Layer

### Finding 1: FastMCP overhead is negligible
FastMCP averages 26.45ms per tool call. Total overhead <3% of budget.

### Finding 2: Agno tool calling is broken
Agno issues #2231, #2625 document persistent breakage with Ollama.
Fix: Use Ollama's `format` parameter with Pydantic JSON schemas.

### Recommendation
Implement three-tier router for structured output.
"""

SAMPLE_LLM_RESPONSE = json.dumps(
    [
        {
            "title": "[Router] Implement three-tier structured output router",
            "body": (
                "**What:** Build a three-tier router that uses Ollama's "
                "`format` parameter for structured output.\n"
                "**Why:** Agno's native tool calling is broken (#2231, #2625). "
                "Pydantic JSON schemas with `format` bypass the issue.\n"
                "**Suggested approach:** Add format parameter support to "
                "CascadeRouter.\n"
                "**Acceptance criteria:** Tool calls return valid JSON matching "
                "the Pydantic schema."
            ),
            "labels": ["actionable", "feature", "kimi-ready"],
            "priority": "high",
            "source_urls": ["https://github.com/agno-agi/agno/issues/2231"],
        },
    ]
)


class TestActionItem:
    def test_to_issue_body_basic(self):
        item = ActionItem(title="Test", body="Test body")
        body = item.to_issue_body()
        assert "Test body" in body
        assert "Auto-triaged" in body

    def test_to_issue_body_with_source_issue(self):
        item = ActionItem(title="Test", body="Test body")
        body = item.to_issue_body(source_issue=946)
        assert "#946" in body
        assert "Origin" in body

    def test_to_issue_body_with_source_urls(self):
        item = ActionItem(
            title="Test",
            body="Body",
            source_urls=["https://example.com/finding"],
        )
        body = item.to_issue_body()
        assert "https://example.com/finding" in body
        assert "Source Evidence" in body


# ---------------------------------------------------------------------------
# _parse_llm_response
# ---------------------------------------------------------------------------


class TestParseLlmResponse:
    def test_plain_json(self):
        items = _parse_llm_response('[{"title": "foo"}]')
        assert len(items) == 1
        assert items[0]["title"] == "foo"

    def test_fenced_json(self):
        raw = '```json\n[{"title": "bar"}]\n```'
        items = _parse_llm_response(raw)
        assert len(items) == 1
        assert items[0]["title"] == "bar"

    def test_empty_array(self):
        assert _parse_llm_response("[]") == []

    def test_non_array_returns_empty(self):
        assert _parse_llm_response('{"title": "not an array"}') == []

    def test_invalid_json_raises(self):
        with pytest.raises(json.JSONDecodeError):
            _parse_llm_response("not json at all")


# ---------------------------------------------------------------------------
# _validate_action_item
# ---------------------------------------------------------------------------


class TestValidateActionItem:
    def test_valid_item(self):
        raw = {
            "title": "[Area] A specific clear title",
            "body": "Detailed body with enough content to be useful.",
            "labels": ["actionable", "bug"],
            "priority": "high",
        }
        item = _validate_action_item(raw)
        assert item is not None
        assert item.title == "[Area] A specific clear title"
        assert item.priority == "high"
        assert "actionable" in item.labels

    def test_short_title_rejected(self):
        raw = {"title": "Short", "body": "Detailed body with enough content here."}
        assert _validate_action_item(raw) is None

    def test_short_body_rejected(self):
        raw = {"title": "A perfectly fine title here", "body": "Too short"}
        assert _validate_action_item(raw) is None

    def test_missing_title_rejected(self):
        raw = {"body": "Detailed body with enough content to be useful."}
        assert _validate_action_item(raw) is None

    def test_non_dict_rejected(self):
        assert _validate_action_item("not a dict") is None

    def test_non_string_fields_rejected(self):
        # Malformed LLM output must be rejected, not raise AttributeError.
        raw = {
            "title": None,
            "body": "Detailed body with enough content to be useful.",
        }
        assert _validate_action_item(raw) is None
        raw = {
            "title": "A perfectly fine title here",
            "body": "Detailed body with enough content to be useful.",
            "priority": 1,
        }
        item = _validate_action_item(raw)
        assert item is not None
        assert item.priority == "medium"

    def test_actionable_label_auto_added(self):
        raw = {
            "title": "A perfectly fine title here",
            "body": "Detailed body with enough content to be useful.",
            "labels": ["bug"],
        }
        item = _validate_action_item(raw)
        assert item is not None
        assert "actionable" in item.labels

    def test_labels_as_csv_string(self):
        raw = {
            "title": "A perfectly fine title here",
            "body": "Detailed body with enough content to be useful.",
            "labels": "bug, feature",
        }
        item = _validate_action_item(raw)
        assert item is not None
        assert "bug" in item.labels
        assert "feature" in item.labels

    def test_invalid_priority_defaults_medium(self):
        raw = {
            "title": "A perfectly fine title here",
            "body": "Detailed body with enough content to be useful.",
            "priority": "urgent",
        }
        item = _validate_action_item(raw)
        assert item is not None
        assert item.priority == "medium"


# ---------------------------------------------------------------------------
# extract_action_items
# ---------------------------------------------------------------------------


class TestExtractActionItems:
    @pytest.mark.asyncio
    async def test_extracts_items_from_report(self):
        mock_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=mock_llm)
        assert len(items) == 1
        assert "three-tier" in items[0].title.lower()
        assert items[0].priority == "high"
        mock_llm.assert_called_once()

    @pytest.mark.asyncio
    async def test_empty_report_returns_empty(self):
        items = await extract_action_items("")
        assert items == []

    @pytest.mark.asyncio
    async def test_llm_failure_returns_empty(self):
        mock_llm = AsyncMock(side_effect=RuntimeError("LLM down"))
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=mock_llm)
        assert items == []

    @pytest.mark.asyncio
    async def test_llm_returns_empty_string(self):
        mock_llm = AsyncMock(return_value="")
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=mock_llm)
        assert items == []

    @pytest.mark.asyncio
    async def test_llm_returns_invalid_json(self):
        mock_llm = AsyncMock(return_value="not valid json")
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=mock_llm)
        assert items == []

    @pytest.mark.asyncio
    async def test_caps_at_five_items(self):
        many_items = [
            {
                "title": f"[Area] Action item number {i} is specific",
                "body": f"Detailed body for action item {i} with enough words.",
                "labels": ["actionable"],
                "priority": "medium",
            }
            for i in range(10)
        ]
        mock_llm = AsyncMock(return_value=json.dumps(many_items))
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=mock_llm)
        assert len(items) <= 5


# ---------------------------------------------------------------------------
# create_gitea_issue
# ---------------------------------------------------------------------------


class TestCreateGiteaIssue:
    @pytest.mark.asyncio
    async def test_creates_issue_via_api(self):
        item = ActionItem(
            title="[Test] Create a test issue",
            body="This is a test issue body with details.",
            labels=["actionable"],
        )
        issue_resp = MagicMock()
        issue_resp.status_code = 201
        issue_resp.json.return_value = {"number": 42, "title": item.title}

        mock_client = AsyncMock()
        mock_client.post.return_value = issue_resp

        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await create_gitea_issue(item, source_issue=946)

        assert result is not None
        assert result["number"] == 42

    @pytest.mark.asyncio
    async def test_returns_none_when_disabled(self):
        item = ActionItem(title="[Test] Disabled test", body="Body content here.")
        with patch("timmy.research_triage.settings") as mock_settings:
            mock_settings.gitea_enabled = False
            mock_settings.gitea_token = ""
            result = await create_gitea_issue(item)
        assert result is None

    @pytest.mark.asyncio
    async def test_handles_connection_error(self):
        item = ActionItem(
            title="[Test] Connection fail",
            body="Body content for connection test.",
        )
        mock_client = AsyncMock()
        mock_client.post.side_effect = httpx.ConnectError("refused")

        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await create_gitea_issue(item)
        assert result is None


# ---------------------------------------------------------------------------
# triage_research_report (integration)
# ---------------------------------------------------------------------------


class TestTriageResearchReport:
    @pytest.mark.asyncio
    async def test_dry_run_extracts_without_filing(self):
        mock_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        results = await triage_research_report(
            SAMPLE_REPORT, source_issue=946, llm_caller=mock_llm, dry_run=True
        )
        assert len(results) == 1
        assert results[0]["action_item"] is not None
        assert results[0]["gitea_issue"] is None

    @pytest.mark.asyncio
    async def test_empty_report_returns_empty(self):
        results = await triage_research_report("", llm_caller=AsyncMock(return_value="[]"))
        assert results == []

    @pytest.mark.asyncio
    async def test_end_to_end_with_mock_gitea(self):
        mock_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)

        issue_resp = MagicMock()
        issue_resp.status_code = 201
        issue_resp.json.return_value = {"number": 99, "title": "test"}

        mock_client = AsyncMock()
        mock_client.post.return_value = issue_resp

        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            results = await triage_research_report(
                SAMPLE_REPORT, source_issue=946, llm_caller=mock_llm
            )

        assert len(results) == 1
        assert results[0]["gitea_issue"]["number"] == 99