1
0

[claude] Auto-create Gitea issues from research findings (#977) (#1060)

Co-authored-by: Claude (Opus 4.6) <claude@hermes.local>
Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
This commit is contained in:
2026-03-23 15:09:18 +00:00
committed by rockachopa
parent 6a674bf9e0
commit ab36149fa5
2 changed files with 717 additions and 0 deletions

View File

@@ -0,0 +1,369 @@
"""Research triage — extract action items from research reports and file Gitea issues.
Closes the loop: research → knowledge → actionable engineering work.
The LLM extracts action items during synthesis (not post-processed), then
each item is filed as a Gitea issue with appropriate labels, source links,
and evidence from the original research.
Usage::
from timmy.research_triage import triage_research_report
results = await triage_research_report(
report="## Findings\\n...",
source_issue=946,
)
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any
import httpx
from config import settings
logger = logging.getLogger(__name__)
# Regex to strip markdown code fences from LLM output
# NOTE(review): appears unused in this module — _parse_llm_response strips
# fences with str.split/rsplit instead; confirm no external user before removing.
_FENCE_RE = re.compile(r"^```(?:json)?\s*\n?", re.MULTILINE)
@dataclass
class ActionItem:
    """A single actionable item extracted from a research report."""

    # Issue title, ideally with an area prefix (see extraction prompt).
    title: str
    # Markdown issue body produced by the LLM.
    body: str
    labels: list[str] = field(default_factory=list)
    priority: str = "medium"
    source_urls: list[str] = field(default_factory=list)

    def to_issue_body(self, source_issue: int | None = None) -> str:
        """Format for a Gitea issue body with source attribution.

        Args:
            source_issue: Optional parent research issue to link back to.

        Returns:
            Markdown body: item text, optional evidence links, optional
            origin section, and a trailing auto-triage footer.
        """
        sections: list[str] = [self.body]
        if self.source_urls:
            sections.append("\n### Source Evidence")
            sections.extend(f"- {url}" for url in self.source_urls)
        if source_issue:
            sections.append(
                f"\n### Origin\nExtracted from research in #{source_issue}"
            )
        sections.append("\n---\n*Auto-triaged from research findings by Timmy*")
        return "\n".join(sections)
def _build_extraction_prompt(report: str) -> str:
    """Build the LLM prompt for extracting action items from a research report.

    Args:
        report: Raw markdown research report, embedded verbatim at the end
            of the prompt.

    Returns:
        A single prompt string instructing the model to emit ONLY a JSON
        array (possibly empty) of action-item objects.
    """
    # NOTE: the output contract stated here (JSON array, 0-5 items, keys
    # title/body/labels/priority/source_urls) is exactly what
    # _parse_llm_response and _validate_action_item rely on downstream.
    return (
        "You are triaging a research report for actionable engineering work.\n"
        "Extract 0-5 CONCRETE action items — bugs to fix, features to build,\n"
        "infrastructure to set up, or investigations to run.\n\n"
        "Rules:\n"
        "- Only include items that map to real engineering tasks\n"
        "- Skip vague recommendations or philosophical observations\n"
        "- Each item should be specific enough to become a Gitea issue\n"
        "- Include evidence/URLs from the report in source_urls\n"
        "- Priority: high (blocking or critical), medium (important), low (nice-to-have)\n"
        "- Labels: pick from [actionable, research, bug, feature, infrastructure, "
        "performance, security, kimi-ready]\n"
        "  - 'kimi-ready' means a well-scoped task suitable for an AI agent\n"
        "  - 'actionable' should be on every item (these are all actionable)\n\n"
        "For each item return:\n"
        '- "title": Clear, specific title with area prefix '
        '(e.g. "[MCP] Restore tool server with FastMCP")\n'
        '- "body": Detailed markdown body with:\n'
        "  **What:** What needs to be done\n"
        "  **Why:** Why this matters (link to research finding)\n"
        "  **Suggested approach:** How to implement\n"
        "  **Acceptance criteria:** How to verify\n"
        '- "labels": Array of label strings\n'
        '- "priority": One of high, medium, low\n'
        '- "source_urls": Array of URLs referenced in the research\n\n'
        "Return ONLY a JSON array of objects. Return [] if nothing is actionable.\n\n"
        f"Research report:\n{report}\n\nJSON array:"
    )
def _parse_llm_response(raw: str) -> list[dict[str, Any]]:
"""Parse LLM JSON response, stripping code fences if present."""
cleaned = raw.strip()
# Strip markdown code fences
if cleaned.startswith("```"):
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
items = json.loads(cleaned)
if not isinstance(items, list):
return []
return items
def _validate_action_item(raw_item: dict[str, Any]) -> ActionItem | None:
    """Validate and convert a raw dict to an ActionItem, or None if invalid.

    Applies minimum-quality gates (title >= 10 chars, body >= 20 chars),
    normalizes labels and priority, and guarantees the 'actionable' label.

    Args:
        raw_item: One object from the LLM's JSON array. Fields may be
            missing, null, or wrongly typed — all are handled here.

    Returns:
        A validated ActionItem, or None when the item should be dropped.
    """
    if not isinstance(raw_item, dict):
        return None
    # BUG FIX: LLMs occasionally emit JSON null or non-string values for
    # these fields; the previous .strip() on the raw value raised
    # AttributeError and aborted the whole extraction. Coerce defensively.
    raw_title = raw_item.get("title")
    title = raw_title.strip() if isinstance(raw_title, str) else ""
    raw_body = raw_item.get("body")
    body = raw_body.strip() if isinstance(raw_body, str) else ""
    if len(title) < 10:
        return None
    if len(body) < 20:
        return None
    labels = raw_item.get("labels", [])
    if isinstance(labels, str):
        # Accept a comma-separated string as a convenience.
        labels = [part.strip() for part in labels.split(",") if part.strip()]
    if not isinstance(labels, list):
        labels = []
    # Ensure 'actionable' label is always present
    if "actionable" not in labels:
        labels.insert(0, "actionable")
    raw_priority = raw_item.get("priority")
    priority = raw_priority.strip().lower() if isinstance(raw_priority, str) else "medium"
    if priority not in ("high", "medium", "low"):
        priority = "medium"
    source_urls = raw_item.get("source_urls", [])
    if not isinstance(source_urls, list):
        source_urls = []
    return ActionItem(
        title=title,
        body=body,
        labels=labels,
        priority=priority,
        source_urls=source_urls,
    )
async def extract_action_items(
    report: str,
    llm_caller: Any | None = None,
) -> list[ActionItem]:
    """Extract actionable engineering items from a research report.

    Uses the LLM to identify concrete tasks, bugs, features, and
    infrastructure work from structured research output.

    Args:
        report: The research report text (markdown).
        llm_caller: Optional async callable(prompt) -> str for LLM.
            Falls back to the cascade router.

    Returns:
        List of validated ActionItem objects (0-5 items).
    """
    if not report or not report.strip():
        return []
    prompt = _build_extraction_prompt(report)
    try:
        if llm_caller is None:
            raw = await _call_llm(prompt)
        else:
            raw = await llm_caller(prompt)
    except Exception as exc:
        logger.warning("LLM extraction failed: %s", exc)
        return []
    if not raw or not raw.strip():
        return []
    try:
        candidates = _parse_llm_response(raw)
    except (json.JSONDecodeError, ValueError) as exc:
        logger.warning("Failed to parse LLM action items: %s", exc)
        return []
    # Validate at most five candidates (safety cap) and drop malformed ones.
    items = [
        item
        for item in (_validate_action_item(c) for c in candidates[:5])
        if item is not None
    ]
    logger.info("Extracted %d action items from research report", len(items))
    return items
async def _call_llm(prompt: str) -> str:
    """Call the cascade router for LLM completion.

    Falls back gracefully if the router is unavailable.
    """
    # Imported lazily so this module stays importable without the router.
    from infrastructure.router import get_router

    response = await get_router().complete(
        messages=[{"role": "user", "content": prompt}],
        temperature=0.1,
    )
    # Routers may return either a dict payload or a bare value.
    if isinstance(response, dict):
        return response.get("content", "")
    return str(response)
async def create_gitea_issue(
    item: ActionItem,
    source_issue: int | None = None,
) -> dict[str, Any] | None:
    """Create a Gitea issue from an ActionItem via the REST API.

    Args:
        item: The action item to file.
        source_issue: Parent research issue number to link back to.

    Returns:
        The created issue dict from Gitea API, or None on failure
        (Gitea disabled, misconfigured repo, HTTP error, or network error).
    """
    if not settings.gitea_enabled or not settings.gitea_token:
        logger.debug("Gitea not configured — skipping issue creation")
        return None
    # BUG FIX: gitea_repo must be "owner/repo"; without this guard a bare
    # repo name makes the tuple unpack below raise ValueError.
    if "/" not in settings.gitea_repo:
        logger.warning("Invalid gitea_repo (expected 'owner/repo'): %s", settings.gitea_repo)
        return None
    owner, repo = settings.gitea_repo.split("/", 1)
    api_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/issues"
    body = item.to_issue_body(source_issue=source_issue)
    payload: dict[str, Any] = {
        "title": item.title,
        "body": body,
    }
    # Resolve label names to IDs (the issue payload carries integer IDs).
    label_ids = await _resolve_label_ids(item.labels, owner, repo)
    if label_ids:
        payload["labels"] = label_ids
    try:
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.post(
                api_url,
                headers={
                    "Authorization": f"token {settings.gitea_token}",
                    "Content-Type": "application/json",
                },
                json=payload,
            )
            if resp.status_code in (200, 201):
                issue_data = resp.json()
                logger.info(
                    "Created Gitea issue #%s: %s",
                    issue_data.get("number", "?"),
                    item.title[:60],
                )
                return issue_data
            logger.warning(
                "Gitea issue creation failed (HTTP %s): %s",
                resp.status_code,
                resp.text[:200],
            )
            return None
    except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
        # Network-level failures are expected when Gitea is down; warn only.
        logger.warning("Gitea connection failed: %s", exc)
        return None
    except Exception as exc:
        logger.error("Unexpected error creating Gitea issue: %s", exc)
        return None
async def _resolve_label_ids(
    label_names: list[str],
    owner: str,
    repo: str,
) -> list[int]:
    """Resolve label names to Gitea label IDs, creating missing labels.

    Returns a list of integer label IDs for the issue payload. Any failure
    (HTTP error, network error) degrades to an empty list — the issue is
    then filed without labels rather than not at all.
    """
    if not label_names:
        return []
    labels_url = f"{settings.gitea_url}/api/v1/repos/{owner}/{repo}/labels"
    headers = {
        "Authorization": f"token {settings.gitea_token}",
        "Content-Type": "application/json",
    }
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            # One GET up front so each already-known label costs no request.
            resp = await client.get(labels_url, headers=headers)
            if resp.status_code != 200:
                return []
            known = {entry["name"]: entry["id"] for entry in resp.json()}
            resolved: list[int] = []
            for name in label_names:
                if name in known:
                    resolved.append(known[name])
                    continue
                # Auto-create missing labels with a default color
                created = await client.post(
                    labels_url,
                    headers=headers,
                    json={"name": name, "color": "#0075ca"},
                )
                if created.status_code in (200, 201):
                    resolved.append(created.json()["id"])
            return resolved
    except Exception as exc:
        logger.debug("Label resolution failed: %s", exc)
        return []
async def triage_research_report(
    report: str,
    source_issue: int | None = None,
    llm_caller: Any | None = None,
    dry_run: bool = False,
) -> list[dict[str, Any]]:
    """End-to-end: extract action items from research and file Gitea issues.

    This is the main entry point that closes the research → backlog loop.

    Args:
        report: Research report text (markdown).
        source_issue: The Gitea issue number that produced this research.
        llm_caller: Optional async callable(prompt) -> str for LLM calls.
        dry_run: If True, extract items but don't create issues.

    Returns:
        List of dicts with 'action_item' and 'gitea_issue' (or None) keys.
    """
    items = await extract_action_items(report, llm_caller=llm_caller)
    if not items:
        logger.info("No action items extracted from research report")
        return []
    results: list[dict[str, Any]] = []
    created_count = 0
    for item in items:
        issue_data = None
        if not dry_run:
            issue_data = await create_gitea_issue(item, source_issue=source_issue)
            if issue_data is not None:
                created_count += 1
        results.append({"action_item": item, "gitea_issue": issue_data})
    logger.info(
        "Research triage complete: %d items extracted, %d issues created",
        len(results),
        created_count,
    )
    return results

View File

@@ -0,0 +1,348 @@
"""Tests for research triage — action item extraction and Gitea issue filing."""
import json
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from timmy.research_triage import (
ActionItem,
_parse_llm_response,
_resolve_label_ids,
_validate_action_item,
create_gitea_issue,
extract_action_items,
triage_research_report,
)
# ---------------------------------------------------------------------------
# ActionItem
# ---------------------------------------------------------------------------
# A realistic research report used as the LLM input across the test suite.
SAMPLE_REPORT = """
## Research: MCP Abstraction Layer
### Finding 1: FastMCP overhead is negligible
FastMCP averages 26.45ms per tool call. Total overhead <3% of budget.
### Finding 2: Agno tool calling is broken
Agno issues #2231, #2625 document persistent breakage with Ollama.
Fix: Use Ollama's `format` parameter with Pydantic JSON schemas.
### Recommendation
Implement three-tier router for structured output.
"""
# Canned LLM output: a JSON array containing one well-formed action item
# (valid title/body lengths, labels, priority, and a source URL).
SAMPLE_LLM_RESPONSE = json.dumps(
    [
        {
            "title": "[Router] Implement three-tier structured output router",
            "body": (
                "**What:** Build a three-tier router that uses Ollama's "
                "`format` parameter for structured output.\n"
                "**Why:** Agno's native tool calling is broken (#2231, #2625). "
                "Pydantic JSON schemas with `format` bypass the issue.\n"
                "**Suggested approach:** Add format parameter support to "
                "CascadeRouter.\n"
                "**Acceptance criteria:** Tool calls return valid JSON matching "
                "the Pydantic schema."
            ),
            "labels": ["actionable", "feature", "kimi-ready"],
            "priority": "high",
            "source_urls": ["https://github.com/agno-agi/agno/issues/2231"],
        },
    ]
)
class TestActionItem:
    """Formatting behavior of ActionItem.to_issue_body."""

    def test_to_issue_body_basic(self):
        rendered = ActionItem(title="Test", body="Test body").to_issue_body()
        assert "Test body" in rendered
        assert "Auto-triaged" in rendered

    def test_to_issue_body_with_source_issue(self):
        rendered = ActionItem(title="Test", body="Test body").to_issue_body(
            source_issue=946
        )
        assert "#946" in rendered
        assert "Origin" in rendered

    def test_to_issue_body_with_source_urls(self):
        item = ActionItem(
            title="Test",
            body="Body",
            source_urls=["https://example.com/finding"],
        )
        rendered = item.to_issue_body()
        assert "https://example.com/finding" in rendered
        assert "Source Evidence" in rendered
# ---------------------------------------------------------------------------
# _parse_llm_response
# ---------------------------------------------------------------------------
class TestParseLlmResponse:
    """JSON parsing and fence stripping in _parse_llm_response."""

    def test_plain_json(self):
        parsed = _parse_llm_response('[{"title": "foo"}]')
        assert len(parsed) == 1
        assert parsed[0]["title"] == "foo"

    def test_fenced_json(self):
        parsed = _parse_llm_response('```json\n[{"title": "bar"}]\n```')
        assert len(parsed) == 1
        assert parsed[0]["title"] == "bar"

    def test_empty_array(self):
        assert _parse_llm_response("[]") == []

    def test_non_array_returns_empty(self):
        assert _parse_llm_response('{"title": "not an array"}') == []

    def test_invalid_json_raises(self):
        with pytest.raises(json.JSONDecodeError):
            _parse_llm_response("not json at all")
# ---------------------------------------------------------------------------
# _validate_action_item
# ---------------------------------------------------------------------------
class TestValidateActionItem:
    """Quality gates and normalization rules in _validate_action_item."""

    def test_valid_item(self):
        item = _validate_action_item(
            {
                "title": "[Area] A specific clear title",
                "body": "Detailed body with enough content to be useful.",
                "labels": ["actionable", "bug"],
                "priority": "high",
            }
        )
        assert item is not None
        assert item.title == "[Area] A specific clear title"
        assert item.priority == "high"
        assert "actionable" in item.labels

    def test_short_title_rejected(self):
        rejected = _validate_action_item(
            {"title": "Short", "body": "Detailed body with enough content here."}
        )
        assert rejected is None

    def test_short_body_rejected(self):
        rejected = _validate_action_item(
            {"title": "A perfectly fine title here", "body": "Too short"}
        )
        assert rejected is None

    def test_missing_title_rejected(self):
        rejected = _validate_action_item(
            {"body": "Detailed body with enough content to be useful."}
        )
        assert rejected is None

    def test_non_dict_rejected(self):
        assert _validate_action_item("not a dict") is None

    def test_actionable_label_auto_added(self):
        item = _validate_action_item(
            {
                "title": "A perfectly fine title here",
                "body": "Detailed body with enough content to be useful.",
                "labels": ["bug"],
            }
        )
        assert item is not None
        assert "actionable" in item.labels

    def test_labels_as_csv_string(self):
        item = _validate_action_item(
            {
                "title": "A perfectly fine title here",
                "body": "Detailed body with enough content to be useful.",
                "labels": "bug, feature",
            }
        )
        assert item is not None
        assert "bug" in item.labels
        assert "feature" in item.labels

    def test_invalid_priority_defaults_medium(self):
        item = _validate_action_item(
            {
                "title": "A perfectly fine title here",
                "body": "Detailed body with enough content to be useful.",
                "priority": "urgent",
            }
        )
        assert item is not None
        assert item.priority == "medium"
# ---------------------------------------------------------------------------
# extract_action_items
# ---------------------------------------------------------------------------
class TestExtractActionItems:
    """Extraction pipeline with a mocked LLM caller."""

    @pytest.mark.asyncio
    async def test_extracts_items_from_report(self):
        fake_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm)
        assert len(items) == 1
        assert "three-tier" in items[0].title.lower()
        assert items[0].priority == "high"
        fake_llm.assert_called_once()

    @pytest.mark.asyncio
    async def test_empty_report_returns_empty(self):
        assert await extract_action_items("") == []

    @pytest.mark.asyncio
    async def test_llm_failure_returns_empty(self):
        fake_llm = AsyncMock(side_effect=RuntimeError("LLM down"))
        assert await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm) == []

    @pytest.mark.asyncio
    async def test_llm_returns_empty_string(self):
        fake_llm = AsyncMock(return_value="")
        assert await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm) == []

    @pytest.mark.asyncio
    async def test_llm_returns_invalid_json(self):
        fake_llm = AsyncMock(return_value="not valid json")
        assert await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm) == []

    @pytest.mark.asyncio
    async def test_caps_at_five_items(self):
        # Ten valid candidates in; the safety cap must keep at most five.
        payload = json.dumps(
            [
                {
                    "title": f"[Area] Action item number {i} is specific",
                    "body": f"Detailed body for action item {i} with enough words.",
                    "labels": ["actionable"],
                    "priority": "medium",
                }
                for i in range(10)
            ]
        )
        fake_llm = AsyncMock(return_value=payload)
        items = await extract_action_items(SAMPLE_REPORT, llm_caller=fake_llm)
        assert len(items) <= 5
# ---------------------------------------------------------------------------
# create_gitea_issue
# ---------------------------------------------------------------------------
class TestCreateGiteaIssue:
    """HTTP-level behavior of create_gitea_issue with mocked settings/httpx."""

    @pytest.mark.asyncio
    async def test_creates_issue_via_api(self):
        """Happy path: a 201 response yields the created issue dict."""
        item = ActionItem(
            title="[Test] Create a test issue",
            body="This is a test issue body with details.",
            labels=["actionable"],
        )
        # Fake Gitea response for the POST /issues call.
        issue_resp = MagicMock()
        issue_resp.status_code = 201
        issue_resp.json.return_value = {"number": 42, "title": item.title}
        mock_client = AsyncMock()
        mock_client.post.return_value = issue_resp
        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[1]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            # Route `async with httpx.AsyncClient(...)` to the mock client.
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await create_gitea_issue(item, source_issue=946)
            assert result is not None
            assert result["number"] == 42

    @pytest.mark.asyncio
    async def test_returns_none_when_disabled(self):
        """Short-circuits to None when Gitea is disabled / unconfigured."""
        item = ActionItem(title="[Test] Disabled test", body="Body content here.")
        with patch("timmy.research_triage.settings") as mock_settings:
            mock_settings.gitea_enabled = False
            mock_settings.gitea_token = ""
            result = await create_gitea_issue(item)
            assert result is None

    @pytest.mark.asyncio
    async def test_handles_connection_error(self):
        """Network failure (ConnectError) is swallowed and returns None."""
        item = ActionItem(
            title="[Test] Connection fail",
            body="Body content for connection test.",
        )
        mock_client = AsyncMock()
        mock_client.post.side_effect = httpx.ConnectError("refused")
        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            result = await create_gitea_issue(item)
            assert result is None
# ---------------------------------------------------------------------------
# triage_research_report (integration)
# ---------------------------------------------------------------------------
class TestTriageResearchReport:
    """Integration behavior of the full triage entry point."""

    @pytest.mark.asyncio
    async def test_dry_run_extracts_without_filing(self):
        """dry_run=True extracts items but files no Gitea issues."""
        mock_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        results = await triage_research_report(
            SAMPLE_REPORT, source_issue=946, llm_caller=mock_llm, dry_run=True
        )
        assert len(results) == 1
        assert results[0]["action_item"] is not None
        assert results[0]["gitea_issue"] is None

    @pytest.mark.asyncio
    async def test_empty_report_returns_empty(self):
        """Empty input short-circuits before any LLM call matters."""
        results = await triage_research_report("", llm_caller=AsyncMock(return_value="[]"))
        assert results == []

    @pytest.mark.asyncio
    async def test_end_to_end_with_mock_gitea(self):
        """Extraction plus issue creation against a fully mocked Gitea API."""
        mock_llm = AsyncMock(return_value=SAMPLE_LLM_RESPONSE)
        issue_resp = MagicMock()
        issue_resp.status_code = 201
        issue_resp.json.return_value = {"number": 99, "title": "test"}
        mock_client = AsyncMock()
        mock_client.post.return_value = issue_resp
        with (
            patch("timmy.research_triage.settings") as mock_settings,
            patch("timmy.research_triage._resolve_label_ids", new_callable=AsyncMock, return_value=[]),
            patch("timmy.research_triage.httpx.AsyncClient") as mock_cls,
        ):
            mock_settings.gitea_enabled = True
            mock_settings.gitea_token = "test-token"
            mock_settings.gitea_repo = "owner/repo"
            mock_settings.gitea_url = "http://localhost:3000"
            # Route `async with httpx.AsyncClient(...)` to the mock client.
            mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client)
            mock_cls.return_value.__aexit__ = AsyncMock(return_value=False)
            results = await triage_research_report(
                SAMPLE_REPORT, source_issue=946, llm_caller=mock_llm
            )
            assert len(results) == 1
            assert results[0]["gitea_issue"]["number"] == 99