forked from Rockachopa/Timmy-time-dashboard
Implements an autonomous research pipeline that chains the following steps:
- Check local knowledge (semantic memory cache, confidence > 0.85)
- Generate queries via LLM cascade
- Web search (concurrent, deduplicated)
- Fetch top pages
- Synthesize structured report via LLM
- Crystallize results in semantic memory
- Write artifact (create Gitea issues from action items)

Includes a full unit test suite (25 tests) covering all pipeline steps, cache hits, graceful degradation, and Gitea integration.

Fixes #975

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
498 lines
18 KiB
Python
498 lines
18 KiB
Python
"""Unit tests for timmy.research — ResearchOrchestrator pipeline."""
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from timmy.research import (
|
|
DEFAULT_QUERIES_PER_TOPIC,
|
|
MemoryInterface,
|
|
ResearchOrchestrator,
|
|
ResearchResult,
|
|
ResearchTools,
|
|
SearchSnippet,
|
|
_extract_action_items,
|
|
)
|
|
|
|
# ── Data structures ──────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestResearchResult:
    """Construction checks for the ``ResearchResult`` record."""

    def test_defaults(self):
        """With only topic and report given, the other fields take their defaults."""
        result = ResearchResult(topic="test", report="content")

        assert result.topic == "test"
        assert result.report == "content"
        assert result.cache_hit is False
        # Every list-valued field is expected to start out empty.
        for list_field in (
            result.queries_generated,
            result.sources,
            result.action_items,
        ):
            assert list_field == []
        assert result.duration_ms == 0.0
        assert result.timestamp  # must be truthy, i.e. non-empty

    def test_with_data(self):
        """Explicitly supplied values are readable back from the instance."""
        populated = dict(
            topic="AI",
            report="report text",
            queries_generated=["q1", "q2"],
            sources=[{"url": "http://example.com", "title": "Test"}],
            action_items=["Do X"],
            cache_hit=True,
            duration_ms=42.5,
        )
        result = ResearchResult(**populated)

        assert result.cache_hit is True
        assert len(result.sources) == 1
        assert result.duration_ms == 42.5
|
|
|
|
|
|
class TestSearchSnippet:
    """Checks for the ``SearchSnippet`` record."""

    def test_fields(self):
        """A snippet built without an explicit relevance reports ``0.0``."""
        snippet = SearchSnippet(title="T", url="http://x.com", snippet="text")

        assert snippet.relevance == 0.0
|
|
|
|
|
|
# ── _extract_action_items ────────────────────────────────────────────────────
|
|
|
|
|
|
class TestExtractActionItems:
    """Checks how ``_extract_action_items`` pulls action items out of report text."""

    def test_action_prefix(self):
        """An ``ACTION:`` line contributes the text after the prefix."""
        text = "Some text\nACTION: Do the thing\nMore text"

        assert _extract_action_items(text) == ["Do the thing"]

    def test_todo_prefix(self):
        """Both ``TODO:`` and ``Todo:`` spellings are picked up."""
        text = "TODO: Fix the bug\nTodo: Also this"

        assert _extract_action_items(text) == ["Fix the bug", "Also this"]

    def test_checkbox(self):
        """Unchecked boxes are returned; the checked ``[x]`` entry is not."""
        text = "- [ ] Implement feature\n- [x] Already done"

        assert _extract_action_items(text) == ["Implement feature"]

    def test_mixed(self):
        """Different marker styles are collected in document order."""
        text = "ACTION: First\n- [ ] Second\nTODO: Third"

        assert _extract_action_items(text) == ["First", "Second", "Third"]

    def test_empty(self):
        """Text without markers, and the empty string, both yield no items."""
        for text in ("No actions here", ""):
            assert _extract_action_items(text) == []
|
|
|
|
|
|
# ── MemoryInterface ──────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestMemoryInterface:
    """Checks how ``MemoryInterface`` wires up its search and store callables."""

    def test_custom_fns(self):
        """Callables passed to the constructor are stored as-is (same objects)."""
        search_stub = MagicMock(return_value=[])
        store_stub = MagicMock()

        interface = MemoryInterface(search_fn=search_stub, store_fn=store_stub)

        assert interface.search_fn is search_stub
        assert interface.store_fn is store_stub

    def test_defaults_when_import_fails(self):
        """With ``timmy.memory_system`` unimportable, fallbacks are still provided."""
        # A ``None`` entry in sys.modules makes importing that module raise ImportError.
        blocked_modules = {"timmy.memory_system": None}
        with patch.dict("sys.modules", blocked_modules):
            interface = MemoryInterface()

            # Both hooks must still be usable callables.
            assert callable(interface.search_fn)
            assert callable(interface.store_fn)

            # The fallback search is expected to report no results.
            assert interface.search_fn("test") == []
|
|
|
|
|
|
# ── ResearchOrchestrator ─────────────────────────────────────────────────────
|
|
|
|
|
|
def _make_cascade(**overrides):
    """Build a mock cascade router with a canned ``complete`` reply.

    The router is an ``AsyncMock`` whose ``complete`` coroutine resolves to
    ``{"content": <text>}``. Pass ``content=...`` to choose the text; when it
    is omitted a three-line default query list is used. Any other keyword
    argument is ignored.
    """
    reply_text = overrides.get("content", "query1\nquery2\nquery3")

    router = AsyncMock()
    router.complete = AsyncMock(return_value={"content": reply_text})
    return router
|
|
|
|
|
|
def _make_memory(search_results=None, score=0.0):
    """Build a ``MemoryInterface`` backed entirely by mocks.

    ``search_fn`` is a ``MagicMock`` that returns ``search_results`` (an empty
    list when ``None`` is passed) and ``store_fn`` is a plain ``MagicMock``.

    NOTE: ``score`` is accepted but not used by this helper.
    """
    hits = [] if search_results is None else search_results
    return MemoryInterface(
        search_fn=MagicMock(return_value=hits),
        store_fn=MagicMock(),
    )
|
|
|
|
|
|
def _make_tools(search_results=None, fetch_content="Page content"):
    """Create mock research tools.

    Args:
        search_results: Value the mocked ``web_search`` returns. When ``None``
            (the default) two canned results for ``http://a.com`` and
            ``http://b.com`` are used. An explicit empty list is honoured, so
            a search that finds nothing can be simulated.
        fetch_content: Value the mocked ``web_fetch`` returns.

    Returns:
        A ``ResearchTools`` whose ``web_search`` and ``web_fetch`` are
        ``MagicMock`` objects.
    """
    # Compare against None rather than relying on truthiness: ``[]`` is falsy,
    # so ``search_results or default`` would silently replace an intentional
    # "no results" list with the canned defaults.
    if search_results is None:
        search_results = [
            {"title": "Result 1", "url": "http://a.com", "snippet": "Snippet 1"},
            {"title": "Result 2", "url": "http://b.com", "snippet": "Snippet 2"},
        ]
    web_search = MagicMock(return_value=search_results)
    web_fetch = MagicMock(return_value=fetch_content)
    return ResearchTools(web_search=web_search, web_fetch=web_fetch)
|
|
|
|
|
|
class TestResearchOrchestratorInit:
    """Constructor checks for ``ResearchOrchestrator``."""

    def test_basic_init(self):
        """Collaborators are stored unchanged and both counters start at zero."""
        collaborators = {
            "cascade": _make_cascade(),
            "memory": _make_memory(),
            "tools": _make_tools(),
        }

        orchestrator = ResearchOrchestrator(**collaborators)

        assert orchestrator.cascade is collaborators["cascade"]
        assert orchestrator.memory is collaborators["memory"]
        assert orchestrator.tools is collaborators["tools"]
        for counter in ("research_cache_hit", "research_api_call"):
            assert orchestrator._metrics[counter] == 0
|
|
|
|
|
|
class TestCheckLocalKnowledge:
    """Checks for ``ResearchOrchestrator._check_local_knowledge``."""

    @pytest.mark.asyncio
    async def test_cache_hit(self):
        """A high-confidence memory entry comes back as a cached ResearchResult."""
        hit = MagicMock(relevance_score=0.90, content="Cached report")
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(search_results=[hit]),
        )

        outcome = await orchestrator._check_local_knowledge("test topic")

        assert outcome is not None
        assert outcome.cache_hit is True
        assert outcome.report == "Cached report"

    @pytest.mark.asyncio
    async def test_cache_miss_low_score(self):
        """A low-confidence memory entry is treated as a miss (``None``)."""
        weak_hit = MagicMock(relevance_score=0.5, content="Weak match")
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(search_results=[weak_hit]),
        )

        outcome = await orchestrator._check_local_knowledge("test topic")

        assert outcome is None

    @pytest.mark.asyncio
    async def test_cache_miss_empty(self):
        """An empty memory search result is a miss (``None``)."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(search_results=[]),
        )

        outcome = await orchestrator._check_local_knowledge("test topic")

        assert outcome is None

    @pytest.mark.asyncio
    async def test_exception_returns_none(self):
        """A memory search that raises is swallowed and reported as ``None``."""
        failing_search = MagicMock(side_effect=RuntimeError("db error"))
        broken_memory = MemoryInterface(
            search_fn=failing_search,
            store_fn=MagicMock(),
        )
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=broken_memory,
        )

        outcome = await orchestrator._check_local_knowledge("test topic")

        assert outcome is None
|
|
|
|
|
|
class TestGenerateQueries:
    """Checks for ``ResearchOrchestrator._generate_queries``."""

    @pytest.mark.asyncio
    async def test_parses_queries(self):
        """Each line of the LLM reply becomes one query."""
        router = _make_cascade(content="query one\nquery two\nquery three")
        orchestrator = ResearchOrchestrator(cascade=router, memory=_make_memory())

        generated = await orchestrator._generate_queries("AI safety", None, None)

        assert generated == ["query one", "query two", "query three"]

    @pytest.mark.asyncio
    async def test_strips_numbering(self):
        """Leading ``1.`` / ``3)`` style numbering is removed from queries."""
        router = _make_cascade(content="1. First query\n2. Second query\n3) Third")
        orchestrator = ResearchOrchestrator(cascade=router, memory=_make_memory())

        generated = await orchestrator._generate_queries("topic", None, None)

        for expected in ("First query", "Second query", "Third"):
            assert expected in generated

    @pytest.mark.asyncio
    async def test_fallback_on_error(self):
        """If the LLM call raises, the topic itself is used as the only query."""
        router = AsyncMock()
        router.complete = AsyncMock(side_effect=RuntimeError("LLM down"))
        orchestrator = ResearchOrchestrator(cascade=router, memory=_make_memory())

        generated = await orchestrator._generate_queries("fallback topic", None, None)

        assert generated == ["fallback topic"]

    @pytest.mark.asyncio
    async def test_passes_cascade_tier(self):
        """The third argument is forwarded to ``complete`` as the ``model`` kwarg."""
        router = _make_cascade(content="q1\nq2")
        orchestrator = ResearchOrchestrator(cascade=router, memory=_make_memory())

        await orchestrator._generate_queries("topic", None, "gpt-4")

        forwarded = router.complete.call_args.kwargs
        assert forwarded.get("model") == "gpt-4"
|
|
|
|
|
|
class TestSearch:
    """Checks for ``ResearchOrchestrator._search``."""

    @pytest.mark.asyncio
    async def test_collects_snippets(self):
        """Results from several queries are merged and deduplicated by URL."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(),
            tools=_make_tools(),
        )

        found = await orchestrator._search(["q1", "q2"])

        # The mock returns the same two URLs for each of the two queries, so
        # only two distinct snippets should remain after deduplication.
        assert len(found) == 2

    @pytest.mark.asyncio
    async def test_no_search_tool(self):
        """Without a ``web_search`` tool the search yields an empty list."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(),
            tools=ResearchTools(web_search=None),
        )

        found = await orchestrator._search(["q1"])

        assert found == []

    @pytest.mark.asyncio
    async def test_search_error_handled(self):
        """A ``web_search`` that raises is handled and yields an empty list."""
        failing_search = MagicMock(side_effect=RuntimeError("network error"))
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(),
            tools=ResearchTools(web_search=failing_search),
        )

        found = await orchestrator._search(["q1"])

        assert found == []
|
|
|
|
|
|
class TestFetch:
    """Checks for ``ResearchOrchestrator._fetch``."""

    @pytest.mark.asyncio
    async def test_fetches_pages(self):
        """Each snippet is fetched and the page carries the fetched content."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(),
            tools=_make_tools(fetch_content="Page body here"),
        )
        targets = [
            SearchSnippet(title="P1", url="http://a.com", snippet="s1"),
            SearchSnippet(title="P2", url="http://b.com", snippet="s2"),
        ]

        fetched = await orchestrator._fetch(targets)

        assert len(fetched) == 2
        assert fetched[0].content == "Page body here"

    @pytest.mark.asyncio
    async def test_no_fetch_tool(self):
        """Without a ``web_fetch`` tool no pages are returned."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(),
            memory=_make_memory(),
            tools=ResearchTools(web_fetch=None),
        )
        lone_snippet = SearchSnippet("T", "http://x.com", "s")

        fetched = await orchestrator._fetch([lone_snippet])

        assert fetched == []
|
|
|
|
|
|
class TestSynthesize:
    """Checks for ``ResearchOrchestrator._synthesize``."""

    @pytest.mark.asyncio
    async def test_produces_report(self):
        """The LLM reply text is returned as the report."""
        from timmy.research import FetchedPage

        router = _make_cascade(content="# Report\nKey findings here")
        orchestrator = ResearchOrchestrator(cascade=router, memory=_make_memory())
        page = FetchedPage(url="http://x.com", title="X", content="content")

        synthesized = await orchestrator._synthesize("topic", None, [page], None)

        assert "Report" in synthesized

    @pytest.mark.asyncio
    async def test_fallback_on_error(self):
        """If the LLM call raises, a failure report naming the topic is returned."""
        from timmy.research import FetchedPage

        router = AsyncMock()
        router.complete = AsyncMock(side_effect=RuntimeError("LLM error"))
        orchestrator = ResearchOrchestrator(cascade=router, memory=_make_memory())
        page = FetchedPage(url="http://x.com", title="X", content="content")

        synthesized = await orchestrator._synthesize("topic", None, [page], None)

        for fragment in ("Synthesis failed", "topic"):
            assert fragment in synthesized
|
|
|
|
|
|
class TestCrystallize:
    """Checks for ``ResearchOrchestrator._crystallize``."""

    @pytest.mark.asyncio
    async def test_stores_in_memory(self):
        """The result is stored exactly once, tagged as orchestrator research."""
        memory = _make_memory()
        orchestrator = ResearchOrchestrator(cascade=_make_cascade(), memory=memory)
        finished = ResearchResult(topic="test", report="report text")

        await orchestrator._crystallize("test", finished)

        memory.store_fn.assert_called_once()
        stored_with = memory.store_fn.call_args.kwargs
        assert stored_with.get("context_type") == "research"
        assert stored_with.get("source") == "research_orchestrator"

    @pytest.mark.asyncio
    async def test_store_error_handled(self):
        """A ``store_fn`` that raises must not propagate out of ``_crystallize``."""
        failing_store = MagicMock(side_effect=RuntimeError("db error"))
        memory = MemoryInterface(
            search_fn=MagicMock(return_value=[]),
            store_fn=failing_store,
        )
        orchestrator = ResearchOrchestrator(cascade=_make_cascade(), memory=memory)
        finished = ResearchResult(topic="test", report="report")

        # The test passes as long as this await does not raise.
        await orchestrator._crystallize("test", finished)
|
|
|
|
|
|
class TestWriteArtifact:
    """Checks for ``ResearchOrchestrator._write_artifact``."""

    @pytest.mark.asyncio
    async def test_no_action_items_skips(self):
        """A result with no action items is processed without raising."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(), memory=_make_memory()
        )
        empty_result = ResearchResult(topic="test", report="r", action_items=[])

        # Only successful completion is checked here; nothing is asserted.
        await orchestrator._write_artifact(empty_result)

    @pytest.mark.asyncio
    async def test_creates_issues(self):
        """A result with action items is handed to ``_create_gitea_issues``."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(), memory=_make_memory()
        )
        actionable = ResearchResult(
            topic="test",
            report="r",
            action_items=["Fix the thing"],
        )

        with patch("timmy.research._create_gitea_issues") as issue_creator:
            await orchestrator._write_artifact(actionable)
            issue_creator.assert_called_once_with(actionable)
|
|
|
|
|
|
class TestFullPipeline:
    """End-to-end checks for ``ResearchOrchestrator.run`` and ``get_metrics``."""

    @pytest.mark.asyncio
    async def test_cache_hit_short_circuits(self):
        """A high-confidence memory match is returned without calling the LLM."""
        remembered = MagicMock(
            relevance_score=0.95,
            content="Previously researched content",
        )
        router = _make_cascade()
        orchestrator = ResearchOrchestrator(
            cascade=router,
            memory=_make_memory(search_results=[remembered]),
            tools=_make_tools(),
        )

        outcome = await orchestrator.run("cached topic")

        assert outcome.cache_hit is True
        assert outcome.report == "Previously researched content"
        # Neither query generation nor synthesis should have reached the cascade.
        router.complete.assert_not_called()
        assert orchestrator._metrics["research_cache_hit"] == 1

    @pytest.mark.asyncio
    async def test_full_pipeline_no_tools(self):
        """The pipeline still completes when no web tools are configured."""
        memory = _make_memory()
        # The cascade is consulted twice: first for queries, then for synthesis.
        llm_replies = [
            {"content": "query 1\nquery 2"},
            {"content": "# Report\nACTION: Do something"},
        ]
        router = AsyncMock()
        router.complete = AsyncMock(side_effect=llm_replies)
        orchestrator = ResearchOrchestrator(
            cascade=router,
            memory=memory,
            tools=ResearchTools(),  # no web_search / web_fetch supplied
        )

        with patch("timmy.research._create_gitea_issues"):
            outcome = await orchestrator.run("test topic")

        assert outcome.topic == "test topic"
        assert outcome.cache_hit is False
        assert "Report" in outcome.report
        assert outcome.action_items == ["Do something"]
        assert outcome.duration_ms > 0
        assert orchestrator._metrics["research_api_call"] == 1
        memory.store_fn.assert_called_once()

    @pytest.mark.asyncio
    async def test_full_pipeline_with_tools(self):
        """With search and fetch tools, queries, sources and actions are populated."""
        llm_replies = [
            {"content": "search query 1\nsearch query 2"},
            {"content": "# Full Report\nTODO: Review findings"},
        ]
        router = AsyncMock()
        router.complete = AsyncMock(side_effect=llm_replies)
        orchestrator = ResearchOrchestrator(
            cascade=router,
            memory=_make_memory(),
            tools=_make_tools(),
        )

        with patch("timmy.research._create_gitea_issues"):
            outcome = await orchestrator.run("test topic")

        assert outcome.topic == "test topic"
        assert outcome.cache_hit is False
        assert len(outcome.queries_generated) == 2
        assert len(outcome.sources) > 0
        assert outcome.action_items == ["Review findings"]

    @pytest.mark.asyncio
    async def test_get_metrics(self):
        """``get_metrics`` exposes both research counters."""
        orchestrator = ResearchOrchestrator(
            cascade=_make_cascade(), memory=_make_memory()
        )

        snapshot = orchestrator.get_metrics()

        for counter in ("research_cache_hit", "research_api_call"):
            assert counter in snapshot
|
|
|
|
|
|
class TestCreateGiteaIssues:
    """Checks for the module-level ``_create_gitea_issues`` helper."""

    def test_no_token_skips(self):
        """With empty Gitea token and URL the helper returns without raising."""
        from timmy.research import _create_gitea_issues

        pending = ResearchResult(topic="t", report="r", action_items=["item"])
        unconfigured = MagicMock(gitea_token="", gitea_url="")

        with patch("timmy.research.settings", unconfigured):
            # Only successful completion is checked here; nothing is asserted.
            _create_gitea_issues(pending)

    def test_creates_issue_on_success(self):
        """With Gitea configured, one POST is made with a ``[research]`` title."""
        from timmy.research import _create_gitea_issues

        pending = ResearchResult(
            topic="AI",
            report="r",
            action_items=["Deploy model"],
        )
        configured = MagicMock(
            gitea_token="tok",
            gitea_url="http://localhost:3000",
            gitea_repo="owner/repo",
        )

        # Stand-in for the ``requests`` module whose ``post`` reports HTTP 201.
        fake_requests = MagicMock()
        fake_requests.post.return_value = MagicMock(status_code=201)

        with (
            patch("timmy.research.settings", configured),
            patch.dict("sys.modules", {"requests": fake_requests}),
        ):
            _create_gitea_issues(pending)

            fake_requests.post.assert_called_once()
            post_call = fake_requests.post.call_args
            assert "[research]" in post_call.kwargs["json"]["title"]
|