From 9eeb49a6f15ec85744eb127cf36e4a104dc31851 Mon Sep 17 00:00:00 2001 From: "Claude (Opus 4.6)" Date: Tue, 24 Mar 2026 01:40:53 +0000 Subject: [PATCH] =?UTF-8?q?[claude]=20Autonomous=20research=20pipeline=20?= =?UTF-8?q?=E2=80=94=20orchestrator=20+=20SOVEREIGNTY.md=20(#972)=20(#1274?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- SOVEREIGNTY.md | 122 ++++++++ src/timmy/research.py | 528 +++++++++++++++++++++++++++++++++++ tests/timmy/test_research.py | 403 ++++++++++++++++++++++++++ 3 files changed, 1053 insertions(+) create mode 100644 SOVEREIGNTY.md create mode 100644 src/timmy/research.py create mode 100644 tests/timmy/test_research.py diff --git a/SOVEREIGNTY.md b/SOVEREIGNTY.md new file mode 100644 index 00000000..9861ec9d --- /dev/null +++ b/SOVEREIGNTY.md @@ -0,0 +1,122 @@ +# SOVEREIGNTY.md — Research Sovereignty Manifest + +> "If this spec is implemented correctly, it is the last research document +> Alexander should need to request from a corporate AI." +> — Issue #972, March 22 2026 + +--- + +## What This Is + +A machine-readable declaration of Timmy's research independence: +where we are, where we're going, and how to measure progress. + +--- + +## The Problem We're Solving + +On March 22, 2026, a single Claude session produced six deep research reports. +It consumed ~3 hours of human time and substantial corporate AI inference. +Every report was valuable — but the workflow was **linear**. +It would cost exactly the same to reproduce tomorrow. + +This file tracks the pipeline that crystallizes that workflow into something +Timmy can run autonomously. + +--- + +## The Six-Step Pipeline + +| Step | What Happens | Status | +|------|-------------|--------| +| 1. Scope | Human describes knowledge gap → Gitea issue with template | ✅ Done (`skills/research/`) | +| 2. Query | LLM slot-fills template → 5–15 targeted queries | ✅ Done (`research.py`) | +| 3. 
Search | Execute queries → top result URLs | ✅ Done (`research_tools.py`) | +| 4. Fetch | Download + extract full pages (trafilatura) | ✅ Done (`tools/system_tools.py`) | +| 5. Synthesize | Compress findings → structured report | ✅ Done (`research.py` cascade) | +| 6. Deliver | Store to semantic memory + optional disk persist | ✅ Done (`research.py`) | + +--- + +## Cascade Tiers (Synthesis Quality vs. Cost) + +| Tier | Model | Cost | Quality | Status | +|------|-------|------|---------|--------| +| **4** | SQLite semantic cache | $0.00 / instant | reuses prior | ✅ Active | +| **3** | Ollama `qwen3:14b` | $0.00 / local | ★★★ | ✅ Active | +| **2** | Claude API (haiku) | ~$0.01/report | ★★★★ | ✅ Active (opt-in) | +| **1** | Groq `llama-3.3-70b` | $0.00 / rate-limited | ★★★★ | 🔲 Planned (#980) | + +Set `ANTHROPIC_API_KEY` to enable Tier 2 fallback. + +--- + +## Research Templates + +Six prompt templates live in `skills/research/`: + +| Template | Use Case | +|----------|----------| +| `tool_evaluation.md` | Find all shipping tools for `{domain}` | +| `architecture_spike.md` | How to connect `{system_a}` to `{system_b}` | +| `game_analysis.md` | Evaluate `{game}` for AI agent play | +| `integration_guide.md` | Wire `{tool}` into `{stack}` with code | +| `state_of_art.md` | What exists in `{field}` as of `{date}` | +| `competitive_scan.md` | How does `{project}` compare to `{alternatives}` | + +--- + +## Sovereignty Metrics + +| Metric | Target (Week 1) | Target (Month 1) | Target (Month 3) | Graduation | +|--------|-----------------|------------------|------------------|------------| +| Queries answered locally | 10% | 40% | 80% | >90% | +| API cost per report | <$1.50 | <$0.50 | <$0.10 | <$0.01 | +| Time from question to report | <3 hours | <30 min | <5 min | <1 min | +| Human involvement | 100% (review) | Review only | Approve only | None | + +--- + +## How to Use the Pipeline + +```python +from timmy.research import run_research + +# Quick research (no template) 
+result = await run_research("best local embedding models for 36GB RAM") + +# With a template and slot values +result = await run_research( + topic="PDF text extraction libraries for Python", + template="tool_evaluation", + slots={"domain": "PDF parsing", "use_case": "RAG pipeline", "focus_criteria": "accuracy"}, + save_to_disk=True, +) + +print(result.report) +print(f"Backend: {result.synthesis_backend}, Cached: {result.cached}") +``` + +--- + +## Implementation Status + +| Component | Issue | Status | +|-----------|-------|--------| +| `web_fetch` tool (trafilatura) | #973 | ✅ Done | +| Research template library (6 templates) | #974 | ✅ Done | +| `ResearchOrchestrator` (`research.py`) | #975 | ✅ Done | +| Semantic index for outputs | #976 | 🔲 Planned | +| Auto-create Gitea issues from findings | #977 | 🔲 Planned | +| Paperclip task runner integration | #978 | 🔲 Planned | +| Kimi delegation via labels | #979 | 🔲 Planned | +| Groq free-tier cascade tier | #980 | 🔲 Planned | +| Sovereignty metrics dashboard | #981 | 🔲 Planned | + +--- + +## Governing Spec + +See [issue #972](http://143.198.27.163:3000/Rockachopa/Timmy-time-dashboard/issues/972) for the full spec and rationale. + +Research artifacts committed to `docs/research/`. diff --git a/src/timmy/research.py b/src/timmy/research.py new file mode 100644 index 00000000..fca58bf3 --- /dev/null +++ b/src/timmy/research.py @@ -0,0 +1,528 @@ +"""Research Orchestrator — autonomous, sovereign research pipeline. 
+ +Chains all six steps of the research workflow with local-first execution: + + Step 0 Cache — check semantic memory (SQLite, instant, zero API cost) + Step 1 Scope — load a research template from skills/research/ + Step 2 Query — slot-fill template + formulate 5-15 search queries via Ollama + Step 3 Search — execute queries via web_search (SerpAPI or fallback) + Step 4 Fetch — download + extract full pages via web_fetch (trafilatura) + Step 5 Synth — compress findings into a structured report via cascade + Step 6 Deliver — store to semantic memory; optionally save to docs/research/ + +Cascade tiers for synthesis (spec §4): + Tier 4 SQLite semantic cache — instant, free, covers ~80% after warm-up + Tier 3 Ollama (qwen3:14b) — local, free, good quality + Tier 2 Claude API (haiku) — cloud fallback, cheap, set ANTHROPIC_API_KEY + Tier 1 (future) Groq — free-tier rate-limited, tracked in #980 + +All optional services degrade gracefully per project conventions. + +Refs #972 (governing spec), #975 (ResearchOrchestrator sub-issue). +""" + +from __future__ import annotations + +import asyncio +import logging +import re +import textwrap +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +# Optional memory imports — available at module level so tests can patch them. 
+try: + from timmy.memory_system import SemanticMemory, store_memory +except Exception: # pragma: no cover + SemanticMemory = None # type: ignore[assignment,misc] + store_memory = None # type: ignore[assignment] + +# Root of the project — two levels up from src/timmy/ +_PROJECT_ROOT = Path(__file__).parent.parent.parent +_SKILLS_ROOT = _PROJECT_ROOT / "skills" / "research" +_DOCS_ROOT = _PROJECT_ROOT / "docs" / "research" + +# Similarity threshold for cache hit (0–1 cosine similarity) +_CACHE_HIT_THRESHOLD = 0.82 + +# How many search result URLs to fetch as full pages +_FETCH_TOP_N = 5 + +# Maximum tokens to request from the synthesis LLM +_SYNTHESIS_MAX_TOKENS = 4096 + + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + + +@dataclass +class ResearchResult: + """Full output of a research pipeline run.""" + + topic: str + query_count: int + sources_fetched: int + report: str + cached: bool = False + cache_similarity: float = 0.0 + synthesis_backend: str = "unknown" + errors: list[str] = field(default_factory=list) + + def is_empty(self) -> bool: + return not self.report.strip() + + +# --------------------------------------------------------------------------- +# Template loading +# --------------------------------------------------------------------------- + + +def list_templates() -> list[str]: + """Return names of available research templates (without .md extension).""" + if not _SKILLS_ROOT.exists(): + return [] + return [p.stem for p in sorted(_SKILLS_ROOT.glob("*.md"))] + + +def load_template(template_name: str, slots: dict[str, str] | None = None) -> str: + """Load a research template and fill {slot} placeholders. + + Args: + template_name: Stem of the .md file under skills/research/ (e.g. "tool_evaluation"). + slots: Mapping of {placeholder} → replacement value. + + Returns: + Template text with slots filled. 
Unfilled slots are left as-is. + """ + path = _SKILLS_ROOT / f"{template_name}.md" + if not path.exists(): + available = ", ".join(list_templates()) or "(none)" + raise FileNotFoundError( + f"Research template {template_name!r} not found. " + f"Available: {available}" + ) + + text = path.read_text(encoding="utf-8") + + # Strip YAML frontmatter (--- ... ---), including empty frontmatter (--- \n---) + text = re.sub(r"^---\n.*?---\n", "", text, flags=re.DOTALL) + + if slots: + for key, value in slots.items(): + text = text.replace(f"{{{key}}}", value) + + return text.strip() + + +# --------------------------------------------------------------------------- +# Query formulation (Step 2) +# --------------------------------------------------------------------------- + + +async def _formulate_queries(topic: str, template_context: str, n: int = 8) -> list[str]: + """Use the local LLM to generate targeted search queries for a topic. + + Falls back to a simple heuristic if Ollama is unavailable. + """ + prompt = textwrap.dedent(f"""\ + You are a research assistant. Generate exactly {n} targeted, specific web search + queries to thoroughly research the following topic. + + TOPIC: {topic} + + RESEARCH CONTEXT: + {template_context[:1000]} + + Rules: + - One query per line, no numbering, no bullet points. + - Vary the angle (definition, comparison, implementation, alternatives, pitfalls). + - Prefer exact technical terms, tool names, and version numbers where relevant. + - Output ONLY the queries, nothing else. 
async def _execute_search(queries: list[str]) -> list[dict[str, str]]:
    """Run each query through the available web search backend.

    Blank and repeated queries are dropped before searching so that no
    request is spent on them. The remaining queries run concurrently, each in
    a worker thread, and their results are merged in query order.

    Args:
        queries: Search strings, typically produced by the query-formulation step.

    Returns:
        A flat list of {title, url, snippet} dicts, de-duplicated by URL
        (first occurrence wins). Items without a URL are discarded. The list
        is empty when no backend is configured or every query fails.
    """
    # dict.fromkeys() de-duplicates while preserving the original order.
    unique_queries = list(dict.fromkeys(q.strip() for q in queries if q and q.strip()))
    if not unique_queries:
        return []

    # return_exceptions=True keeps one failing query from discarding the rest.
    outcomes = await asyncio.gather(
        *(asyncio.to_thread(_run_search_sync, query) for query in unique_queries),
        return_exceptions=True,
    )

    results: list[dict[str, str]] = []
    seen_urls: set[str] = set()
    for query, outcome in zip(unique_queries, outcomes):
        if isinstance(outcome, Exception):
            logger.warning("Search failed for query %r: %s", query, outcome)
            continue
        if isinstance(outcome, BaseException):
            # Cancellation / interpreter shutdown must not be swallowed.
            raise outcome
        for item in outcome:
            url = item.get("url", "")
            if url and url not in seen_urls:
                seen_urls.add(url)
                results.append(item)

    return results


def _run_search_sync(query: str) -> list[dict[str, str]]:
    """Synchronous search — wraps SerpAPI or returns empty on missing key.

    Args:
        query: A single search string.

    Returns:
        One {title, url, snippet} dict per organic result, or an empty list
        when SERPAPI_API_KEY is unset or the search raises.
    """
    import os

    api_key = os.environ.get("SERPAPI_API_KEY")
    if not api_key:
        logger.debug("SERPAPI_API_KEY not set — skipping web search for %r", query)
        return []

    try:
        # Imported lazily so the module still loads without the optional package.
        from serpapi import GoogleSearch

        params = {"q": query, "api_key": api_key, "num": 5}
        data = GoogleSearch(params).get_dict()
        return [
            {
                "title": r.get("title", ""),
                "url": r.get("link", ""),
                "snippet": r.get("snippet", ""),
            }
            for r in data.get("organic_results", [])
        ]
    except Exception as exc:
        logger.warning("SerpAPI search error: %s", exc)
        return []
async def _synthesize(topic: str, pages: list[str], snippets: list[str]) -> tuple[str, str]:
    """Compress fetched pages + snippets into a structured research report.

    Tries Ollama first, then the Claude fallback, and finally assembles a
    plain listing of the raw material when neither model returns text.

    Args:
        topic: The research question; used as the report title.
        pages: Extracted page texts (at most the first 5 are used).
        snippets: Search-result snippets, used as source material only when
            no pages were fetched.

    Returns:
        (report_markdown, backend_used) where backend_used is one of
        "ollama", "claude", "fallback", or "none" (no source material at all).
    """
    source_content = "\n\n---\n\n".join(pages[:5])
    if not source_content and snippets:
        source_content = "\n".join(f"- {s}" for s in snippets[:20])

    if not source_content:
        return (
            f"# Research: {topic}\n\n*No source material was retrieved. "
            "Check SERPAPI_API_KEY and network connectivity.*",
            "none",
        )

    # Dedent the static scaffold BEFORE interpolating. The source material is
    # multi-line text starting at column 0; interpolating it first would make
    # the common margin empty, so dedent() would strip nothing and the whole
    # scaffold would reach the model indented.
    scaffold = textwrap.dedent("""\
        You are a senior technical researcher. Synthesize the source material below
        into a structured research report on the topic: **{topic}**

        FORMAT YOUR REPORT AS:
        # {topic}

        ## Executive Summary
        (2-3 sentences: what you found, top recommendation)

        ## Key Findings
        (Bullet list of the most important facts, tools, or patterns)

        ## Comparison / Options
        (Table or list comparing alternatives where applicable)

        ## Recommended Approach
        (Concrete recommendation with rationale)

        ## Gaps & Next Steps
        (What wasn't answered, what to investigate next)

        ---
        SOURCE MATERIAL:
        {source}
        """)
    prompt = scaffold.format(topic=topic, source=source_content[:12000])

    # Tier 3 — try Ollama first
    report = await _ollama_complete(prompt, max_tokens=_SYNTHESIS_MAX_TOKENS)
    if report:
        return report, "ollama"

    # Tier 2 — Claude fallback
    report = await _claude_complete(prompt, max_tokens=_SYNTHESIS_MAX_TOKENS)
    if report:
        return report, "claude"

    # Last resort — no model available. Prefer the short snippets; when only
    # full pages were retrieved, keep an excerpt of them rather than returning
    # an empty section and discarding the fetched material.
    if snippets:
        body = "## Snippets\n\n" + "\n\n".join(f"- {s}" for s in snippets[:15])
    else:
        body = "## Source Excerpts\n\n" + source_content[:12000]
    return f"# {topic}\n\n{body}", "fallback"
async def _claude_complete(prompt: str, max_tokens: int = 1024) -> str:
    """Ask the Claude backend for a completion (cloud fallback tier).

    The call is attempted only when ``settings.anthropic_api_key`` is truthy.
    Any failure — missing modules included — is logged and reported as an
    empty string so callers can fall through to their next option.

    Args:
        prompt: Full prompt text handed to ``ClaudeBackend.run``.
        max_tokens: Accepted for signature parity with ``_ollama_complete``.
            NOTE(review): this value is not forwarded to the backend here —
            confirm whether ``ClaudeBackend`` applies its own limit.

    Returns:
        The stripped response content, or "" when the key is missing or the
        call fails.
    """
    completion = ""
    try:
        from config import settings

        if settings.anthropic_api_key:
            from timmy.backends import ClaudeBackend

            # backend.run is blocking; keep it off the event loop.
            reply = await asyncio.to_thread(ClaudeBackend().run, prompt)
            completion = reply.content.strip()
    except Exception as err:
        logger.warning("Claude fallback failed: %s", err)
        completion = ""
    return completion
async def run_research(
    topic: str,
    template: str | None = None,
    slots: dict[str, str] | None = None,
    save_to_disk: bool = False,
    skip_cache: bool = False,
) -> ResearchResult:
    """Run the full 6-step autonomous research pipeline.

    Args:
        topic: The research question or subject.
        template: Name of a template from skills/research/ (e.g. "tool_evaluation").
            If None, runs without a template scaffold.
        slots: Placeholder values for the template (e.g. {"domain": "PDF parsing"}).
        save_to_disk: If True, write the report to docs/research/<slug>.md.
        skip_cache: If True, bypass the semantic memory cache.

    Returns:
        ResearchResult with report and metadata. Non-fatal problems (unknown
        template, failed disk save) are collected in ``errors`` instead of
        being raised.
    """
    errors: list[str] = []

    # ------------------------------------------------------------------
    # Step 0 — check cache
    # ------------------------------------------------------------------
    if not skip_cache:
        cached, score = _check_cache(topic)
        if cached:
            logger.info("Cache hit (%.2f) for topic: %r", score, topic)
            return ResearchResult(
                topic=topic,
                query_count=0,
                sources_fetched=0,
                report=cached,
                cached=True,
                cache_similarity=score,
                synthesis_backend="cache",
            )

    # ------------------------------------------------------------------
    # Step 1 — load template (optional)
    # ------------------------------------------------------------------
    template_context = ""
    if template:
        try:
            template_context = load_template(template, slots)
        except FileNotFoundError as exc:
            # A missing template is not fatal: continue without a scaffold.
            errors.append(str(exc))
            logger.warning("Template load failed: %s", exc)

    # ------------------------------------------------------------------
    # Step 2 — formulate queries
    # ------------------------------------------------------------------
    queries = await _formulate_queries(topic, template_context)
    logger.info("Formulated %d queries for topic: %r", len(queries), topic)

    # ------------------------------------------------------------------
    # Step 3 — execute search
    # ------------------------------------------------------------------
    search_results = await _execute_search(queries)
    logger.info("Search returned %d results", len(search_results))
    snippets = [r.get("snippet", "") for r in search_results if r.get("snippet")]

    # ------------------------------------------------------------------
    # Step 4 — fetch full pages
    # ------------------------------------------------------------------
    pages = await _fetch_pages(search_results)
    logger.info("Fetched %d pages", len(pages))

    # ------------------------------------------------------------------
    # Step 5 — synthesize
    # ------------------------------------------------------------------
    report, backend = await _synthesize(topic, pages, snippets)

    # ------------------------------------------------------------------
    # Step 6 — deliver
    # ------------------------------------------------------------------
    # "none" (no source material) and "fallback" (no LLM reachable) are the
    # degraded outputs of _synthesize. Indexing them would make the Step 0
    # cache keep answering this topic with the failure placeholder even after
    # search / the models are back, so only real syntheses are stored.
    if backend in ("none", "fallback"):
        logger.info(
            "Not indexing degraded report (backend=%s) for topic: %r", backend, topic
        )
    else:
        _store_result(topic, report)

    # _save_to_disk returns None on failure; surface that to the caller.
    if save_to_disk and _save_to_disk(topic, report) is None:
        errors.append("Failed to save research report to disk")

    return ResearchResult(
        topic=topic,
        query_count=len(queries),
        sources_fetched=len(pages),
        report=report,
        cached=False,
        synthesis_backend=backend,
        errors=errors,
    )
class TestListTemplates:
    """Tests for ``list_templates``: discovery of ``*.md`` files under ``_SKILLS_ROOT``."""

    def test_returns_list(self, tmp_path, monkeypatch):
        (tmp_path / "tool_evaluation.md").write_text("---\n---\n# T", encoding="utf-8")
        (tmp_path / "game_analysis.md").write_text("---\n---\n# G", encoding="utf-8")
        monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path)

        from timmy.research import list_templates

        # Exact comparison pins both the sorted order and the stripped
        # ".md" extension, which membership checks alone would not catch.
        assert list_templates() == ["game_analysis", "tool_evaluation"]

    def test_ignores_non_markdown_files(self, tmp_path, monkeypatch):
        (tmp_path / "tool_evaluation.md").write_text("# T", encoding="utf-8")
        (tmp_path / "notes.txt").write_text("not a template", encoding="utf-8")
        monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path)

        from timmy.research import list_templates

        assert list_templates() == ["tool_evaluation"]

    def test_returns_empty_when_dir_missing(self, tmp_path, monkeypatch):
        monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path / "nonexistent")

        from timmy.research import list_templates

        assert list_templates() == []
monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + + from timmy.research import load_template + + result = load_template("arch", {"system_a": "Kafka", "system_b": "Postgres"}) + assert "Kafka" in result + assert "Postgres" in result + + def test_unfilled_slots_preserved(self, tmp_path, monkeypatch): + self._write_template(tmp_path, "t", "Hello {name} and {other}") + monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + + from timmy.research import load_template + + result = load_template("t", {"name": "World"}) + assert "{other}" in result + + def test_raises_file_not_found_for_missing_template(self, tmp_path, monkeypatch): + monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + + from timmy.research import load_template + + with pytest.raises(FileNotFoundError, match="nonexistent"): + load_template("nonexistent") + + def test_no_slots_returns_raw_body(self, tmp_path, monkeypatch): + self._write_template(tmp_path, "plain", "---\n---\nJust text here") + monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + + from timmy.research import load_template + + result = load_template("plain") + assert result == "Just text here" + + +# --------------------------------------------------------------------------- +# _check_cache +# --------------------------------------------------------------------------- + + +class TestCheckCache: + def test_returns_none_when_no_hits(self): + mock_mem = MagicMock() + mock_mem.search.return_value = [] + + with patch("timmy.research.SemanticMemory", return_value=mock_mem): + from timmy.research import _check_cache + + content, score = _check_cache("some topic") + + assert content is None + assert score == 0.0 + + def test_returns_content_above_threshold(self): + mock_mem = MagicMock() + mock_mem.search.return_value = [("cached report text", 0.91)] + + with patch("timmy.research.SemanticMemory", return_value=mock_mem): + from timmy.research import _check_cache + + content, score = _check_cache("same topic") 
class TestStoreResult:
    """Tests for ``_store_result``: indexing a report into semantic memory."""

    def test_calls_store_memory(self):
        mock_store = MagicMock()

        with patch("timmy.research.store_memory", mock_store):
            from timmy.research import _store_result

            _store_result("test topic", "# Report\n\nContent here.")

        # Assert on the real keyword arguments rather than on the string form
        # of call_args, which would also pass if the topic leaked into the
        # wrong field.
        mock_store.assert_called_once_with(
            content="# Report\n\nContent here.",
            source="research_pipeline",
            context_type="research",
            metadata={"topic": "test topic"},
        )

    def test_degrades_gracefully_on_error(self):
        mock_store = MagicMock(side_effect=RuntimeError("db error"))
        with patch("timmy.research.store_memory", mock_store):
            from timmy.research import _store_result

            # Should not raise
            _store_result("topic", "report")

        mock_store.assert_called_once()

    def test_skips_when_store_memory_unavailable(self):
        # Mirrors the module-level import fallback, which sets store_memory to None.
        with patch("timmy.research.store_memory", None):
            from timmy.research import _store_result

            # Should not raise
            _store_result("topic", "report")
assert path.read_text() == "# Test Report" + + def test_slugifies_topic_name(self, tmp_path, monkeypatch): + monkeypatch.setattr("timmy.research._DOCS_ROOT", tmp_path / "research") + + from timmy.research import _save_to_disk + + path = _save_to_disk("My Complex Topic! v2.0", "content") + assert path is not None + # Should be slugified: no special chars + assert " " not in path.name + assert "!" not in path.name + + def test_returns_none_on_error(self, monkeypatch): + monkeypatch.setattr( + "timmy.research._DOCS_ROOT", + Path("/nonexistent_root/deeply/nested"), + ) + + with patch("pathlib.Path.mkdir", side_effect=PermissionError("denied")): + from timmy.research import _save_to_disk + + result = _save_to_disk("topic", "report") + + assert result is None + + +# --------------------------------------------------------------------------- +# run_research — end-to-end with mocks +# --------------------------------------------------------------------------- + + +class TestRunResearch: + @pytest.mark.asyncio + async def test_returns_cached_result_when_cache_hit(self): + cached_report = "# Cached Report\n\nPreviously computed." 
+ with ( + patch("timmy.research._check_cache", return_value=(cached_report, 0.93)), + ): + from timmy.research import run_research + + result = await run_research("some topic") + + assert result.cached is True + assert result.cache_similarity == pytest.approx(0.93) + assert result.report == cached_report + assert result.synthesis_backend == "cache" + + @pytest.mark.asyncio + async def test_skips_cache_when_requested(self, tmp_path, monkeypatch): + monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + + with ( + patch("timmy.research._check_cache", return_value=("cached", 0.99)) as mock_cache, + patch( + "timmy.research._formulate_queries", + new=AsyncMock(return_value=["q1"]), + ), + patch("timmy.research._execute_search", new=AsyncMock(return_value=[])), + patch("timmy.research._fetch_pages", new=AsyncMock(return_value=[])), + patch( + "timmy.research._synthesize", + new=AsyncMock(return_value=("# Fresh report", "ollama")), + ), + patch("timmy.research._store_result"), + ): + from timmy.research import run_research + + result = await run_research("topic", skip_cache=True) + + mock_cache.assert_not_called() + assert result.cached is False + assert result.report == "# Fresh report" + + @pytest.mark.asyncio + async def test_full_pipeline_no_search_results(self, tmp_path, monkeypatch): + monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + + with ( + patch("timmy.research._check_cache", return_value=(None, 0.0)), + patch( + "timmy.research._formulate_queries", + new=AsyncMock(return_value=["query 1", "query 2"]), + ), + patch("timmy.research._execute_search", new=AsyncMock(return_value=[])), + patch("timmy.research._fetch_pages", new=AsyncMock(return_value=[])), + patch( + "timmy.research._synthesize", + new=AsyncMock(return_value=("# Report", "ollama")), + ), + patch("timmy.research._store_result"), + ): + from timmy.research import run_research + + result = await run_research("a new topic") + + assert not result.cached + assert result.query_count 
== 2 + assert result.sources_fetched == 0 + assert result.report == "# Report" + assert result.synthesis_backend == "ollama" + + @pytest.mark.asyncio + async def test_returns_result_with_error_on_bad_template(self, tmp_path, monkeypatch): + monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + + with ( + patch("timmy.research._check_cache", return_value=(None, 0.0)), + patch( + "timmy.research._formulate_queries", + new=AsyncMock(return_value=["q1"]), + ), + patch("timmy.research._execute_search", new=AsyncMock(return_value=[])), + patch("timmy.research._fetch_pages", new=AsyncMock(return_value=[])), + patch( + "timmy.research._synthesize", + new=AsyncMock(return_value=("# Report", "ollama")), + ), + patch("timmy.research._store_result"), + ): + from timmy.research import run_research + + result = await run_research("topic", template="nonexistent_template") + + assert len(result.errors) == 1 + assert "nonexistent_template" in result.errors[0] + + @pytest.mark.asyncio + async def test_saves_to_disk_when_requested(self, tmp_path, monkeypatch): + monkeypatch.setattr("timmy.research._SKILLS_ROOT", tmp_path) + monkeypatch.setattr("timmy.research._DOCS_ROOT", tmp_path / "research") + + with ( + patch("timmy.research._check_cache", return_value=(None, 0.0)), + patch( + "timmy.research._formulate_queries", + new=AsyncMock(return_value=["q1"]), + ), + patch("timmy.research._execute_search", new=AsyncMock(return_value=[])), + patch("timmy.research._fetch_pages", new=AsyncMock(return_value=[])), + patch( + "timmy.research._synthesize", + new=AsyncMock(return_value=("# Saved Report", "ollama")), + ), + patch("timmy.research._store_result"), + ): + from timmy.research import run_research + + result = await run_research("disk topic", save_to_disk=True) + + assert result.report == "# Saved Report" + saved_files = list((tmp_path / "research").glob("*.md")) + assert len(saved_files) == 1 + assert saved_files[0].read_text() == "# Saved Report" + + @pytest.mark.asyncio + 
class TestResearchResult:
    """Tests for the ``ResearchResult`` dataclass defaults and ``is_empty``."""

    def test_is_empty_when_no_report(self):
        from timmy.research import ResearchResult

        r = ResearchResult(topic="t", query_count=0, sources_fetched=0, report="")
        assert r.is_empty()

    def test_is_empty_when_report_is_only_whitespace(self):
        from timmy.research import ResearchResult

        # is_empty() strips the report before testing it.
        r = ResearchResult(topic="t", query_count=0, sources_fetched=0, report=" \n\t ")
        assert r.is_empty()

    def test_is_not_empty_with_content(self):
        from timmy.research import ResearchResult

        r = ResearchResult(topic="t", query_count=1, sources_fetched=1, report="# Report")
        assert not r.is_empty()

    def test_default_cached_false(self):
        from timmy.research import ResearchResult

        r = ResearchResult(topic="t", query_count=0, sources_fetched=0, report="x")
        assert r.cached is False
        assert r.cache_similarity == 0.0
        assert r.synthesis_backend == "unknown"

    def test_errors_defaults_to_empty_list(self):
        from timmy.research import ResearchResult

        r = ResearchResult(topic="t", query_count=0, sources_fetched=0, report="x")
        assert r.errors == []

    def test_errors_list_is_not_shared_between_instances(self):
        from timmy.research import ResearchResult

        first = ResearchResult(topic="a", query_count=0, sources_fetched=0, report="x")
        second = ResearchResult(topic="b", query_count=0, sources_fetched=0, report="y")
        first.errors.append("boom")
        # default_factory must hand every instance its own list.
        assert second.errors == []