diff --git a/SOVEREIGNTY.md b/SOVEREIGNTY.md new file mode 100644 index 00000000..090f788d --- /dev/null +++ b/SOVEREIGNTY.md @@ -0,0 +1,141 @@ +# SOVEREIGNTY.md + +> "If this spec is implemented correctly, it is the last research document +> Alexander should need to request from a corporate AI." +> — Research Sovereignty Spec, March 22 2026 + +This document tracks Timmy's progress toward **full research sovereignty** — +the ability to answer any research question locally, without paying a +corporate AI inference provider. + +--- + +## What "Sovereign" Means + +| Dimension | Corporate-dependent | Sovereign | +|-----------|--------------------|-----------| +| Inference | Claude / GPT-4 API | Qwen3-14B via Ollama (local) | +| Web search | Claude `web_search` | Timmy `web_search` tool | +| Page fetch | Claude `web_fetch` | `web_fetch` tool (trafilatura) | +| Synthesis | Claude Opus 4 | Groq Llama-3.3-70B → local fallback | +| Memory | Stateless | SQLite semantic index (nomic-embed) | +| Delivery | Claude artifact | reportlab PDF + Gitea issue | +| Delegation | Human re-prompt | `kimi-ready` Gitea queue | + +--- + +## Pipeline Architecture + +``` +Gitea issue (research template) + │ + ▼ +[Step 0] Semantic memory lookup — cache hit → done in <1 s + │ miss + ▼ +[Step 1] Query formulation (local LLM slot-fills template) + │ + ▼ +[Step 2] Web search (Timmy web_search tool) + │ + ▼ +[Step 3] Page fetch (web_fetch + trafilatura, token-bounded) + │ + ▼ +[Step 4] Cascade synthesis: + Tier 4 (SQLite cache) → instant, $0.00 + Tier 3 (Ollama local) → qwen3:32b, $0.00 + Tier 2 (Groq free tier) → llama-3.3-70b, $0.00 (rate limited) + Tier 1 (Claude API) → claude-sonnet-4, ~$1.00 + Kimi queue → kimi-ready label, async + │ + ▼ +[Step 5] Store result in semantic memory + │ + ▼ +[Step 6] Deliver: Gitea issue + optional PDF +``` + +### Cascade tiers (src/timmy/cascade.py) + +| Tier | Model | Cost | Quality | When | +|------|-------|------|---------|------| +| 4 | SQLite semantic memory 
| $0.00 | N/A | Any previously-answered question | +| 3 | Ollama qwen3:32b | $0.00 | ★★★ | Routine lookups, re-synthesis | +| 2 | Groq llama-3.3-70b (free) | $0.00 | ★★★★ | Most research tasks | +| 1 | Claude API sonnet-4 | ~$1.00/report | ★★★★★ | Novel/high-stakes domains | +| 0 | Kimi (async delegation) | varies | ★★★★★ | Batch / heavy research | + +--- + +## Research Template Library (`skills/research/`) + +Six templates with YAML frontmatter and `{slot}` placeholders: + +| Template | Purpose | +|----------|---------| +| `tool_evaluation.md` | Find all shipping tools for `{domain}` | +| `architecture_spike.md` | How to connect `{system_a}` to `{system_b}` | +| `game_analysis.md` | Evaluate `{game}` for AI agent play | +| `integration_guide.md` | Wire `{tool}` into `{stack}` with code | +| `state_of_art.md` | What exists in `{field}` as of `{date}` | +| `competitive_scan.md` | How does `{project}` compare to `{alternatives}` | + +--- + +## Local Model Selection + +Per the M3-Max study (issue #1063): + +- **Primary agent model:** Qwen3-14B Q5\_K\_M via Ollama + - 0.971 F1 on tool calling — GPT-4-class structured output + - Permissive enough without abliteration quality degradation +- **Fast mode:** Qwen3-8B Q6\_K — 2× speed for routine tasks +- **Heavy synthesis:** qwen3:32b / qwen3-coder:32b when Groq rate-limited + +--- + +## Implementation Status + +| Component | Issue | Status | +|-----------|-------|--------| +| `web_fetch` tool (trafilatura) | #973 | ✅ Done | +| Research template library (6 templates) | #974 | ✅ Done | +| ResearchOrchestrator pipeline | #975 | ✅ Done | +| Semantic index for research outputs | #976 | ✅ Done | +| Auto-create Gitea issues from findings | #977 | ✅ Done | +| Paperclip task runner integration | #978 | ✅ Done | +| Kimi delegation via Gitea labels | #979 | ✅ Done | +| Claude API fallback in cascade.py | #980 | ✅ Done | +| Research sovereignty metrics + dashboard | #981 | ✅ Done | + +--- + +## Sovereignty Metrics (targets) + +| 
def store_research(self, topic: str, report: str) -> str:
    """Persist a research report into the semantic memory table.

    The topic is slugified into a stable ``research_<slug>`` ID, so storing
    the same (or a same-slugging) topic again replaces the earlier report
    via INSERT OR REPLACE. Only the first 2000 characters of the report are
    embedded — enough of the opening to make it retrievable by similarity.

    Args:
        topic: Short description of the research topic (used as ID).
        report: Full text of the research report.

    Returns:
        The memory ID of the stored report.
    """
    import re as _re

    timestamp = datetime.now(UTC).isoformat()
    slug = _re.sub(r"[^a-z0-9_-]", "_", topic.lower())[:80]
    memory_id = f"research_{slug}"
    vector = embed_text(report[:2000])  # Embed the opening for retrieval

    # Column order matches the INSERT statement below.
    row = (
        memory_id,
        report,
        "research",
        topic,
        json.dumps({"topic": topic}),
        json.dumps(vector),
        timestamp,
    )

    with self._get_conn() as conn:
        conn.execute(
            """INSERT OR REPLACE INTO memories
            (id, content, memory_type, source, metadata, embedding, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)""",
            row,
        )
        conn.commit()

    logger.info("SemanticMemory: Stored research report '%s' (%d chars)", topic, len(report))
    return memory_id
def search_research(self, topic: str, limit: int = 10) -> list[tuple[str, float]]:
    """Rank stored research reports against a natural-language query.

    Args:
        topic: Natural-language query describing the research topic.
        limit: Maximum number of results to return.

    Returns:
        List of (content, confidence_score) tuples sorted by relevance descending.
    """
    probe = embed_text(topic)

    with self._get_conn() as conn:
        candidates = conn.execute(
            "SELECT content, embedding FROM memories WHERE memory_type = 'research'"
        ).fetchall()

    # Rows with an empty embedding column cannot be scored and are skipped.
    ranked = sorted(
        (
            (row["content"], cosine_similarity(probe, json.loads(row["embedding"])))
            for row in candidates
            if row["embedding"]
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:limit]
+ """ + path = research_path or RESEARCH_PATH + if not path.exists(): + logger.warning("SemanticMemory: Research directory not found: %s", path) + return 0 + + count = 0 + for md_file in sorted(path.rglob("*.md")): + content = md_file.read_text(errors="replace") + # Use filename stem as topic + topic = md_file.stem.replace("-", " ").replace("_", " ") + self.store_research(topic, content) + count += 1 + + logger.info("SemanticMemory: Indexed %d research reports from %s", count, path) + return count + def stats(self) -> dict: """Get indexing statistics.""" with self._get_conn() as conn: @@ -1150,10 +1240,15 @@ class SemanticMemory: "SELECT COUNT(*), COUNT(DISTINCT source) FROM memories WHERE memory_type = 'vault_chunk'" ) total_chunks, total_files = cursor.fetchone() + cursor2 = conn.execute( + "SELECT COUNT(*) FROM memories WHERE memory_type = 'research'" + ) + research_count = cursor2.fetchone()[0] return { "total_chunks": total_chunks, "total_files": total_files, + "research_reports": research_count, "embedding_dim": EMBEDDING_DIM if _get_embedding_model() else 128, } diff --git a/src/timmy/semantic_memory.py b/src/timmy/semantic_memory.py index c7539d31..0f7ec6d6 100644 --- a/src/timmy/semantic_memory.py +++ b/src/timmy/semantic_memory.py @@ -4,6 +4,7 @@ from timmy.memory_system import ( DB_PATH, EMBEDDING_DIM, EMBEDDING_MODEL, + RESEARCH_PATH, MemoryChunk, MemoryEntry, MemorySearcher, @@ -24,6 +25,7 @@ __all__ = [ "DB_PATH", "EMBEDDING_DIM", "EMBEDDING_MODEL", + "RESEARCH_PATH", "MemoryChunk", "MemoryEntry", "MemorySearcher",