#!/usr/bin/env python3
"""
Bootstrapper — assemble pre-session context from knowledge store.

Reads the knowledge store and produces a compact context block (2k tokens max)
that can be injected into a new session so it starts with situational awareness.

Usage:
    python3 bootstrapper.py --repo the-nexus --agent mimo-sprint
    python3 bootstrapper.py --repo timmy-home --global
    python3 bootstrapper.py --global
    python3 bootstrapper.py --repo the-nexus --max-tokens 1000
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Optional

# Resolve knowledge root relative to this script's parent
SCRIPT_DIR = Path(__file__).resolve().parent
REPO_ROOT = SCRIPT_DIR.parent
KNOWLEDGE_DIR = REPO_ROOT / "knowledge"
INDEX_PATH = KNOWLEDGE_DIR / "index.json"

# Approximate token count: ~4 chars per token for English text
CHARS_PER_TOKEN = 4

# Category sort priority (lower = shown first)
CATEGORY_PRIORITY = {
    "pitfall": 0,
    "tool-quirk": 1,
    "pattern": 2,
    "fact": 3,
    "question": 4,
}


def load_index(index_path: Path = INDEX_PATH) -> dict:
    """Load and validate the knowledge index.

    Returns an empty-but-valid index structure when the file is missing
    or lacks the required ``facts`` key, so callers never need to handle
    a half-formed index.
    """
    if not index_path.exists():
        return {"version": 1, "total_facts": 0, "facts": []}
    # Explicit encoding: index.json may contain non-ASCII fact text.
    with open(index_path, encoding="utf-8") as f:
        data = json.load(f)
    if "facts" not in data:
        print("WARNING: index.json missing 'facts' key", file=sys.stderr)
        return {"version": 1, "total_facts": 0, "facts": []}
    return data


def filter_facts(
    facts: list[dict],
    repo: Optional[str] = None,
    agent: Optional[str] = None,
    include_global: bool = True,
) -> list[dict]:
    """Filter facts by repo, agent, and global scope.

    A fact is kept when any ONE of these holds (checked in order):
      1. its repo matches ``repo`` (regardless of agent),
      2. its agent matches ``agent`` exactly,
      3. it is a global fact with no agent restriction and
         ``include_global`` is true.
    """
    filtered = []
    for fact in facts:
        # Facts without a repo tag are treated as global.
        fact_repo = fact.get("repo", "global")
        fact_agent = fact.get("agent", "")
        # Match by repo (regardless of agent)
        if repo and fact_repo == repo:
            filtered.append(fact)
            continue
        # Match by exact agent type
        if agent and fact_agent == agent:
            filtered.append(fact)
            continue
        # Include global facts without agent restriction (universal facts)
        if include_global and fact_repo == "global" and not fact_agent:
            filtered.append(fact)
    return filtered


def sort_facts(facts: list[dict]) -> list[dict]:
    """
    Sort facts by: confidence (desc), then category priority, then fact text.

    Most reliable and most dangerous facts come first.
    """

    def sort_key(f):
        confidence = f.get("confidence", 0.5)
        category = f.get("category", "fact")
        # Unknown categories sort after every known one (priority 5).
        cat_priority = CATEGORY_PRIORITY.get(category, 5)
        return (-confidence, cat_priority, f.get("fact", ""))

    return sorted(facts, key=sort_key)


def load_repo_knowledge(repo: str) -> Optional[str]:
    """Load per-repo knowledge markdown if it exists, else None."""
    repo_path = KNOWLEDGE_DIR / "repos" / f"{repo}.md"
    if repo_path.exists():
        return repo_path.read_text(encoding="utf-8").strip()
    return None


def load_agent_knowledge(agent: str) -> Optional[str]:
    """Load per-agent knowledge markdown if it exists, else None."""
    agent_path = KNOWLEDGE_DIR / "agents" / f"{agent}.md"
    if agent_path.exists():
        return agent_path.read_text(encoding="utf-8").strip()
    return None


def load_global_knowledge() -> list[str]:
    """Load all global knowledge markdown files (sorted by filename).

    Empty files are skipped; a missing directory yields an empty list.
    """
    global_dir = KNOWLEDGE_DIR / "global"
    if not global_dir.exists():
        return []
    chunks = []
    for md_file in sorted(global_dir.glob("*.md")):
        content = md_file.read_text(encoding="utf-8").strip()
        if content:
            chunks.append(content)
    return chunks


def render_facts_section(facts: list[dict], category: str, label: str) -> str:
    """Render a markdown section for all facts of a single category.

    Returns "" when no facts match, so callers can skip empty sections.
    Each bullet shows confidence as a percentage, plus the repo tag for
    non-global facts.
    """
    cat_facts = [f for f in facts if f.get("category") == category]
    if not cat_facts:
        return ""
    lines = [f"### {label}\n"]
    for f in cat_facts:
        conf = f.get("confidence", 0.5)
        fact_text = f.get("fact", "")
        repo_tag = f.get("repo", "")
        if repo_tag and repo_tag != "global":
            lines.append(f"- [{conf:.0%}] ({repo_tag}) {fact_text}")
        else:
            lines.append(f"- [{conf:.0%}] {fact_text}")
    return "\n".join(lines) + "\n"


def estimate_tokens(text: str) -> int:
    """Rough token estimate (~4 chars per token)."""
    return len(text) // CHARS_PER_TOKEN


def truncate_to_tokens(text: str, max_tokens: int) -> str:
    """Truncate text to approximately max_tokens, cutting at line boundaries.

    The truncation marker is appended after the cut, so output may
    slightly exceed the limit — the budget is approximate by design.
    """
    max_chars = max_tokens * CHARS_PER_TOKEN
    if len(text) <= max_chars:
        return text
    # Cut at last newline before the limit
    truncated = text[:max_chars]
    last_newline = truncated.rfind("\n")
    if last_newline > 0:
        truncated = truncated[:last_newline]
    return truncated + "\n\n[... truncated to fit context window ...]"


def build_bootstrap_context(
    repo: Optional[str] = None,
    agent: Optional[str] = None,
    include_global: bool = True,
    max_tokens: int = 2000,
    index_path: Path = INDEX_PATH,
) -> str:
    """
    Build the full bootstrap context block.

    Returns a markdown string suitable for injection into a session prompt.
    Sections appear in priority order: repo notes, structured facts
    (repo, global, agent), agent profile, then global notes; the result
    is truncated to ``max_tokens``.
    """
    index = load_index(index_path)
    facts = index.get("facts", [])

    # Filter
    filtered = filter_facts(facts, repo=repo, agent=agent, include_global=include_global)

    # Sort
    sorted_facts = sort_facts(filtered)

    # Build sections
    sections = ["## What You Know (bootstrapped)\n"]

    # Per-repo markdown knowledge
    if repo:
        repo_md = load_repo_knowledge(repo)
        if repo_md:
            sections.append(f"### Repo Notes: {repo}\n")
            sections.append(repo_md + "\n")

    # Structured facts by category
    if sorted_facts:
        # Group by source
        repo_facts = [f for f in sorted_facts if f.get("repo") == repo] if repo else []
        global_facts = [f for f in sorted_facts if f.get("repo") == "global"]
        agent_facts = [f for f in sorted_facts if f.get("agent") == agent] if agent else []

        if repo_facts:
            sections.append(f"### Repo: {repo}\n")
            for cat, label in [
                ("pitfall", "PITFALLS"),
                ("tool-quirk", "QUIRKS"),
                ("pattern", "PATTERNS"),
                ("fact", "FACTS"),
                ("question", "OPEN QUESTIONS"),
            ]:
                section = render_facts_section(repo_facts, cat, label)
                if section:
                    sections.append(section)

        if global_facts:
            sections.append("### Global\n")
            # No "question" category here: open questions are repo-scoped.
            for cat, label in [
                ("pitfall", "PITFALLS"),
                ("tool-quirk", "QUIRKS"),
                ("pattern", "PATTERNS"),
                ("fact", "FACTS"),
            ]:
                section = render_facts_section(global_facts, cat, label)
                if section:
                    sections.append(section)

        if agent_facts:
            sections.append(f"### Agent Notes ({agent})\n")
            for cat, label in [
                ("pitfall", "PITFALLS"),
                ("tool-quirk", "QUIRKS"),
                ("pattern", "PATTERNS"),
                ("fact", "FACTS"),
            ]:
                section = render_facts_section(agent_facts, cat, label)
                if section:
                    sections.append(section)

    # Per-agent markdown knowledge
    if agent:
        agent_md = load_agent_knowledge(agent)
        if agent_md:
            sections.append(f"### Agent Profile: {agent}\n")
            sections.append(agent_md + "\n")

    # Global markdown knowledge
    global_chunks = load_global_knowledge()
    if global_chunks:
        sections.append("### Global Notes\n")
        sections.extend(chunk + "\n" for chunk in global_chunks)

    # If nothing was found
    if len(sections) == 1:
        sections.append("_No relevant knowledge found. Starting fresh._\n")
        if not facts:
            sections.append(
                "_Knowledge store is empty. Run the harvester to populate it._\n"
            )

    # Join and truncate
    context = "\n".join(sections)
    context = truncate_to_tokens(context, max_tokens)

    return context


def main():
    """CLI entry point; returns 0 as the process exit code."""
    parser = argparse.ArgumentParser(
        description="Assemble pre-session context from knowledge store"
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=None,
        help="Repository name to filter facts by",
    )
    parser.add_argument(
        "--agent",
        type=str,
        default=None,
        help="Agent type to filter facts by (e.g., mimo-sprint, groq-fast)",
    )
    # --global is a no-op (default is already True); it exists so callers
    # can be explicit. --no-global flips the shared dest off.
    parser.add_argument(
        "--global",
        dest="include_global",
        action="store_true",
        default=True,
        help="Include global facts (default: true)",
    )
    parser.add_argument(
        "--no-global",
        dest="include_global",
        action="store_false",
        help="Exclude global facts",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=2000,
        help="Maximum token count for output (default: 2000)",
    )
    parser.add_argument(
        "--index",
        type=str,
        default=None,
        help="Path to index.json (default: knowledge/index.json)",
    )
    parser.add_argument(
        "--json",
        dest="output_json",
        action="store_true",
        help="Output raw JSON instead of markdown",
    )
    args = parser.parse_args()

    index_path = Path(args.index) if args.index else INDEX_PATH

    if args.output_json:
        # JSON mode: return the filtered, sorted facts (no token budget applied)
        index = load_index(index_path)
        facts = index.get("facts", [])
        filtered = filter_facts(
            facts,
            repo=args.repo,
            agent=args.agent,
            include_global=args.include_global,
        )
        sorted_facts = sort_facts(filtered)
        output = {
            "repo": args.repo,
            "agent": args.agent,
            "include_global": args.include_global,
            "total_indexed": len(facts),
            "matched": len(sorted_facts),
            "facts": sorted_facts,
        }
        print(json.dumps(output, indent=2))
    else:
        # Markdown mode: full bootstrap context
        context = build_bootstrap_context(
            repo=args.repo,
            agent=args.agent,
            include_global=args.include_global,
            max_tokens=args.max_tokens,
            index_path=index_path,
        )
        print(context)

    return 0


if __name__ == "__main__":
    sys.exit(main())