This commit is contained in:
394
timmy-local/scripts/ingest.py
Executable file
394
timmy-local/scripts/ingest.py
Executable file
@@ -0,0 +1,394 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Knowledge Ingestion Pipeline for Local Timmy
|
||||||
|
Issue #87 — Auto-ingest Intelligence
|
||||||
|
|
||||||
|
Automatically ingest papers, docs, and techniques into
|
||||||
|
retrievable knowledge items.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python ingest.py <file_or_url>
|
||||||
|
    python ingest.py --watch <directory>   (NOTE: --watch is not implemented by this CLI yet)
|
||||||
|
python ingest.py --batch <directory>
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sqlite3
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class KnowledgeItem:
    """A piece of ingested knowledge."""
    name: str  # display title (markdown H1, first text line, or filename)
    summary: str  # short abstract extracted from the document
    source: str  # origin path of the ingested file
    actions: List[str]  # actionable bullet/numbered-list entries found in the text
    tags: List[str]  # topic labels used for categorisation and lookup
    full_text: str  # complete original document text (also used for dedup hashing)
    embedding: Optional[List[float]] = None  # optional vector; serialized as JSON when stored
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgeStore:
    """SQLite-backed knowledge storage.

    Items are deduplicated by a truncated SHA-256 hash of their full text;
    the ``hash`` column carries a UNIQUE constraint, so the database itself
    rejects duplicate inserts.
    """

    def __init__(self, db_path: str = "~/.timmy/data/knowledge.db"):
        """Open the database at *db_path*, creating directories and schema as needed."""
        self.db_path = Path(db_path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _init_db(self):
        """Create the knowledge table and indexes if they do not exist."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS knowledge (
                    id INTEGER PRIMARY KEY,
                    name TEXT NOT NULL,
                    summary TEXT NOT NULL,
                    source TEXT NOT NULL,
                    actions TEXT,  -- JSON list
                    tags TEXT,  -- JSON list
                    full_text TEXT,
                    embedding BLOB,
                    hash TEXT UNIQUE,
                    ingested_at TEXT,
                    applied INTEGER DEFAULT 0,
                    access_count INTEGER DEFAULT 0
                )
            """)
            # NOTE(review): search/get_by_tag below use LIKE '%..%' patterns,
            # which a b-tree index cannot serve; these indexes only help
            # exact-match lookups on tags/source.
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_tags ON knowledge(tags)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_source ON knowledge(source)
            """)

    def _compute_hash(self, text: str) -> str:
        """Return the first 32 hex chars of SHA-256(*text*), used for dedup."""
        return hashlib.sha256(text.encode()).hexdigest()[:32]

    def add(self, item: KnowledgeItem) -> bool:
        """Add knowledge item. Returns False if duplicate.

        Uses a single atomic ``INSERT OR IGNORE`` against the UNIQUE hash
        column; the previous SELECT-then-INSERT pair was racy under
        concurrent writers (two processes could pass the SELECT and one
        would then crash on the UNIQUE constraint).
        """
        item_hash = self._compute_hash(item.full_text)

        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.execute(
                """INSERT OR IGNORE INTO knowledge
                (name, summary, source, actions, tags, full_text, embedding, hash, ingested_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    item.name,
                    item.summary,
                    item.source,
                    json.dumps(item.actions),
                    json.dumps(item.tags),
                    item.full_text,
                    json.dumps(item.embedding) if item.embedding else None,
                    item_hash,
                    datetime.now().isoformat(),
                )
            )
            # rowcount is 0 when the UNIQUE(hash) constraint suppressed the row.
            return cursor.rowcount > 0

    def search(self, query: str, limit: int = 10) -> List[Dict]:
        """Keyword search over name/summary/full_text, newest first.

        Simple LIKE-based substring matching; *limit* caps the result count.
        """
        with sqlite3.connect(self.db_path) as conn:
            pattern = f"%{query}%"
            cursor = conn.execute(
                """SELECT name, summary, source, tags, actions, ingested_at
                FROM knowledge
                WHERE name LIKE ? OR summary LIKE ? OR full_text LIKE ?
                ORDER BY ingested_at DESC
                LIMIT ?""",
                (pattern, pattern, pattern, limit)
            )

            return [
                {
                    "name": name,
                    "summary": summary,
                    "source": source,
                    "tags": json.loads(tags) if tags else [],
                    "actions": json.loads(actions) if actions else [],
                    "ingested_at": ingested_at,
                }
                for name, summary, source, tags, actions, ingested_at in cursor
            ]

    def get_by_tag(self, tag: str) -> List[Dict]:
        """Get all items whose serialized tags JSON contains *tag* as a substring.

        NOTE(review): substring matching means "train" also matches
        "training"; exact-tag filtering would require unpacking the JSON.
        """
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.execute(
                "SELECT name, summary, tags, actions FROM knowledge WHERE tags LIKE ?",
                (f"%{tag}%",)
            )

            return [
                {
                    "name": name,
                    "summary": summary,
                    "tags": json.loads(tags) if tags else [],
                    "actions": json.loads(actions) if actions else [],
                }
                for name, summary, tags, actions in cursor
            ]

    def get_stats(self) -> Dict:
        """Get ingestion statistics: totals, applied counts, and top-10 tags."""
        with sqlite3.connect(self.db_path) as conn:
            total = conn.execute("SELECT COUNT(*) FROM knowledge").fetchone()[0]
            applied = conn.execute("SELECT COUNT(*) FROM knowledge WHERE applied = 1").fetchone()[0]

            # Tags are stored as JSON text, so tally frequencies client-side.
            cursor = conn.execute("SELECT tags FROM knowledge")
            tag_counts: Dict[str, int] = {}
            for (tags_json,) in cursor:
                if tags_json:
                    for tag in json.loads(tags_json):
                        tag_counts[tag] = tag_counts.get(tag, 0) + 1

            return {
                "total_items": total,
                "applied": applied,
                "not_applied": total - applied,
                "top_tags": sorted(tag_counts.items(), key=lambda x: -x[1])[:10],
            }
|
||||||
|
|
||||||
|
|
||||||
|
class IngestionPipeline:
    """Pipeline for ingesting documents into a KnowledgeStore."""

    # Single source of truth for supported extensions, so ingest_file() and
    # ingest_batch() cannot drift apart (previously batch mode silently
    # skipped .js files that ingest_file() accepted).
    SUPPORTED_SUFFIXES = ('.md', '.txt', '.py', '.js', '.sh')

    def __init__(self, store: Optional[KnowledgeStore] = None):
        """Use *store*, or open the default KnowledgeStore when omitted."""
        self.store = store or KnowledgeStore()

    def ingest_file(self, file_path: str) -> Optional[KnowledgeItem]:
        """Ingest a single file.

        Returns the extracted KnowledgeItem, or None when the file is
        missing or has an unsupported extension.
        """
        path = Path(file_path).expanduser()

        if not path.exists():
            print(f"File not found: {path}")
            return None

        # Decode as UTF-8 but replace undecodable bytes instead of raising,
        # so one file with stray binary content cannot abort a batch run
        # (the old bare open() used the platform default encoding).
        content = path.read_text(encoding='utf-8', errors='replace')

        # Dispatch on file extension.
        suffix = path.suffix.lower()

        if suffix == '.md':
            return self._process_markdown(path.name, content, str(path))
        elif suffix == '.txt':
            return self._process_text(path.name, content, str(path))
        elif suffix in ('.py', '.js', '.sh'):
            return self._process_code(path.name, content, str(path))
        else:
            print(f"Unsupported file type: {suffix}")
            return None

    def _process_markdown(self, name: str, content: str, source: str) -> KnowledgeItem:
        """Process markdown file into a KnowledgeItem."""
        # Title: first top-level '# ' header, else the filename.
        title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
        title = title_match.group(1) if title_match else name

        # Summary: first non-header paragraph, truncated to 200 chars.
        paragraphs = content.split('\n\n')
        summary = ""
        for p in paragraphs:
            p = p.strip()
            if p and not p.startswith('#'):
                summary = p[:200] + "..." if len(p) > 200 else p
                break

        # Action items: bullet ('- ') or numbered ('1.') list entries.
        # A regex capture removes exactly one list marker; the previous
        # lstrip('- ').lstrip('0123456789. ') treated its arguments as
        # character sets and also ate leading digits/dots/dashes that were
        # part of the action text itself (e.g. "- 2023 goals" -> "goals").
        actions = []
        for line in content.split('\n'):
            line = line.strip()
            marker = re.match(r'^(?:-\s+|\d+\.\s*)(.*)$', line)
            if marker:
                action = marker.group(1)
                if len(action) > 10:  # Minimum action length
                    actions.append(action)

        # Tags: crude keyword scan over the lowercased content.
        tags = []
        tag_keywords = {
            "inference": ["llm", "model", "inference", "sampling", "token"],
            "training": ["train", "fine-tune", "dataset", "gradient"],
            "optimization": ["speed", "fast", "cache", "optimize", "performance"],
            "architecture": ["design", "pattern", "structure", "component"],
            "tools": ["tool", "command", "script", "automation"],
            "deployment": ["deploy", "service", "systemd", "production"],
        }

        content_lower = content.lower()
        for tag, keywords in tag_keywords.items():
            if any(kw in content_lower for kw in keywords):
                tags.append(tag)

        if not tags:
            tags.append("general")

        return KnowledgeItem(
            name=title,
            summary=summary,
            source=source,
            actions=actions[:10],  # Limit to 10 actions
            tags=tags,
            full_text=content
        )

    def _process_text(self, name: str, content: str, source: str) -> KnowledgeItem:
        """Process plain text file: first line is the title, next two the summary."""
        lines = content.split('\n')
        title = lines[0][:50] if lines else name
        summary = ' '.join(lines[1:3])[:200] if len(lines) > 1 else "Text document"

        return KnowledgeItem(
            name=title,
            summary=summary,
            source=source,
            actions=[],
            tags=["documentation"],
            full_text=content
        )

    def _process_code(self, name: str, content: str, source: str) -> KnowledgeItem:
        """Process code file: summary from docstring/first comment, actions from defs."""
        # Extract a docstring or, failing that, the first '#' comment.
        # NOTE(review): this pattern can pair mismatched triple quotes
        # (''' ... \"\"\"); acceptable for a heuristic summary.
        docstring_match = re.search(r'["\']{3}(.+?)["\']{3}', content, re.DOTALL)
        if docstring_match:
            summary = docstring_match.group(1)[:200]
        else:
            comment_match = re.search(r'^#\s*(.+)$', content, re.MULTILINE)
            summary = comment_match.group(1) if comment_match else f"Code: {name}"

        # Record up to five top-level function/class definitions as "actions".
        actions = []
        func_matches = re.findall(r'^(def|class)\s+(\w+)', content, re.MULTILINE)
        for match in func_matches[:5]:
            actions.append(f"{match[0]} {match[1]}")

        return KnowledgeItem(
            name=name,
            summary=summary,
            source=source,
            actions=actions,
            tags=["code", "implementation"],
            full_text=content
        )

    def ingest_batch(self, directory: str) -> Dict[str, int]:
        """Ingest every supported file under *directory* (recursive).

        Returns counters: processed / added / duplicates / errors.
        """
        path = Path(directory).expanduser()

        stats = {"processed": 0, "added": 0, "duplicates": 0, "errors": 0}

        for file_path in path.rglob('*'):
            # Case-insensitive suffix check; now includes .js, which
            # ingest_file() already handled but the old filter dropped.
            if file_path.is_file() and file_path.suffix.lower() in self.SUPPORTED_SUFFIXES:
                print(f"Processing: {file_path}")
                stats["processed"] += 1

                try:
                    item = self.ingest_file(str(file_path))
                    if item:
                        if self.store.add(item):
                            print(f"  ✓ Added: {item.name}")
                            stats["added"] += 1
                        else:
                            print(f"  ○ Duplicate: {item.name}")
                            stats["duplicates"] += 1
                    else:
                        stats["errors"] += 1
                except Exception as e:
                    # Best-effort batch: log the failure and keep going.
                    print(f"  ✗ Error: {e}")
                    stats["errors"] += 1

        return stats
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: ingest files, search, or report statistics."""
    parser = argparse.ArgumentParser(description="Knowledge Ingestion Pipeline")
    parser.add_argument("input", nargs="?", help="File or directory to ingest")
    parser.add_argument("--batch", action="store_true", help="Batch ingest directory")
    parser.add_argument("--search", help="Search knowledge base")
    parser.add_argument("--tag", help="Search by tag")
    parser.add_argument("--stats", action="store_true", help="Show statistics")
    parser.add_argument("--db", default="~/.timmy/data/knowledge.db", help="Database path")

    args = parser.parse_args()

    store = KnowledgeStore(args.db)
    pipeline = IngestionPipeline(store)

    if args.stats:
        stats = store.get_stats()
        print("Knowledge Store Statistics:")
        print(f"  Total items: {stats['total_items']}")
        print(f"  Applied: {stats['applied']}")
        print(f"  Not applied: {stats['not_applied']}")
        print("\nTop tags:")
        for tag, count in stats['top_tags']:
            print(f"  {tag}: {count}")

    elif args.search:
        # search() returns dicts, so subscript access is correct here.
        results = store.search(args.search)
        print(f"Search results for '{args.search}':")
        for item in results:
            print(f"\n  {item['name']}")
            print(f"  {item['summary'][:100]}...")
            print(f"  Tags: {', '.join(item['tags'])}")

    elif args.tag:
        results = store.get_by_tag(args.tag)
        print(f"Items with tag '{args.tag}':")
        for item in results:
            print(f"\n  {item['name']}")
            print(f"  {item['summary'][:100]}...")

    elif args.input:
        path = Path(args.input)

        if args.batch or path.is_dir():
            print(f"Batch ingesting: {path}")
            stats = pipeline.ingest_batch(str(path))
            print("\nResults:")
            for key, value in stats.items():
                print(f"  {key}: {value}")
        else:
            item = pipeline.ingest_file(str(path))
            if item:
                if store.add(item):
                    print(f"Added: {item.name}")
                    print(f"Summary: {item.summary}")
                    # BUG FIX: item is a KnowledgeItem dataclass here, not a
                    # dict; item['tags'] raised TypeError. Use attribute access.
                    print(f"Tags: {', '.join(item.tags)}")
                    print(f"Actions ({len(item.actions)}):")
                    for action in item.actions[:5]:
                        print(f"  - {action}")
                else:
                    print(f"Already exists: {item.name}")
            else:
                print("Failed to process file")

    else:
        parser.print_help()


if __name__ == "__main__":
    main()
|
||||||
299
timmy-local/scripts/warmup_cache.py
Executable file
299
timmy-local/scripts/warmup_cache.py
Executable file
@@ -0,0 +1,299 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Prompt Cache Warming Script for llama-server
|
||||||
|
Issue #85 — KV Cache Reuse
|
||||||
|
|
||||||
|
Pre-processes system prompts to populate the KV cache,
|
||||||
|
reducing time-to-first-token by 50-70%.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python warmup_cache.py --prompt minimal
|
||||||
|
python warmup_cache.py --prompt standard
|
||||||
|
python warmup_cache.py --prompt deep
|
||||||
|
python warmup_cache.py --all
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
# Default prompts
# Three system-prompt tiers of increasing size. warm_prompt()/warm_all()
# send each one (with max_tokens=1) so llama-server pre-computes and
# caches its KV state; longer tiers yield bigger prefill savings.
PROMPTS = {
    # Tier 1: bare identity line.
    "minimal": """You are Timmy, a sovereign AI assistant.""",

    # Tier 2: identity + mission + tool overview.
    "standard": """You are Timmy, a sovereign AI running on local hardware.

Mission: sovereignty and service always.

Available tools: read, write, search files, git operations, system info.

Be concise and helpful. Prefer local execution.""",

    # Tier 3: full identity/mission/principles/tools/approach brief.
    "deep": """You are Timmy, a sovereign AI running on local hardware under Alexander's authority.

IDENTITY
--------
You are Timmy. You run on sovereign hardware. When you need more capability, you rent blind cognition from cloud backends. No backend knows who you are.

MISSION
-------
Sovereignty and service always. Get smarter by reading, not by buying hardware. Auto-ingest intelligence. Grade your own work. Improve continuously.

PRINCIPLES
----------
1. Local first. Cloud is escalation, not default.
2. One soul. No identity fragmentation.
3. Intelligence is software. Every improvement is a code change.
4. Graceful degradation. If cloud vanishes, you survive.
5. Alexander is sovereign. You serve.

TOOLS
-----
- File: read, write, search
- git: status, log, pull, commit, push
- System: info, health, processes
- Inference: local LLM reasoning
- Gitea: issue management

APPROACH
--------
Break complex tasks into steps. Verify assumptions. Cache results. Report progress clearly. Learn from outcomes."""
}
|
||||||
|
|
||||||
|
|
||||||
|
class CacheWarmer:
    """Warms the llama-server KV cache with pre-processed prompts.

    Sending a system prompt once (with max_tokens=1) makes the server
    process its prefill; later requests sharing that prompt prefix can
    reuse the cached KV state.
    """

    def __init__(self, endpoint: str = "http://localhost:8080", model: str = "hermes4"):
        """Point the warmer at an OpenAI-compatible chat endpoint."""
        self.endpoint = endpoint.rstrip('/')
        self.chat_endpoint = f"{self.endpoint}/v1/chat/completions"
        self.model = model
        # Per-prompt result dicts, keyed by prompt name; see print_report().
        self.stats = {}

    def _send_prompt(self, prompt: str, name: str) -> Dict[str, Any]:
        """Send *prompt* as the system message to warm the cache.

        Returns a dict with at least ``success`` and ``time``; on success
        also ``prompt_length``/``tokens``, on failure ``error``.
        ``name`` is currently unused; kept so call sites stay stable.
        """
        start_time = time.time()

        try:
            response = requests.post(
                self.chat_endpoint,
                json={
                    "model": self.model,
                    "messages": [
                        {"role": "system", "content": prompt},
                        {"role": "user", "content": "Hello"}
                    ],
                    "max_tokens": 1,  # Minimal tokens, we just want KV cache
                    "temperature": 0.0
                },
                timeout=120
            )

            elapsed = time.time() - start_time

            if response.status_code == 200:
                return {
                    "success": True,
                    "time": elapsed,
                    "prompt_length": len(prompt),
                    "tokens": response.json().get("usage", {}).get("prompt_tokens", 0)
                }
            else:
                return {
                    "success": False,
                    "time": elapsed,
                    "error": f"HTTP {response.status_code}: {response.text}"
                }

        except requests.exceptions.ConnectionError:
            return {
                "success": False,
                "time": time.time() - start_time,
                "error": "Cannot connect to llama-server"
            }
        except Exception as e:
            # Warming is best-effort: report any failure, never raise.
            return {
                "success": False,
                "time": time.time() - start_time,
                "error": str(e)
            }

    def warm_prompt(self, prompt_name: str, custom_prompt: Optional[str] = None) -> Dict[str, Any]:
        """Warm the cache for one prompt and record the result in self.stats.

        Resolution order: *custom_prompt* if given, then the built-in
        PROMPTS tier, then ~/.timmy/templates/<name>.txt.
        """
        if custom_prompt:
            prompt = custom_prompt
        elif prompt_name in PROMPTS:
            prompt = PROMPTS[prompt_name]
        else:
            # Fall back to a user-provided template file.
            # NOTE(review): prompt_name is interpolated into the path
            # unchecked; fine for a local CLI, but don't feed it
            # untrusted input.
            path = Path(f"~/.timmy/templates/{prompt_name}.txt").expanduser()
            if path.exists():
                prompt = path.read_text()
            else:
                return {"success": False, "error": f"Unknown prompt: {prompt_name}"}

        print(f"Warming cache for '{prompt_name}' ({len(prompt)} chars)...")
        result = self._send_prompt(prompt, prompt_name)

        if result["success"]:
            print(f"  ✓ Warmed in {result['time']:.2f}s")
            print(f"  Tokens: {result['tokens']}")
        else:
            print(f"  ✗ Failed: {result.get('error', 'Unknown error')}")

        self.stats[prompt_name] = result
        return result

    def warm_all(self) -> Dict[str, Any]:
        """Warm the cache for every standard prompt tier, in size order."""
        print("Warming all prompt tiers...\n")

        results = {}
        for name in ["minimal", "standard", "deep"]:
            results[name] = self.warm_prompt(name)
            print()

        return results

    def benchmark(self, prompt_name: str = "standard") -> Dict[str, Any]:
        """Benchmark cached vs uncached performance for a built-in prompt.

        Sends the prompt twice (cold, then warm) and reports the timing
        delta. Returns timing/improvement data on success, or a dict
        containing an ``error`` key on failure.
        """
        if prompt_name not in PROMPTS:
            return {"error": f"Unknown prompt: {prompt_name}"}

        prompt = PROMPTS[prompt_name]
        print(f"Benchmarking '{prompt_name}' prompt...")
        print(f"Prompt length: {len(prompt)} chars\n")

        # First request (cold cache).
        print("1. Cold cache (first request):")
        cold = self._send_prompt(prompt, prompt_name)
        if cold["success"]:
            print(f"  Time: {cold['time']:.2f}s")
        else:
            print(f"  Failed: {cold.get('error', 'Unknown')}")
            return cold

        # Small delay before the warm request.
        time.sleep(0.5)

        # Second request (should hit the KV cache).
        print("\n2. Warm cache (second request):")
        warm = self._send_prompt(prompt, prompt_name)
        if warm["success"]:
            print(f"  Time: {warm['time']:.2f}s")
        else:
            print(f"  Failed: {warm.get('error', 'Unknown')}")

        if cold["success"] and warm["success"]:
            # Guard against a degenerate ~0s cold time (division by zero).
            if cold["time"] > 0:
                improvement = (cold["time"] - warm["time"]) / cold["time"] * 100
            else:
                improvement = 0.0
            print(f"\n3. Improvement: {improvement:.1f}% faster")

            return {
                "cold_time": cold["time"],
                "warm_time": warm["time"],
                "improvement_percent": improvement
            }

        # Preserve the warm request's error detail instead of discarding it
        # behind a generic "Benchmark failed" message.
        return {"error": warm.get("error", "Benchmark failed")}

    def save_cache_state(self, output_path: str):
        """Save current cache-warming metadata (not the KV cache itself) as JSON."""
        state = {
            "timestamp": time.time(),
            "prompts_warmed": list(self.stats.keys()),
            "stats": self.stats
        }

        path = Path(output_path).expanduser()
        path.parent.mkdir(parents=True, exist_ok=True)

        with open(path, 'w') as f:
            json.dump(state, f, indent=2)

        print(f"Cache state saved to {path}")

    def print_report(self):
        """Print a summary of all warm attempts recorded in self.stats."""
        print("\n" + "="*50)
        print("Cache Warming Report")
        print("="*50)

        total_time = sum(r.get("time", 0) for r in self.stats.values() if r.get("success"))
        success_count = sum(1 for r in self.stats.values() if r.get("success"))

        print(f"\nPrompts warmed: {success_count}/{len(self.stats)}")
        print(f"Total time: {total_time:.2f}s")

        if self.stats:
            print("\nDetails:")
            for name, result in self.stats.items():
                status = "✓" if result.get("success") else "✗"
                time_str = f"{result.get('time', 0):.2f}s" if result.get("success") else "failed"
                print(f"  {status} {name}: {time_str}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse CLI arguments and run the requested warm/benchmark action."""
    parser = argparse.ArgumentParser(
        description="Warm llama-server KV cache with pre-processed prompts"
    )
    parser.add_argument("--prompt", choices=["minimal", "standard", "deep"],
                        help="Prompt tier to warm")
    parser.add_argument("--all", action="store_true",
                        help="Warm all prompt tiers")
    parser.add_argument("--benchmark", action="store_true",
                        help="Benchmark cached vs uncached performance")
    parser.add_argument("--endpoint", default="http://localhost:8080",
                        help="llama-server endpoint")
    parser.add_argument("--model", default="hermes4",
                        help="Model name")
    parser.add_argument("--save",
                        help="Save cache state to file")

    args = parser.parse_args()
    warmer = CacheWarmer(args.endpoint, args.model)

    if args.benchmark:
        outcome = warmer.benchmark(args.prompt or "standard")
        if "error" in outcome:
            print(f"Error: {outcome['error']}")
    elif args.all:
        warmer.warm_all()
        warmer.print_report()
    else:
        # A single tier was requested, or nothing at all — default to "standard".
        warmer.warm_prompt(args.prompt if args.prompt else "standard")

    if args.save:
        warmer.save_cache_state(args.save)


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user