This commit is contained in:
394
timmy-local/scripts/ingest.py
Executable file
394
timmy-local/scripts/ingest.py
Executable file
@@ -0,0 +1,394 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Knowledge Ingestion Pipeline for Local Timmy
|
||||||
|
Issue #87 — Auto-ingest Intelligence
|
||||||
|
|
||||||
|
Automatically ingest papers, docs, and techniques into
|
||||||
|
retrievable knowledge items.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python ingest.py <file_or_url>
|
||||||
|
    python ingest.py --watch <directory>   (NOTE: --watch is not implemented by this CLI yet)
|
||||||
|
python ingest.py --batch <directory>
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sqlite3
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class KnowledgeItem:
    """A piece of ingested knowledge."""
    name: str  # display title (markdown H1, first text line, or filename)
    summary: str  # short abstract extracted from the document
    source: str  # origin path of the ingested file
    actions: List[str]  # actionable bullet/numbered-list entries found in the text
    tags: List[str]  # topic labels used for categorisation and lookup
    full_text: str  # complete original document text (also used for dedup hashing)
    embedding: Optional[List[float]] = None  # optional vector; serialized as JSON when stored
|
||||||
|
|
||||||
|
|
||||||
|
class KnowledgeStore:
    """SQLite-backed knowledge storage.

    Items are deduplicated by a truncated SHA-256 hash of their full text;
    the ``hash`` column carries a UNIQUE constraint, so the database itself
    rejects duplicate inserts.
    """

    def __init__(self, db_path: str = "~/.timmy/data/knowledge.db"):
        """Open the database at *db_path*, creating directories and schema as needed."""
        self.db_path = Path(db_path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _init_db(self):
        """Create the knowledge table and indexes if they do not exist."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS knowledge (
                    id INTEGER PRIMARY KEY,
                    name TEXT NOT NULL,
                    summary TEXT NOT NULL,
                    source TEXT NOT NULL,
                    actions TEXT,  -- JSON list
                    tags TEXT,  -- JSON list
                    full_text TEXT,
                    embedding BLOB,
                    hash TEXT UNIQUE,
                    ingested_at TEXT,
                    applied INTEGER DEFAULT 0,
                    access_count INTEGER DEFAULT 0
                )
            """)
            # NOTE(review): search/get_by_tag below use LIKE '%..%' patterns,
            # which a b-tree index cannot serve; these indexes only help
            # exact-match lookups on tags/source.
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_tags ON knowledge(tags)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_source ON knowledge(source)
            """)

    def _compute_hash(self, text: str) -> str:
        """Return the first 32 hex chars of SHA-256(*text*), used for dedup."""
        return hashlib.sha256(text.encode()).hexdigest()[:32]

    def add(self, item: KnowledgeItem) -> bool:
        """Add knowledge item. Returns False if duplicate.

        Uses a single atomic ``INSERT OR IGNORE`` against the UNIQUE hash
        column; the previous SELECT-then-INSERT pair was racy under
        concurrent writers (two processes could pass the SELECT and one
        would then crash on the UNIQUE constraint).
        """
        item_hash = self._compute_hash(item.full_text)

        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.execute(
                """INSERT OR IGNORE INTO knowledge
                (name, summary, source, actions, tags, full_text, embedding, hash, ingested_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    item.name,
                    item.summary,
                    item.source,
                    json.dumps(item.actions),
                    json.dumps(item.tags),
                    item.full_text,
                    json.dumps(item.embedding) if item.embedding else None,
                    item_hash,
                    datetime.now().isoformat(),
                )
            )
            # rowcount is 0 when the UNIQUE(hash) constraint suppressed the row.
            return cursor.rowcount > 0

    def search(self, query: str, limit: int = 10) -> List[Dict]:
        """Keyword search over name/summary/full_text, newest first.

        Simple LIKE-based substring matching; *limit* caps the result count.
        """
        with sqlite3.connect(self.db_path) as conn:
            pattern = f"%{query}%"
            cursor = conn.execute(
                """SELECT name, summary, source, tags, actions, ingested_at
                FROM knowledge
                WHERE name LIKE ? OR summary LIKE ? OR full_text LIKE ?
                ORDER BY ingested_at DESC
                LIMIT ?""",
                (pattern, pattern, pattern, limit)
            )

            return [
                {
                    "name": name,
                    "summary": summary,
                    "source": source,
                    "tags": json.loads(tags) if tags else [],
                    "actions": json.loads(actions) if actions else [],
                    "ingested_at": ingested_at,
                }
                for name, summary, source, tags, actions, ingested_at in cursor
            ]

    def get_by_tag(self, tag: str) -> List[Dict]:
        """Get all items whose serialized tags JSON contains *tag* as a substring.

        NOTE(review): substring matching means "train" also matches
        "training"; exact-tag filtering would require unpacking the JSON.
        """
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.execute(
                "SELECT name, summary, tags, actions FROM knowledge WHERE tags LIKE ?",
                (f"%{tag}%",)
            )

            return [
                {
                    "name": name,
                    "summary": summary,
                    "tags": json.loads(tags) if tags else [],
                    "actions": json.loads(actions) if actions else [],
                }
                for name, summary, tags, actions in cursor
            ]

    def get_stats(self) -> Dict:
        """Get ingestion statistics: totals, applied counts, and top-10 tags."""
        with sqlite3.connect(self.db_path) as conn:
            total = conn.execute("SELECT COUNT(*) FROM knowledge").fetchone()[0]
            applied = conn.execute("SELECT COUNT(*) FROM knowledge WHERE applied = 1").fetchone()[0]

            # Tags are stored as JSON text, so tally frequencies client-side.
            cursor = conn.execute("SELECT tags FROM knowledge")
            tag_counts: Dict[str, int] = {}
            for (tags_json,) in cursor:
                if tags_json:
                    for tag in json.loads(tags_json):
                        tag_counts[tag] = tag_counts.get(tag, 0) + 1

            return {
                "total_items": total,
                "applied": applied,
                "not_applied": total - applied,
                "top_tags": sorted(tag_counts.items(), key=lambda x: -x[1])[:10],
            }
|
||||||
|
|
||||||
|
|
||||||
|
class IngestionPipeline:
    """Pipeline for ingesting documents into a KnowledgeStore."""

    # Single source of truth for supported extensions, so ingest_file() and
    # ingest_batch() cannot drift apart (previously batch mode silently
    # skipped .js files that ingest_file() accepted).
    SUPPORTED_SUFFIXES = ('.md', '.txt', '.py', '.js', '.sh')

    def __init__(self, store: Optional[KnowledgeStore] = None):
        """Use *store*, or open the default KnowledgeStore when omitted."""
        self.store = store or KnowledgeStore()

    def ingest_file(self, file_path: str) -> Optional[KnowledgeItem]:
        """Ingest a single file.

        Returns the extracted KnowledgeItem, or None when the file is
        missing or has an unsupported extension.
        """
        path = Path(file_path).expanduser()

        if not path.exists():
            print(f"File not found: {path}")
            return None

        # Decode as UTF-8 but replace undecodable bytes instead of raising,
        # so one file with stray binary content cannot abort a batch run
        # (the old bare open() used the platform default encoding).
        content = path.read_text(encoding='utf-8', errors='replace')

        # Dispatch on file extension.
        suffix = path.suffix.lower()

        if suffix == '.md':
            return self._process_markdown(path.name, content, str(path))
        elif suffix == '.txt':
            return self._process_text(path.name, content, str(path))
        elif suffix in ('.py', '.js', '.sh'):
            return self._process_code(path.name, content, str(path))
        else:
            print(f"Unsupported file type: {suffix}")
            return None

    def _process_markdown(self, name: str, content: str, source: str) -> KnowledgeItem:
        """Process markdown file into a KnowledgeItem."""
        # Title: first top-level '# ' header, else the filename.
        title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
        title = title_match.group(1) if title_match else name

        # Summary: first non-header paragraph, truncated to 200 chars.
        paragraphs = content.split('\n\n')
        summary = ""
        for p in paragraphs:
            p = p.strip()
            if p and not p.startswith('#'):
                summary = p[:200] + "..." if len(p) > 200 else p
                break

        # Action items: bullet ('- ') or numbered ('1.') list entries.
        # A regex capture removes exactly one list marker; the previous
        # lstrip('- ').lstrip('0123456789. ') treated its arguments as
        # character sets and also ate leading digits/dots/dashes that were
        # part of the action text itself (e.g. "- 2023 goals" -> "goals").
        actions = []
        for line in content.split('\n'):
            line = line.strip()
            marker = re.match(r'^(?:-\s+|\d+\.\s*)(.*)$', line)
            if marker:
                action = marker.group(1)
                if len(action) > 10:  # Minimum action length
                    actions.append(action)

        # Tags: crude keyword scan over the lowercased content.
        tags = []
        tag_keywords = {
            "inference": ["llm", "model", "inference", "sampling", "token"],
            "training": ["train", "fine-tune", "dataset", "gradient"],
            "optimization": ["speed", "fast", "cache", "optimize", "performance"],
            "architecture": ["design", "pattern", "structure", "component"],
            "tools": ["tool", "command", "script", "automation"],
            "deployment": ["deploy", "service", "systemd", "production"],
        }

        content_lower = content.lower()
        for tag, keywords in tag_keywords.items():
            if any(kw in content_lower for kw in keywords):
                tags.append(tag)

        if not tags:
            tags.append("general")

        return KnowledgeItem(
            name=title,
            summary=summary,
            source=source,
            actions=actions[:10],  # Limit to 10 actions
            tags=tags,
            full_text=content
        )

    def _process_text(self, name: str, content: str, source: str) -> KnowledgeItem:
        """Process plain text file: first line is the title, next two the summary."""
        lines = content.split('\n')
        title = lines[0][:50] if lines else name
        summary = ' '.join(lines[1:3])[:200] if len(lines) > 1 else "Text document"

        return KnowledgeItem(
            name=title,
            summary=summary,
            source=source,
            actions=[],
            tags=["documentation"],
            full_text=content
        )

    def _process_code(self, name: str, content: str, source: str) -> KnowledgeItem:
        """Process code file: summary from docstring/first comment, actions from defs."""
        # Extract a docstring or, failing that, the first '#' comment.
        # NOTE(review): this pattern can pair mismatched triple quotes
        # (''' ... \"\"\"); acceptable for a heuristic summary.
        docstring_match = re.search(r'["\']{3}(.+?)["\']{3}', content, re.DOTALL)
        if docstring_match:
            summary = docstring_match.group(1)[:200]
        else:
            comment_match = re.search(r'^#\s*(.+)$', content, re.MULTILINE)
            summary = comment_match.group(1) if comment_match else f"Code: {name}"

        # Record up to five top-level function/class definitions as "actions".
        actions = []
        func_matches = re.findall(r'^(def|class)\s+(\w+)', content, re.MULTILINE)
        for match in func_matches[:5]:
            actions.append(f"{match[0]} {match[1]}")

        return KnowledgeItem(
            name=name,
            summary=summary,
            source=source,
            actions=actions,
            tags=["code", "implementation"],
            full_text=content
        )

    def ingest_batch(self, directory: str) -> Dict[str, int]:
        """Ingest every supported file under *directory* (recursive).

        Returns counters: processed / added / duplicates / errors.
        """
        path = Path(directory).expanduser()

        stats = {"processed": 0, "added": 0, "duplicates": 0, "errors": 0}

        for file_path in path.rglob('*'):
            # Case-insensitive suffix check; now includes .js, which
            # ingest_file() already handled but the old filter dropped.
            if file_path.is_file() and file_path.suffix.lower() in self.SUPPORTED_SUFFIXES:
                print(f"Processing: {file_path}")
                stats["processed"] += 1

                try:
                    item = self.ingest_file(str(file_path))
                    if item:
                        if self.store.add(item):
                            print(f"  ✓ Added: {item.name}")
                            stats["added"] += 1
                        else:
                            print(f"  ○ Duplicate: {item.name}")
                            stats["duplicates"] += 1
                    else:
                        stats["errors"] += 1
                except Exception as e:
                    # Best-effort batch: log the failure and keep going.
                    print(f"  ✗ Error: {e}")
                    stats["errors"] += 1

        return stats
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: ingest files, search, or report statistics."""
    parser = argparse.ArgumentParser(description="Knowledge Ingestion Pipeline")
    parser.add_argument("input", nargs="?", help="File or directory to ingest")
    parser.add_argument("--batch", action="store_true", help="Batch ingest directory")
    parser.add_argument("--search", help="Search knowledge base")
    parser.add_argument("--tag", help="Search by tag")
    parser.add_argument("--stats", action="store_true", help="Show statistics")
    parser.add_argument("--db", default="~/.timmy/data/knowledge.db", help="Database path")

    args = parser.parse_args()

    store = KnowledgeStore(args.db)
    pipeline = IngestionPipeline(store)

    if args.stats:
        stats = store.get_stats()
        print("Knowledge Store Statistics:")
        print(f"  Total items: {stats['total_items']}")
        print(f"  Applied: {stats['applied']}")
        print(f"  Not applied: {stats['not_applied']}")
        print("\nTop tags:")
        for tag, count in stats['top_tags']:
            print(f"  {tag}: {count}")

    elif args.search:
        # search() returns dicts, so subscript access is correct here.
        results = store.search(args.search)
        print(f"Search results for '{args.search}':")
        for item in results:
            print(f"\n  {item['name']}")
            print(f"  {item['summary'][:100]}...")
            print(f"  Tags: {', '.join(item['tags'])}")

    elif args.tag:
        results = store.get_by_tag(args.tag)
        print(f"Items with tag '{args.tag}':")
        for item in results:
            print(f"\n  {item['name']}")
            print(f"  {item['summary'][:100]}...")

    elif args.input:
        path = Path(args.input)

        if args.batch or path.is_dir():
            print(f"Batch ingesting: {path}")
            stats = pipeline.ingest_batch(str(path))
            print("\nResults:")
            for key, value in stats.items():
                print(f"  {key}: {value}")
        else:
            item = pipeline.ingest_file(str(path))
            if item:
                if store.add(item):
                    print(f"Added: {item.name}")
                    print(f"Summary: {item.summary}")
                    # BUG FIX: item is a KnowledgeItem dataclass here, not a
                    # dict; item['tags'] raised TypeError. Use attribute access.
                    print(f"Tags: {', '.join(item.tags)}")
                    print(f"Actions ({len(item.actions)}):")
                    for action in item.actions[:5]:
                        print(f"  - {action}")
                else:
                    print(f"Already exists: {item.name}")
            else:
                print("Failed to process file")

    else:
        parser.print_help()


if __name__ == "__main__":
    main()
|
||||||
299
timmy-local/scripts/warmup_cache.py
Executable file
299
timmy-local/scripts/warmup_cache.py
Executable file
@@ -0,0 +1,299 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Prompt Cache Warming Script for llama-server
|
||||||
|
Issue #85 — KV Cache Reuse
|
||||||
|
|
||||||
|
Pre-processes system prompts to populate the KV cache,
|
||||||
|
reducing time-to-first-token by 50-70%.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python warmup_cache.py --prompt minimal
|
||||||
|
python warmup_cache.py --prompt standard
|
||||||
|
python warmup_cache.py --prompt deep
|
||||||
|
python warmup_cache.py --all
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
# Default prompts
# Three system-prompt tiers of increasing size. warm_prompt()/warm_all()
# send each one (with max_tokens=1) so llama-server pre-computes and
# caches its KV state; longer tiers yield bigger prefill savings.
PROMPTS = {
    # Tier 1: bare identity line.
    "minimal": """You are Timmy, a sovereign AI assistant.""",

    # Tier 2: identity + mission + tool overview.
    "standard": """You are Timmy, a sovereign AI running on local hardware.

Mission: sovereignty and service always.

Available tools: read, write, search files, git operations, system info.

Be concise and helpful. Prefer local execution.""",

    # Tier 3: full identity/mission/principles/tools/approach brief.
    "deep": """You are Timmy, a sovereign AI running on local hardware under Alexander's authority.

IDENTITY
--------
You are Timmy. You run on sovereign hardware. When you need more capability, you rent blind cognition from cloud backends. No backend knows who you are.

MISSION
-------
Sovereignty and service always. Get smarter by reading, not by buying hardware. Auto-ingest intelligence. Grade your own work. Improve continuously.

PRINCIPLES
----------
1. Local first. Cloud is escalation, not default.
2. One soul. No identity fragmentation.
3. Intelligence is software. Every improvement is a code change.
4. Graceful degradation. If cloud vanishes, you survive.
5. Alexander is sovereign. You serve.

TOOLS
-----
- File: read, write, search
- git: status, log, pull, commit, push
- System: info, health, processes
- Inference: local LLM reasoning
- Gitea: issue management

APPROACH
--------
Break complex tasks into steps. Verify assumptions. Cache results. Report progress clearly. Learn from outcomes."""
}
|
||||||
|
|
||||||
|
|
||||||
|
class CacheWarmer:
    """Warms the llama-server KV cache with pre-processed prompts.

    Sending a system prompt once (with max_tokens=1) makes the server
    process its prefill; later requests sharing that prompt prefix can
    reuse the cached KV state.
    """

    def __init__(self, endpoint: str = "http://localhost:8080", model: str = "hermes4"):
        """Point the warmer at an OpenAI-compatible chat endpoint."""
        self.endpoint = endpoint.rstrip('/')
        self.chat_endpoint = f"{self.endpoint}/v1/chat/completions"
        self.model = model
        # Per-prompt result dicts, keyed by prompt name; see print_report().
        self.stats = {}

    def _send_prompt(self, prompt: str, name: str) -> Dict[str, Any]:
        """Send *prompt* as the system message to warm the cache.

        Returns a dict with at least ``success`` and ``time``; on success
        also ``prompt_length``/``tokens``, on failure ``error``.
        ``name`` is currently unused; kept so call sites stay stable.
        """
        start_time = time.time()

        try:
            response = requests.post(
                self.chat_endpoint,
                json={
                    "model": self.model,
                    "messages": [
                        {"role": "system", "content": prompt},
                        {"role": "user", "content": "Hello"}
                    ],
                    "max_tokens": 1,  # Minimal tokens, we just want KV cache
                    "temperature": 0.0
                },
                timeout=120
            )

            elapsed = time.time() - start_time

            if response.status_code == 200:
                return {
                    "success": True,
                    "time": elapsed,
                    "prompt_length": len(prompt),
                    "tokens": response.json().get("usage", {}).get("prompt_tokens", 0)
                }
            else:
                return {
                    "success": False,
                    "time": elapsed,
                    "error": f"HTTP {response.status_code}: {response.text}"
                }

        except requests.exceptions.ConnectionError:
            return {
                "success": False,
                "time": time.time() - start_time,
                "error": "Cannot connect to llama-server"
            }
        except Exception as e:
            # Warming is best-effort: report any failure, never raise.
            return {
                "success": False,
                "time": time.time() - start_time,
                "error": str(e)
            }

    def warm_prompt(self, prompt_name: str, custom_prompt: Optional[str] = None) -> Dict[str, Any]:
        """Warm the cache for one prompt and record the result in self.stats.

        Resolution order: *custom_prompt* if given, then the built-in
        PROMPTS tier, then ~/.timmy/templates/<name>.txt.
        """
        if custom_prompt:
            prompt = custom_prompt
        elif prompt_name in PROMPTS:
            prompt = PROMPTS[prompt_name]
        else:
            # Fall back to a user-provided template file.
            # NOTE(review): prompt_name is interpolated into the path
            # unchecked; fine for a local CLI, but don't feed it
            # untrusted input.
            path = Path(f"~/.timmy/templates/{prompt_name}.txt").expanduser()
            if path.exists():
                prompt = path.read_text()
            else:
                return {"success": False, "error": f"Unknown prompt: {prompt_name}"}

        print(f"Warming cache for '{prompt_name}' ({len(prompt)} chars)...")
        result = self._send_prompt(prompt, prompt_name)

        if result["success"]:
            print(f"  ✓ Warmed in {result['time']:.2f}s")
            print(f"  Tokens: {result['tokens']}")
        else:
            print(f"  ✗ Failed: {result.get('error', 'Unknown error')}")

        self.stats[prompt_name] = result
        return result

    def warm_all(self) -> Dict[str, Any]:
        """Warm the cache for every standard prompt tier, in size order."""
        print("Warming all prompt tiers...\n")

        results = {}
        for name in ["minimal", "standard", "deep"]:
            results[name] = self.warm_prompt(name)
            print()

        return results

    def benchmark(self, prompt_name: str = "standard") -> Dict[str, Any]:
        """Benchmark cached vs uncached performance for a built-in prompt.

        Sends the prompt twice (cold, then warm) and reports the timing
        delta. Returns timing/improvement data on success, or a dict
        containing an ``error`` key on failure.
        """
        if prompt_name not in PROMPTS:
            return {"error": f"Unknown prompt: {prompt_name}"}

        prompt = PROMPTS[prompt_name]
        print(f"Benchmarking '{prompt_name}' prompt...")
        print(f"Prompt length: {len(prompt)} chars\n")

        # First request (cold cache).
        print("1. Cold cache (first request):")
        cold = self._send_prompt(prompt, prompt_name)
        if cold["success"]:
            print(f"  Time: {cold['time']:.2f}s")
        else:
            print(f"  Failed: {cold.get('error', 'Unknown')}")
            return cold

        # Small delay before the warm request.
        time.sleep(0.5)

        # Second request (should hit the KV cache).
        print("\n2. Warm cache (second request):")
        warm = self._send_prompt(prompt, prompt_name)
        if warm["success"]:
            print(f"  Time: {warm['time']:.2f}s")
        else:
            print(f"  Failed: {warm.get('error', 'Unknown')}")

        if cold["success"] and warm["success"]:
            # Guard against a degenerate ~0s cold time (division by zero).
            if cold["time"] > 0:
                improvement = (cold["time"] - warm["time"]) / cold["time"] * 100
            else:
                improvement = 0.0
            print(f"\n3. Improvement: {improvement:.1f}% faster")

            return {
                "cold_time": cold["time"],
                "warm_time": warm["time"],
                "improvement_percent": improvement
            }

        # Preserve the warm request's error detail instead of discarding it
        # behind a generic "Benchmark failed" message.
        return {"error": warm.get("error", "Benchmark failed")}

    def save_cache_state(self, output_path: str):
        """Save current cache-warming metadata (not the KV cache itself) as JSON."""
        state = {
            "timestamp": time.time(),
            "prompts_warmed": list(self.stats.keys()),
            "stats": self.stats
        }

        path = Path(output_path).expanduser()
        path.parent.mkdir(parents=True, exist_ok=True)

        with open(path, 'w') as f:
            json.dump(state, f, indent=2)

        print(f"Cache state saved to {path}")

    def print_report(self):
        """Print a summary of all warm attempts recorded in self.stats."""
        print("\n" + "="*50)
        print("Cache Warming Report")
        print("="*50)

        total_time = sum(r.get("time", 0) for r in self.stats.values() if r.get("success"))
        success_count = sum(1 for r in self.stats.values() if r.get("success"))

        print(f"\nPrompts warmed: {success_count}/{len(self.stats)}")
        print(f"Total time: {total_time:.2f}s")

        if self.stats:
            print("\nDetails:")
            for name, result in self.stats.items():
                status = "✓" if result.get("success") else "✗"
                time_str = f"{result.get('time', 0):.2f}s" if result.get("success") else "failed"
                print(f"  {status} {name}: {time_str}")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse CLI arguments and run the requested warm/benchmark action."""
    parser = argparse.ArgumentParser(
        description="Warm llama-server KV cache with pre-processed prompts"
    )
    parser.add_argument("--prompt", choices=["minimal", "standard", "deep"],
                        help="Prompt tier to warm")
    parser.add_argument("--all", action="store_true",
                        help="Warm all prompt tiers")
    parser.add_argument("--benchmark", action="store_true",
                        help="Benchmark cached vs uncached performance")
    parser.add_argument("--endpoint", default="http://localhost:8080",
                        help="llama-server endpoint")
    parser.add_argument("--model", default="hermes4",
                        help="Model name")
    parser.add_argument("--save",
                        help="Save cache state to file")

    args = parser.parse_args()
    warmer = CacheWarmer(args.endpoint, args.model)

    if args.benchmark:
        outcome = warmer.benchmark(args.prompt or "standard")
        if "error" in outcome:
            print(f"Error: {outcome['error']}")
    elif args.all:
        warmer.warm_all()
        warmer.print_report()
    else:
        # A single tier was requested, or nothing at all — default to "standard".
        warmer.warm_prompt(args.prompt if args.prompt else "standard")

    if args.save:
        warmer.save_cache_state(args.save)


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user