#!/usr/bin/env python3
"""
Multi-Tier Caching Layer for Local Timmy
Issue #103 — Cache Everywhere

Provides:
- Tier 1: KV Cache (prompt prefix caching)
- Tier 2: Semantic Response Cache (full LLM responses)
- Tier 3: Tool Result Cache (stable tool outputs)
- Tier 4: Embedding Cache (RAG embeddings)
- Tier 5: Template Cache (pre-compiled prompts)
- Tier 6: HTTP Response Cache (API responses)
"""

import sqlite3
import hashlib
import json
import time
import threading
from typing import Optional, Any, Dict, List, Callable
from dataclasses import dataclass, asdict
from pathlib import Path
import pickle
import functools


@dataclass
class CacheStats:
    """Hit/miss/eviction counters for monitoring one cache tier."""

    hits: int = 0
    misses: int = 0
    evictions: int = 0
    hit_rate: float = 0.0  # derived: hits / (hits + misses)

    def record_hit(self):
        """Count a cache hit and refresh the hit rate."""
        self.hits += 1
        self._update_rate()

    def record_miss(self):
        """Count a cache miss and refresh the hit rate."""
        self.misses += 1
        self._update_rate()

    def record_eviction(self):
        """Count an eviction (an expired entry that was deleted)."""
        self.evictions += 1

    def _update_rate(self):
        # Guard against division by zero before any lookups happen.
        total = self.hits + self.misses
        if total > 0:
            self.hit_rate = self.hits / total


class LRUCache:
    """Thread-safe in-memory LRU cache for the hot path.

    Backed by a plain dict plus an access-order list (oldest first);
    once ``max_size`` is reached, inserting a new key evicts the least
    recently used one.
    """

    def __init__(self, max_size: int = 1000):
        self.max_size = max_size
        self.cache: Dict[str, Any] = {}
        self.access_order: List[str] = []  # oldest first, newest last
        self.lock = threading.RLock()

    def get(self, key: str) -> Optional[Any]:
        """Return the value for *key* (promoting it to MRU), or None."""
        with self.lock:
            if key in self.cache:
                # Move to front (most recent)
                self.access_order.remove(key)
                self.access_order.append(key)
                return self.cache[key]
            return None

    def put(self, key: str, value: Any):
        """Insert or replace *key*, evicting the LRU entry when full."""
        with self.lock:
            if key in self.cache:
                self.access_order.remove(key)
            elif len(self.cache) >= self.max_size:
                # Evict oldest
                oldest = self.access_order.pop(0)
                del self.cache[oldest]
            self.cache[key] = value
            self.access_order.append(key)

    def invalidate(self, key: str):
        """Drop *key* from the cache if present; no-op otherwise."""
        with self.lock:
            if key in self.cache:
                self.access_order.remove(key)
                del self.cache[key]

    def clear(self):
        """Remove every entry."""
        with self.lock:
            self.cache.clear()
            self.access_order.clear()


class ResponseCache:
    """Tier 2: Semantic Response Cache — full LLM responses.

    Two layers: an in-memory LRU for hot prompts and a SQLite table for
    persistence.  LRU entries are stored as ``(response, created_at,
    ttl)`` tuples so expiry is enforced on memory hits too (previously
    bare responses were cached in the LRU and served stale forever).
    """

    def __init__(self, db_path: str = "~/.timmy/cache/responses.db"):
        self.db_path = Path(db_path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.stats = CacheStats()
        # LRU values: (response, created_at, ttl)
        self.lru = LRUCache(max_size=100)
        self._init_db()

    def _init_db(self):
        """Create the responses table and access-time index if missing."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS responses (
                    prompt_hash TEXT PRIMARY KEY,
                    response TEXT NOT NULL,
                    created_at REAL NOT NULL,
                    ttl INTEGER NOT NULL,
                    access_count INTEGER DEFAULT 0,
                    last_accessed REAL
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_accessed
                ON responses(last_accessed)
            """)

    def _hash_prompt(self, prompt: str) -> str:
        """Hash prompt after normalizing (removing timestamps, etc)."""
        # Normalize: lowercase, strip extra whitespace
        normalized = " ".join(prompt.lower().split())
        return hashlib.sha256(normalized.encode()).hexdigest()[:32]

    def get(self, prompt: str, ttl: int = 3600) -> Optional[str]:
        """Return the cached response for *prompt*, or None.

        The effective TTL is the minimum of the requested *ttl* and the
        TTL stored at put() time; expired disk rows are deleted eagerly.
        """
        prompt_hash = self._hash_prompt(prompt)
        now = time.time()

        # Memory tier first.  `is not None` so that an empty-string
        # response still counts as a hit.
        cached = self.lru.get(prompt_hash)
        if cached is not None:
            response, created_at, stored_ttl = cached
            if now - created_at < min(ttl, stored_ttl):
                self.stats.record_hit()
                return response
            self.lru.invalidate(prompt_hash)  # stale in memory

        # Disk tier.
        with sqlite3.connect(self.db_path) as conn:
            row = conn.execute(
                "SELECT response, created_at, ttl FROM responses WHERE prompt_hash = ?",
                (prompt_hash,)
            ).fetchone()
            if row:
                response, created_at, stored_ttl = row
                # Use minimum of requested and stored TTL
                effective_ttl = min(ttl, stored_ttl)
                if now - created_at < effective_ttl:
                    self.stats.record_hit()
                    # Update access stats
                    conn.execute(
                        "UPDATE responses SET access_count = access_count + 1, "
                        "last_accessed = ? WHERE prompt_hash = ?",
                        (now, prompt_hash)
                    )
                    # Promote to memory tier, keeping the original expiry.
                    self.lru.put(prompt_hash, (response, created_at, stored_ttl))
                    return response
                # Expired: drop the row so it never matches again.
                conn.execute("DELETE FROM responses WHERE prompt_hash = ?", (prompt_hash,))
                self.stats.record_eviction()

        self.stats.record_miss()
        return None

    def put(self, prompt: str, response: str, ttl: int = 3600):
        """Cache *response* for *prompt* with the given TTL (seconds)."""
        prompt_hash = self._hash_prompt(prompt)
        now = time.time()
        self.lru.put(prompt_hash, (response, now, ttl))
        with sqlite3.connect(self.db_path) as conn:
            conn.execute(
                """INSERT OR REPLACE INTO responses
                (prompt_hash, response, created_at, ttl, last_accessed)
                VALUES (?, ?, ?, ?, ?)""",
                (prompt_hash, response, now, ttl, now)
            )

    def invalidate_pattern(self, pattern: str):
        """Invalidate all cached responses whose text contains *pattern*."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("DELETE FROM responses WHERE response LIKE ?", (f"%{pattern}%",))
        # LRU keys are prompt hashes, not response text, so selective
        # matching isn't possible — drop the whole memory tier so that
        # invalidated responses can't keep being served from memory.
        self.lru.clear()

    def get_stats(self) -> Dict[str, Any]:
        """Return monitoring statistics for this tier."""
        with sqlite3.connect(self.db_path) as conn:
            count = conn.execute("SELECT COUNT(*) FROM responses").fetchone()[0]
            total_accesses = conn.execute(
                "SELECT SUM(access_count) FROM responses"
            ).fetchone()[0] or 0
        return {
            "tier": "response_cache",
            "memory_entries": len(self.lru.cache),
            "disk_entries": count,
            "hits": self.stats.hits,
            "misses": self.stats.misses,
            "hit_rate": f"{self.stats.hit_rate:.1%}",
            "total_accesses": total_accesses
        }


class ToolCache:
    """Tier 3: Tool Result Cache — stable tool outputs.

    Only tools listed in TOOL_TTL are cacheable; write-style tools in
    INVALIDATORS purge the caches of the tools they affect.  Results
    are pickled for storage.  NOTE: pickle is only safe here because
    values come from our own tools, never from untrusted input.
    """

    # TTL configuration per tool type (seconds)
    TOOL_TTL = {
        "system_info": 60,
        "disk_usage": 120,
        "git_status": 30,
        "git_log": 300,
        "health_check": 60,
        "gitea_list_issues": 120,
        "file_read": 30,
        "process_list": 30,
        "service_status": 60,
    }

    # Tools that invalidate cache on write operations
    INVALIDATORS = {
        "git_commit": ["git_status", "git_log"],
        "git_pull": ["git_status", "git_log"],
        "file_write": ["file_read"],
        "gitea_create_issue": ["gitea_list_issues"],
        "gitea_comment": ["gitea_list_issues"],
    }

    def __init__(self, db_path: str = "~/.timmy/cache/tool_cache.db"):
        self.db_path = Path(db_path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.stats = CacheStats()
        # LRU values: (pickled_result, created_at, ttl)
        self.lru = LRUCache(max_size=500)
        self._init_db()

    def _init_db(self):
        """Create the tool_results table and tool-name index if missing."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS tool_results (
                    tool_hash TEXT PRIMARY KEY,
                    tool_name TEXT NOT NULL,
                    params_hash TEXT NOT NULL,
                    result TEXT NOT NULL,
                    created_at REAL NOT NULL,
                    ttl INTEGER NOT NULL
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_tool_name
                ON tool_results(tool_name)
            """)

    def _hash_call(self, tool_name: str, params: Dict) -> str:
        """Hash tool name and params for cache key."""
        # sort_keys makes the key independent of dict insertion order.
        param_str = json.dumps(params, sort_keys=True)
        combined = f"{tool_name}:{param_str}"
        return hashlib.sha256(combined.encode()).hexdigest()[:32]

    def get(self, tool_name: str, params: Dict) -> Optional[Any]:
        """Return the cached result for this call, or None.

        Returns None immediately for tools not in TOOL_TTL; memory hits
        now honor TTL (previously stale LRU entries were served forever).
        """
        if tool_name not in self.TOOL_TTL:
            return None  # Not cacheable
        tool_hash = self._hash_call(tool_name, params)
        now = time.time()

        cached = self.lru.get(tool_hash)
        if cached is not None:
            pickled, created_at, ttl = cached
            if now - created_at < ttl:
                self.stats.record_hit()
                return pickle.loads(pickled)
            self.lru.invalidate(tool_hash)  # stale in memory

        with sqlite3.connect(self.db_path) as conn:
            row = conn.execute(
                "SELECT result, created_at, ttl FROM tool_results WHERE tool_hash = ?",
                (tool_hash,)
            ).fetchone()
            if row:
                result, created_at, ttl = row
                if now - created_at < ttl:
                    self.stats.record_hit()
                    self.lru.put(tool_hash, (result, created_at, ttl))
                    return pickle.loads(result)
                conn.execute("DELETE FROM tool_results WHERE tool_hash = ?", (tool_hash,))
                self.stats.record_eviction()

        self.stats.record_miss()
        return None

    def put(self, tool_name: str, params: Dict, result: Any):
        """Cache *result* for this tool call (no-op if not cacheable)."""
        if tool_name not in self.TOOL_TTL:
            return  # Not cacheable
        ttl = self.TOOL_TTL[tool_name]
        tool_hash = self._hash_call(tool_name, params)
        params_hash = hashlib.sha256(
            json.dumps(params, sort_keys=True).encode()
        ).hexdigest()[:16]
        now = time.time()
        pickled = pickle.dumps(result)
        # Add to LRU with creation time so memory hits can expire.
        self.lru.put(tool_hash, (pickled, now, ttl))
        # Add to disk
        with sqlite3.connect(self.db_path) as conn:
            conn.execute(
                """INSERT OR REPLACE INTO tool_results
                (tool_hash, tool_name, params_hash, result, created_at, ttl)
                VALUES (?, ?, ?, ?, ?, ?)""",
                (tool_hash, tool_name, params_hash, pickled, now, ttl)
            )

    def invalidate(self, tool_name: str):
        """Invalidate all cached results for a tool."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("DELETE FROM tool_results WHERE tool_name = ?", (tool_name,))
        # Clear matching LRU entries
        # (simplified: clear all since LRU doesn't track tool names)
        self.lru.clear()

    def handle_invalidation(self, tool_name: str):
        """Handle cache invalidation after a write operation."""
        if tool_name in self.INVALIDATORS:
            for dependent in self.INVALIDATORS[tool_name]:
                self.invalidate(dependent)

    def get_stats(self) -> Dict[str, Any]:
        """Return monitoring statistics for this tier."""
        with sqlite3.connect(self.db_path) as conn:
            count = conn.execute("SELECT COUNT(*) FROM tool_results").fetchone()[0]
            by_tool = conn.execute(
                "SELECT tool_name, COUNT(*) FROM tool_results GROUP BY tool_name"
            ).fetchall()
        return {
            "tier": "tool_cache",
            "memory_entries": len(self.lru.cache),
            "disk_entries": count,
            "hits": self.stats.hits,
            "misses": self.stats.misses,
            "hit_rate": f"{self.stats.hit_rate:.1%}",
            "by_tool": dict(by_tool)
        }


class EmbeddingCache:
    """Tier 4: Embedding Cache — for RAG pipeline (#93).

    Keyed by file path; an entry is valid only while the file's mtime
    and the embedding model are unchanged, so there is no TTL.
    """

    def __init__(self, db_path: str = "~/.timmy/cache/embeddings.db"):
        self.db_path = Path(db_path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.stats = CacheStats()
        self._init_db()

    def _init_db(self):
        """Create the embeddings table if missing."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS embeddings (
                    file_path TEXT PRIMARY KEY,
                    mtime REAL NOT NULL,
                    embedding BLOB NOT NULL,
                    model_name TEXT NOT NULL,
                    created_at REAL NOT NULL
                )
            """)

    def get(self, file_path: str, mtime: float, model_name: str) -> Optional[List[float]]:
        """Get embedding if file hasn't changed and model matches."""
        with sqlite3.connect(self.db_path) as conn:
            row = conn.execute(
                "SELECT embedding, mtime, model_name FROM embeddings WHERE file_path = ?",
                (file_path,)
            ).fetchone()
            if row:
                embedding_blob, stored_mtime, stored_model = row
                if stored_mtime == mtime and stored_model == model_name:
                    self.stats.record_hit()
                    return pickle.loads(embedding_blob)
        self.stats.record_miss()
        return None

    def put(self, file_path: str, mtime: float, embedding: List[float], model_name: str):
        """Store embedding with file metadata."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute(
                """INSERT OR REPLACE INTO embeddings
                (file_path, mtime, embedding, model_name, created_at)
                VALUES (?, ?, ?, ?, ?)""",
                (file_path, mtime, pickle.dumps(embedding), model_name, time.time())
            )

    def get_stats(self) -> Dict[str, Any]:
        """Return monitoring statistics for this tier."""
        with sqlite3.connect(self.db_path) as conn:
            count = conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()[0]
            models = conn.execute(
                "SELECT model_name, COUNT(*) FROM embeddings GROUP BY model_name"
            ).fetchall()
        return {
            "tier": "embedding_cache",
            "entries": count,
            "hits": self.stats.hits,
            "misses": self.stats.misses,
            "hit_rate": f"{self.stats.hit_rate:.1%}",
            "by_model": dict(models)
        }


class TemplateCache:
    """Tier 5: Template Cache — pre-compiled prompts.

    Pure in-memory; templates live for the process lifetime.
    """

    def __init__(self):
        self.templates: Dict[str, str] = {}
        self.tokenized: Dict[str, Any] = {}  # For tokenizer outputs
        self.stats = CacheStats()

    def load_template(self, name: str, path: str) -> str:
        """Load and cache a template file (reads disk only once)."""
        if name not in self.templates:
            # Explicit encoding so templates read identically on all
            # platforms (previously used the locale default).
            with open(path, 'r', encoding='utf-8') as f:
                self.templates[name] = f.read()
            self.stats.record_miss()
        else:
            self.stats.record_hit()
        return self.templates[name]

    def get(self, name: str) -> Optional[str]:
        """Get cached template, or None if never loaded."""
        if name in self.templates:
            self.stats.record_hit()
            return self.templates[name]
        self.stats.record_miss()
        return None

    def cache_tokenized(self, name: str, tokens: Any):
        """Cache tokenized version of template."""
        self.tokenized[name] = tokens

    def get_tokenized(self, name: str) -> Optional[Any]:
        """Get cached tokenized template."""
        return self.tokenized.get(name)

    def get_stats(self) -> Dict[str, Any]:
        """Return monitoring statistics for this tier."""
        return {
            "tier": "template_cache",
            "templates_cached": len(self.templates),
            "tokenized_cached": len(self.tokenized),
            "hits": self.stats.hits,
            "misses": self.stats.misses,
            "hit_rate": f"{self.stats.hit_rate:.1%}"
        }


class HTTPCache:
    """Tier 6: HTTP Response Cache — for API calls.

    Stores the body plus ETag / Last-Modified validators so callers can
    issue conditional requests.  LRU entries carry creation time and
    TTL so memory hits expire correctly.
    """

    def __init__(self, db_path: str = "~/.timmy/cache/http_cache.db"):
        self.db_path = Path(db_path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.stats = CacheStats()
        # LRU values: (result_dict, created_at, ttl)
        self.lru = LRUCache(max_size=200)
        self._init_db()

    def _init_db(self):
        """Create the http_responses table if missing."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS http_responses (
                    url_hash TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    response TEXT NOT NULL,
                    etag TEXT,
                    last_modified TEXT,
                    created_at REAL NOT NULL,
                    ttl INTEGER NOT NULL
                )
            """)

    def _hash_url(self, url: str) -> str:
        """Stable cache key for a URL."""
        return hashlib.sha256(url.encode()).hexdigest()[:32]

    def get(self, url: str, ttl: int = 300) -> Optional[Dict]:
        """Return cached {response, etag, last_modified} for *url*, or None.

        Effective TTL is the minimum of the requested *ttl* and the TTL
        stored at put() time; expired rows are deleted eagerly.
        """
        url_hash = self._hash_url(url)
        now = time.time()

        cached = self.lru.get(url_hash)
        if cached is not None:
            result, created_at, stored_ttl = cached
            if now - created_at < min(ttl, stored_ttl):
                self.stats.record_hit()
                return result
            self.lru.invalidate(url_hash)  # stale in memory

        with sqlite3.connect(self.db_path) as conn:
            row = conn.execute(
                "SELECT response, etag, last_modified, created_at, ttl "
                "FROM http_responses WHERE url_hash = ?",
                (url_hash,)
            ).fetchone()
            if row:
                response, etag, last_modified, created_at, stored_ttl = row
                effective_ttl = min(ttl, stored_ttl)
                if now - created_at < effective_ttl:
                    self.stats.record_hit()
                    result = {
                        "response": response,
                        "etag": etag,
                        "last_modified": last_modified
                    }
                    self.lru.put(url_hash, (result, created_at, stored_ttl))
                    return result
                conn.execute("DELETE FROM http_responses WHERE url_hash = ?", (url_hash,))
                self.stats.record_eviction()

        self.stats.record_miss()
        return None

    def put(self, url: str, response: str, etag: Optional[str] = None,
            last_modified: Optional[str] = None, ttl: int = 300):
        """Cache HTTP response."""
        url_hash = self._hash_url(url)
        now = time.time()
        result = {
            "response": response,
            "etag": etag,
            "last_modified": last_modified
        }
        self.lru.put(url_hash, (result, now, ttl))
        with sqlite3.connect(self.db_path) as conn:
            conn.execute(
                """INSERT OR REPLACE INTO http_responses
                (url_hash, url, response, etag, last_modified, created_at, ttl)
                VALUES (?, ?, ?, ?, ?, ?, ?)""",
                (url_hash, url, response, etag, last_modified, now, ttl)
            )

    def get_stats(self) -> Dict[str, Any]:
        """Return monitoring statistics for this tier."""
        with sqlite3.connect(self.db_path) as conn:
            count = conn.execute("SELECT COUNT(*) FROM http_responses").fetchone()[0]
        return {
            "tier": "http_cache",
            "memory_entries": len(self.lru.cache),
            "disk_entries": count,
            "hits": self.stats.hits,
            "misses": self.stats.misses,
            "hit_rate": f"{self.stats.hit_rate:.1%}"
        }


class CacheManager:
    """Central manager for all cache tiers."""

    def __init__(self, base_path: str = "~/.timmy/cache"):
        self.base_path = Path(base_path).expanduser()
        self.base_path.mkdir(parents=True, exist_ok=True)
        # Initialize all tiers
        self.response = ResponseCache(self.base_path / "responses.db")
        self.tool = ToolCache(self.base_path / "tool_cache.db")
        self.embedding = EmbeddingCache(self.base_path / "embeddings.db")
        self.template = TemplateCache()
        self.http = HTTPCache(self.base_path / "http_cache.db")
        # KV cache handled by llama-server (external)

    def get_all_stats(self) -> Dict[str, Dict]:
        """Get statistics for all cache tiers."""
        return {
            "response_cache": self.response.get_stats(),
            "tool_cache": self.tool.get_stats(),
            "embedding_cache": self.embedding.get_stats(),
            "template_cache": self.template.get_stats(),
            "http_cache": self.http.get_stats(),
        }

    def clear_all(self):
        """Clear all caches: memory tiers, templates, and every database."""
        self.response.lru.clear()
        self.tool.lru.clear()
        self.http.lru.clear()
        self.template.templates.clear()
        self.template.tokenized.clear()
        # Clear databases
        for db_file in self.base_path.glob("*.db"):
            with sqlite3.connect(db_file) as conn:
                tables = conn.execute(
                    "SELECT name FROM sqlite_master WHERE type='table'"
                ).fetchall()
                for (table,) in tables:
                    # Table names come from sqlite_master, not user input,
                    # but quote them anyway since identifiers can't be
                    # bound parameters.
                    conn.execute(f'DELETE FROM "{table}"')

    def cached_tool(self, ttl: Optional[int] = None):
        """Decorator for caching tool results via the ToolCache tier.

        The wrapped function's name selects the TTL from ToolCache.TOOL_TTL;
        functions not listed there pass through uncached.
        """
        def decorator(func: Callable) -> Callable:
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                tool_name = func.__name__
                params = {"args": args, "kwargs": kwargs}
                # Try cache
                cached = self.tool.get(tool_name, params)
                if cached is not None:
                    return cached
                # Execute and cache
                result = func(*args, **kwargs)
                self.tool.put(tool_name, params, result)
                return result
            return wrapper
        return decorator


# Singleton instance
cache_manager = CacheManager()


if __name__ == "__main__":
    # Test the cache
    print("Testing Timmy Cache Layer...")
    print()

    # Test response cache
    print("1. Response Cache:")
    cache_manager.response.put("What is 2+2?", "4", ttl=60)
    cached = cache_manager.response.get("What is 2+2?")
    print(f"   Cached: {cached}")
    print(f"   Stats: {cache_manager.response.get_stats()}")
    print()

    # Test tool cache
    print("2. Tool Cache:")
    cache_manager.tool.put("system_info", {}, {"cpu": "ARM64", "ram": "8GB"})
    cached = cache_manager.tool.get("system_info", {})
    print(f"   Cached: {cached}")
    print(f"   Stats: {cache_manager.tool.get_stats()}")
    print()

    # Test all stats
    print("3. All Cache Stats:")
    stats = cache_manager.get_all_stats()
    for tier, tier_stats in stats.items():
        print(f"   {tier}: {tier_stats}")
    print()

    print("✅ Cache layer operational")