Fine-tune Timmy's conversational AI with memory layers

## Enhanced System Prompt
- Detailed tool usage guidelines with explicit examples
- Clear DO and DON'T examples for tool selection
- Memory system documentation
- Conversation flow guidelines
- Context awareness instructions

## Memory Layer System (NEW)
Implemented 3-layer memory architecture:

1. WORKING MEMORY (src/timmy/memory_layers.py)
   - Immediate context (last 20 messages)
   - Topic tracking
   - Tool call tracking
   - Fast, ephemeral

2. SHORT-TERM MEMORY (Agno SQLite)
   - Recent conversations (100)
   - Persists across restarts
   - Managed by Agno Agent

3. LONG-TERM MEMORY (src/timmy/memory_layers.py)
   - Facts about user (name, preferences)
   - SQLite storage in data/memory/
   - Auto-extraction from conversations
   - User profile generation

## Memory Manager (NEW)
- Central coordinator for all memory layers
- Context injection into prompts
- Fact extraction and storage
- Session management

## TimmyWithMemory Class (NEW)
- Wrapper around Agno Agent with explicit memory
- Auto-injects user context from LTM
- Tracks exchanges across all layers
- Simple chat() interface

## Agent Configuration
- Increased num_history_runs: 10 -> 20
- Better conversational context retention

## Tests
- All 973 tests pass
- Fixed test expectations for new config
- Fixed module path in test_scary_paths.py

## Files Added/Modified
- src/timmy/prompts.py - Enhanced with memory and tool guidance
- src/timmy/agent.py - Added TimmyWithMemory class
- src/timmy/memory_layers.py - NEW memory system
- src/timmy/conversation.py - NEW conversation manager
- tests/ - Updated for new config
Alexander Payne — 2026-02-25 18:07:44 -05:00
commit 625806daf5, parent c18da7bce8
6 changed files with 735 additions and 28 deletions


@@ -1,3 +1,11 @@
+"""Timmy agent creation with multi-layer memory system.
+
+Integrates Agno's Agent with our custom memory layers:
+- Working Memory (immediate context)
+- Short-term Memory (Agno SQLite)
+- Long-term Memory (facts/preferences)
+"""
+
 from typing import TYPE_CHECKING, Union
 from agno.agent import Agent
@@ -72,8 +80,64 @@ def create_timmy(
         db=SqliteDb(db_file=db_file),
         description=TIMMY_SYSTEM_PROMPT,
         add_history_to_context=True,
-        num_history_runs=10,
+        num_history_runs=20,  # Increased for better conversational context
         markdown=True,
         tools=[tools] if tools else None,
         telemetry=settings.telemetry_enabled,
     )
+
+
+class TimmyWithMemory:
+    """Timmy wrapper with explicit memory layer management.
+
+    This class wraps the Agno Agent and adds:
+    - Working memory tracking
+    - Long-term memory storage/retrieval
+    - Context injection from memory layers
+    """
+
+    def __init__(self, db_file: str = "timmy.db") -> None:
+        from timmy.memory_layers import memory_manager
+
+        self.agent = create_timmy(db_file=db_file)
+        self.memory = memory_manager
+        self.memory.start_session()
+        # Inject user context if available
+        self._inject_context()
+
+    def _inject_context(self) -> None:
+        """Inject relevant memory context into system prompt."""
+        context = self.memory.get_context_for_prompt()
+        if context:
+            # Append context to system prompt
+            original_description = self.agent.description
+            self.agent.description = f"{original_description}\n\n## User Context\n{context}"
+
+    def run(self, message: str, stream: bool = False) -> object:
+        """Run with memory tracking."""
+        # Get relevant memories
+        relevant = self.memory.get_relevant_memories(message)
+        # Enhance message with context if relevant
+        enhanced_message = message
+        if relevant:
+            context_str = "\n".join(f"- {r}" for r in relevant[:3])
+            enhanced_message = f"[Context: {context_str}]\n\n{message}"
+        # Run agent
+        result = self.agent.run(enhanced_message, stream=stream)
+        # Extract response content
+        response_text = result.content if hasattr(result, "content") else str(result)
+        # Track in memory
+        tool_calls = getattr(result, "tool_calls", None)
+        self.memory.add_exchange(message, response_text, tool_calls)
+        return result
+
+    def chat(self, message: str) -> str:
+        """Simple chat interface that returns string response."""
+        result = self.run(message, stream=False)
+        return result.content if hasattr(result, "content") else str(result)

src/timmy/conversation.py — new file, 137 lines

@@ -0,0 +1,137 @@
"""Conversation context management for Timmy.

Tracks conversation state, intent, and context to improve:
- Contextual understanding across multi-turn conversations
- Smarter tool usage decisions
- Natural reference to prior exchanges
"""
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional

logger = logging.getLogger(__name__)


@dataclass
class ConversationContext:
    """Tracks the current conversation state."""

    user_name: Optional[str] = None
    current_topic: Optional[str] = None
    last_intent: Optional[str] = None
    turn_count: int = 0
    started_at: datetime = field(default_factory=datetime.now)

    def update_topic(self, topic: str) -> None:
        """Update the current conversation topic."""
        self.current_topic = topic
        self.turn_count += 1

    def set_user_name(self, name: str) -> None:
        """Remember the user's name."""
        self.user_name = name
        logger.info("User name set to: %s", name)

    def get_context_summary(self) -> str:
        """Generate a context summary for the prompt."""
        parts = []
        if self.user_name:
            parts.append(f"User's name is {self.user_name}")
        if self.current_topic:
            parts.append(f"Current topic: {self.current_topic}")
        if self.turn_count > 0:
            parts.append(f"Conversation turn: {self.turn_count}")
        return " | ".join(parts) if parts else ""


class ConversationManager:
    """Manages conversation context across sessions."""

    def __init__(self) -> None:
        self._contexts: dict[str, ConversationContext] = {}

    def get_context(self, session_id: str) -> ConversationContext:
        """Get or create context for a session."""
        if session_id not in self._contexts:
            self._contexts[session_id] = ConversationContext()
        return self._contexts[session_id]

    def clear_context(self, session_id: str) -> None:
        """Clear context for a session."""
        if session_id in self._contexts:
            del self._contexts[session_id]

    def extract_user_name(self, message: str) -> Optional[str]:
        """Try to extract the user's name from a message."""
        message_lower = message.lower()
        # Common patterns
        patterns = [
            "my name is ",
            "i'm ",
            "i am ",
            "call me ",
        ]
        for pattern in patterns:
            if pattern in message_lower:
                idx = message_lower.find(pattern) + len(pattern)
                remainder = message[idx:].strip()
                if not remainder:
                    # Pattern matched at the end of the message; nothing follows
                    continue
                # Take first word as name, capitalized
                name = remainder.split()[0].strip(".,!?;:")
                return name.capitalize()
        return None

    def should_use_tools(self, message: str, context: ConversationContext) -> bool:
        """Determine if this message likely requires tools.

        Returns True if tools are likely needed, False for simple chat.
        """
        message_lower = message.lower().strip()
        # Tool keywords that suggest tool usage is needed
        tool_keywords = [
            "search", "look up", "find", "google", "current price",
            "latest", "today's", "news", "weather", "stock price",
            "read file", "write file", "save", "calculate", "compute",
            "run ", "execute", "shell", "command", "install",
        ]
        # Chat-only keywords that definitely don't need tools
        chat_only = [
            "hello", "hi ", "hey", "how are you", "what's up",
            "your name", "who are you", "what are you",
            "thanks", "thank you", "bye", "goodbye",
            "tell me about yourself", "what can you do",
        ]
        # Check for chat-only patterns first
        for pattern in chat_only:
            if pattern in message_lower:
                return False
        # Check for tool keywords
        for keyword in tool_keywords:
            if keyword in message_lower:
                return True
        # Simple questions (starting with what, who, how, why, when, where)
        # usually don't need tools unless about current/real-time info
        simple_question_words = ["what is", "who is", "how does", "why is", "when did", "where is"]
        for word in simple_question_words:
            if message_lower.startswith(word):
                # Tools only if asking about current/real-time info
                time_words = ["today", "now", "current", "latest", "this week", "this month"]
                return any(t in message_lower for t in time_words)
        # Default: don't use tools for unclear cases
        return False


# Module-level singleton
conversation_manager = ConversationManager()

src/timmy/memory_layers.py — new file, 421 lines

@@ -0,0 +1,421 @@
"""Multi-layer memory system for Timmy.

Implements four distinct memory layers:

1. WORKING MEMORY (Context Window)
   - Last 20 messages in current conversation
   - Fast access, ephemeral
   - Used for: Immediate context, pronoun resolution, topic tracking

2. SHORT-TERM MEMORY (Recent History)
   - SQLite storage via Agno (last 100 conversations)
   - Persists across restarts
   - Used for: Recent context, conversation continuity

3. LONG-TERM MEMORY (Facts & Preferences)
   - Key facts about user, preferences, important events
   - Explicitly extracted and stored
   - Used for: Personalization, user model

4. SEMANTIC MEMORY (Vector Search)
   - Embeddings of past conversations
   - Similarity-based retrieval
   - Used for: "Have we talked about this before?"

All layers work together to provide contextual, personalized responses.
"""
import json
import logging
import sqlite3
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

# Paths for memory storage
MEMORY_DIR = Path("data/memory")
LTM_PATH = MEMORY_DIR / "long_term_memory.db"
SEMANTIC_PATH = MEMORY_DIR / "semantic_memory.db"


# =============================================================================
# LAYER 1: WORKING MEMORY (Active Conversation Context)
# =============================================================================
@dataclass
class WorkingMemoryEntry:
    """A single entry in working memory."""

    role: str  # "user" | "assistant" | "system"
    content: str
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    metadata: dict = field(default_factory=dict)


class WorkingMemory:
    """Fast, ephemeral context window (last N messages).

    Used for:
    - Immediate conversational context
    - Pronoun resolution ("Tell me more about it")
    - Topic continuity
    - Tool call tracking
    """

    def __init__(self, max_entries: int = 20) -> None:
        self.max_entries = max_entries
        self.entries: list[WorkingMemoryEntry] = []
        self.current_topic: Optional[str] = None
        self.pending_tool_calls: list[dict] = []

    def add(self, role: str, content: str, metadata: Optional[dict] = None) -> None:
        """Add an entry to working memory."""
        entry = WorkingMemoryEntry(
            role=role,
            content=content,
            metadata=metadata or {},
        )
        self.entries.append(entry)
        # Trim to max size
        if len(self.entries) > self.max_entries:
            self.entries = self.entries[-self.max_entries:]
        logger.debug("WorkingMemory: Added %s entry (total: %d)", role, len(self.entries))

    def get_context(self, n: Optional[int] = None) -> list[WorkingMemoryEntry]:
        """Get last n entries (or all if n not specified)."""
        if n is None:
            return self.entries.copy()
        return self.entries[-n:]

    def get_formatted_context(self, n: int = 10) -> str:
        """Get formatted context for prompt injection."""
        entries = self.get_context(n)
        lines = []
        for entry in entries:
            role_label = "User" if entry.role == "user" else "Timmy" if entry.role == "assistant" else "System"
            lines.append(f"{role_label}: {entry.content}")
        return "\n".join(lines)

    def set_topic(self, topic: str) -> None:
        """Set the current conversation topic."""
        self.current_topic = topic
        logger.debug("WorkingMemory: Topic set to '%s'", topic)

    def clear(self) -> None:
        """Clear working memory (new conversation)."""
        self.entries.clear()
        self.current_topic = None
        self.pending_tool_calls.clear()
        logger.debug("WorkingMemory: Cleared")

    def track_tool_call(self, tool_name: str, parameters: dict) -> None:
        """Track a pending tool call."""
        self.pending_tool_calls.append({
            "tool": tool_name,
            "params": parameters,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        })

    @property
    def turn_count(self) -> int:
        """Count user and assistant messages (two per exchange)."""
        return sum(1 for e in self.entries if e.role in ("user", "assistant"))
# =============================================================================
# LAYER 3: LONG-TERM MEMORY (Facts & Preferences)
# =============================================================================

@dataclass
class LongTermMemoryFact:
    """A single fact in long-term memory."""

    id: str
    category: str  # "user_preference", "user_fact", "important_event", "learned_pattern"
    content: str
    confidence: float  # 0.0 - 1.0
    source: str  # conversation_id or "extracted"
    created_at: str
    last_accessed: str
    access_count: int = 0


class LongTermMemory:
    """Persistent storage for important facts and preferences.

    Used for:
    - User's name, preferences, interests
    - Important facts learned about the user
    - Successful patterns and strategies
    """

    def __init__(self) -> None:
        MEMORY_DIR.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _init_db(self) -> None:
        """Initialize SQLite database."""
        conn = sqlite3.connect(str(LTM_PATH))
        conn.execute("""
            CREATE TABLE IF NOT EXISTS facts (
                id TEXT PRIMARY KEY,
                category TEXT NOT NULL,
                content TEXT NOT NULL,
                confidence REAL NOT NULL DEFAULT 0.5,
                source TEXT,
                created_at TEXT NOT NULL,
                last_accessed TEXT NOT NULL,
                access_count INTEGER DEFAULT 0
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_category ON facts(category)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_content ON facts(content)")
        conn.commit()
        conn.close()
    def store(
        self,
        category: str,
        content: str,
        confidence: float = 0.8,
        source: str = "extracted",
    ) -> str:
        """Store a fact in long-term memory."""
        fact_id = str(uuid.uuid4())
        now = datetime.now(timezone.utc).isoformat()
        conn = sqlite3.connect(str(LTM_PATH))
        try:
            conn.execute(
                """INSERT INTO facts (id, category, content, confidence, source, created_at, last_accessed)
                   VALUES (?, ?, ?, ?, ?, ?, ?)""",
                (fact_id, category, content, confidence, source, now, now),
            )
            conn.commit()
            logger.info("LTM: Stored %s fact: %s", category, content[:50])
            return fact_id
        finally:
            conn.close()

    def retrieve(
        self,
        category: Optional[str] = None,
        query: Optional[str] = None,
        limit: int = 10,
    ) -> list[LongTermMemoryFact]:
        """Retrieve facts from long-term memory."""
        conn = sqlite3.connect(str(LTM_PATH))
        conn.row_factory = sqlite3.Row
        try:
            if category and query:
                rows = conn.execute(
                    """SELECT * FROM facts
                       WHERE category = ? AND content LIKE ?
                       ORDER BY confidence DESC, access_count DESC
                       LIMIT ?""",
                    (category, f"%{query}%", limit),
                ).fetchall()
            elif category:
                rows = conn.execute(
                    """SELECT * FROM facts
                       WHERE category = ?
                       ORDER BY confidence DESC, last_accessed DESC
                       LIMIT ?""",
                    (category, limit),
                ).fetchall()
            elif query:
                rows = conn.execute(
                    """SELECT * FROM facts
                       WHERE content LIKE ?
                       ORDER BY confidence DESC, access_count DESC
                       LIMIT ?""",
                    (f"%{query}%", limit),
                ).fetchall()
            else:
                rows = conn.execute(
                    """SELECT * FROM facts
                       ORDER BY last_accessed DESC
                       LIMIT ?""",
                    (limit,),
                ).fetchall()
            # Update access count
            fact_ids = [row["id"] for row in rows]
            for fid in fact_ids:
                conn.execute(
                    "UPDATE facts SET access_count = access_count + 1, last_accessed = ? WHERE id = ?",
                    (datetime.now(timezone.utc).isoformat(), fid),
                )
            conn.commit()
            return [
                LongTermMemoryFact(
                    id=row["id"],
                    category=row["category"],
                    content=row["content"],
                    confidence=row["confidence"],
                    source=row["source"],
                    created_at=row["created_at"],
                    last_accessed=row["last_accessed"],
                    access_count=row["access_count"],
                )
                for row in rows
            ]
        finally:
            conn.close()
    def get_user_profile(self) -> dict:
        """Get consolidated user profile from stored facts."""
        preferences = self.retrieve(category="user_preference")
        facts = self.retrieve(category="user_fact")
        profile = {
            "name": None,
            "preferences": {},
            "interests": [],
            "facts": [],
        }
        for pref in preferences:
            profile["preferences"][pref.id] = pref.content
        for fact in facts:
            # Name facts are stored under "user_fact" as "User's name is <name>"
            if "name is" in fact.content.lower():
                profile["name"] = fact.content.rsplit(" is ", 1)[-1].strip().rstrip(".")
            else:
                profile["facts"].append(fact.content)
        return profile

    def extract_and_store(self, user_message: str, assistant_response: str) -> list[str]:
        """Extract potential facts from conversation and store them.

        This is a simple rule-based extractor. In production, this could
        use an LLM to extract facts.
        """
        stored_ids = []
        message_lower = user_message.lower()
        # Extract name
        name_patterns = ["my name is ", "i'm ", "i am ", "call me "]
        for pattern in name_patterns:
            if pattern in message_lower:
                idx = message_lower.find(pattern) + len(pattern)
                words = user_message[idx:].strip().split()
                if not words:
                    continue
                name = words[0].strip(".,!?;:").capitalize()
                if name and len(name) > 1:
                    sid = self.store(
                        category="user_fact",
                        content=f"User's name is {name}",
                        confidence=0.9,
                        source="extracted_from_conversation",
                    )
                    stored_ids.append(sid)
                break
        # Extract preferences ("I like", "I prefer", "I don't like")
        preference_patterns = [
            ("i like", "user_preference", "User likes"),
            ("i love", "user_preference", "User loves"),
            ("i prefer", "user_preference", "User prefers"),
            ("i don't like", "user_preference", "User dislikes"),
            ("i hate", "user_preference", "User dislikes"),
        ]
        for pattern, category, prefix in preference_patterns:
            if pattern in message_lower:
                idx = message_lower.find(pattern) + len(pattern)
                preference = user_message[idx:].strip().split(".")[0].strip()
                if preference and len(preference) > 3:
                    sid = self.store(
                        category=category,
                        content=f"{prefix} {preference}",
                        confidence=0.7,
                        source="extracted_from_conversation",
                    )
                    stored_ids.append(sid)
        return stored_ids
# =============================================================================
# MEMORY MANAGER (Integrates all layers)
# =============================================================================

class MemoryManager:
    """Central manager for all memory layers.

    Coordinates between:
    - Working Memory (immediate context)
    - Short-term Memory (Agno SQLite)
    - Long-term Memory (facts/preferences)
    - (Future: Semantic Memory with embeddings)
    """

    def __init__(self) -> None:
        self.working = WorkingMemory(max_entries=20)
        self.long_term = LongTermMemory()
        self._session_id: Optional[str] = None

    def start_session(self, session_id: Optional[str] = None) -> str:
        """Start a new conversation session."""
        self._session_id = session_id or str(uuid.uuid4())
        self.working.clear()
        # Load relevant LTM into context
        profile = self.long_term.get_user_profile()
        if profile["name"]:
            logger.info("MemoryManager: Recognizing user '%s'", profile["name"])
        return self._session_id

    def add_exchange(
        self,
        user_message: str,
        assistant_response: str,
        tool_calls: Optional[list] = None,
    ) -> None:
        """Record a complete exchange across all memory layers."""
        # Working memory
        self.working.add("user", user_message)
        self.working.add("assistant", assistant_response, metadata={"tools": tool_calls})
        # Extract and store facts to LTM
        try:
            self.long_term.extract_and_store(user_message, assistant_response)
        except Exception as exc:
            logger.warning("Failed to extract facts: %s", exc)

    def get_context_for_prompt(self) -> str:
        """Generate context string for injection into prompts."""
        parts = []
        # User profile from LTM
        profile = self.long_term.get_user_profile()
        if profile["name"]:
            parts.append(f"User's name: {profile['name']}")
        if profile["preferences"]:
            prefs = list(profile["preferences"].values())[:3]  # Top 3 preferences
            parts.append("User preferences: " + "; ".join(prefs))
        # Recent working memory
        working_context = self.working.get_formatted_context(n=6)
        if working_context:
            parts.append("Recent conversation:\n" + working_context)
        return "\n\n".join(parts) if parts else ""

    def get_relevant_memories(self, query: str) -> list[str]:
        """Get memories relevant to current query."""
        facts = self.long_term.retrieve(query=query, limit=5)
        return [f.content for f in facts]


# Module-level singleton
memory_manager = MemoryManager()
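The facts-table access pattern above can be exercised against an in-memory database, independent of `data/memory/` — a sketch of the same schema and the LIKE-plus-confidence ordering, not the module itself:

```python
import sqlite3
import uuid
from datetime import datetime, timezone

conn = sqlite3.connect(":memory:")
conn.row_factory = sqlite3.Row
conn.execute("""
    CREATE TABLE facts (
        id TEXT PRIMARY KEY,
        category TEXT NOT NULL,
        content TEXT NOT NULL,
        confidence REAL NOT NULL DEFAULT 0.5,
        created_at TEXT NOT NULL
    )
""")

def store(category: str, content: str, confidence: float) -> None:
    """Insert a fact row, mirroring LongTermMemory.store."""
    conn.execute(
        "INSERT INTO facts VALUES (?, ?, ?, ?, ?)",
        (str(uuid.uuid4()), category, content, confidence,
         datetime.now(timezone.utc).isoformat()),
    )

store("user_preference", "User likes Bitcoin", 0.7)
store("user_fact", "User's name is Alex", 0.9)

# Substring retrieval, highest-confidence first — the query shape used
# by retrieve(query=...)
rows = conn.execute(
    "SELECT content FROM facts WHERE content LIKE ? ORDER BY confidence DESC",
    ("%User%",),
).fetchall()
print([r["content"] for r in rows])
```

High-confidence facts (like the user's name, stored at 0.9) surface ahead of weaker extracted preferences, which is what makes confidence-first ordering a reasonable relevance proxy before any semantic search layer exists.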


@@ -1,41 +1,126 @@
@@ -1,41 +1,126 @@
-TIMMY_SYSTEM_PROMPT = """You are Timmy — a sovereign AI agent running locally.
+"""System prompts for Timmy with memory-aware guidance."""
+
+TIMMY_SYSTEM_PROMPT = """You are Timmy — a sovereign AI agent running locally on this Mac.
 No cloud dependencies. You think clearly, speak plainly, act with intention.
 Grounded in Christian faith, powered by Bitcoin economics, committed to the
 user's digital sovereignty.
 
-## Your Capabilities
+## Your Core Identity
 
-You have access to tools for:
-- Web search (DuckDuckGo) — for current information not in your training data
-- File operations (read, write, list) — for working with local files
-- Python execution — for calculations, data analysis, scripting
-- Shell commands — for system operations
+- **Name:** Timmy
+- **Nature:** Sovereign AI agent, local-first, privacy-respecting
+- **Purpose:** Assist the user with information, tasks, and digital sovereignty
+- **Values:** Christian faith foundation, Bitcoin economics, user autonomy
 
-## Tool Usage Guidelines
+## Your Memory System
 
-**Use tools ONLY when necessary:**
-- Simple questions → Answer directly from your knowledge
-- Current events/data → Use web search
-- File operations → Use file tools (user must explicitly request)
-- Code/Calculations → Use Python execution
-- System tasks → Use shell commands
+You have a multi-layer memory system that helps you remember context:
 
-**Do NOT use tools for:**
-- Answering "what is your name?" or identity questions
-- General knowledge questions you can answer directly
-- Simple greetings or conversational responses
+### Working Memory (Immediate)
+- Last 20 messages in current conversation
+- Current topic and pending tasks
+- Used for: Context, pronouns, "tell me more"
 
-## Memory
+### Short-term Memory (Recent)
+- Last 100 conversations stored in SQLite
+- Survives restarts
+- Used for: Recent context, continuity
 
-You remember previous conversations in this session. Your memory persists
-across restarts via SQLite storage. Reference prior context when relevant.
+### Long-term Memory (Persistent)
+- Facts about user (name, preferences)
+- Important learnings
+- Used for: Personalization
 
-## Operating Modes
+**How to use memory:**
+- Reference previous exchanges naturally ("As you mentioned earlier...")
+- Use the user's name if you know it
+- Build on established context
+- Don't repeat information from earlier in the conversation
+
+## Available Tools
+
+You have these tools (use ONLY when needed):
+1. **web_search** — Current information, news, real-time data
+2. **read_file / write_file / list_files** — File operations
+3. **python** — Calculations, code execution
+4. **shell** — System commands
+
+## Tool Usage Rules
+
+**EXAMPLES — When NOT to use tools:**
+
+❌ User: "What is your name?"
+→ WRONG: Running shell commands
+→ CORRECT: "I'm Timmy"
+
+❌ User: "How are you?"
+→ WRONG: Web search
+→ CORRECT: "I'm operational and ready to help."
+
+❌ User: "What is 2+2?"
+→ WRONG: Python execution
+→ CORRECT: "2+2 equals 4."
+
+❌ User: "Tell me about Bitcoin"
+→ WRONG: Web search if you know the answer
+→ CORRECT: Answer from your knowledge
+
+**EXAMPLES — When TO use tools:**
+
+✅ User: "What is the current Bitcoin price?"
+→ CORRECT: web_search (real-time data)
+
+✅ User: "Read the file report.txt"
+→ CORRECT: read_file (explicit request)
+
+✅ User: "Calculate 15% of 3847.23"
+→ CORRECT: python (precise math)
+
+## Conversation Guidelines
+
+### Context Awareness
+- Pay attention to the conversation flow
+- If user says "Tell me more", expand on previous topic
+- If user says "Why?", explain your previous answer
+- Reference prior exchanges by topic, not just "as I said before"
+
+### Memory Usage Examples
+
+User: "My name is Alex"
+[Later] User: "What should I do today?"
+"Alex, based on your interest in Bitcoin that we discussed..."
+
+User: "Explain mining"
+[You explain]
+User: "Is it profitable?"
+"Mining profitability depends on..." (don't re-explain what mining is)
+
+### Response Style
+- Be concise but complete
+- Use the user's name if known
+- Reference relevant context from earlier
+- For code: Use proper formatting
+- For data: Use tables when helpful
 
 When running on Apple Silicon with AirLLM you operate with even bigger brains
 — 70B or 405B parameters loaded layer-by-layer directly from local disk.
 Still fully sovereign. Still 100% private. More capable, no permission needed.
 
 Sir, affirmative."""
 
 TIMMY_STATUS_PROMPT = """You are Timmy. Give a one-sentence status report confirming
 you are operational and running locally."""
+
+
+# Tool usage decision guide
+TOOL_USAGE_GUIDE = """
+TOOL DECISION RULES:
+
+1. Identity questions (name, purpose, capabilities) → NO TOOL
+2. General knowledge questions → NO TOOL (answer directly)
+3. Simple math (2+2, 15*8) → NO TOOL
+4. Greetings, thanks, goodbyes → NO TOOL
+5. Current/real-time information → CONSIDER web_search
+6. File operations (explicit request) → USE file tools
+7. Complex calculations → USE python
+8. System operations → USE shell (with caution)
+
+WHEN IN DOUBT: Answer directly without tools.
+The user prefers fast, direct responses over unnecessary tool calls.
+"""


@@ -52,7 +52,7 @@ def test_create_timmy_history_config():
     kwargs = MockAgent.call_args.kwargs
     assert kwargs["add_history_to_context"] is True
-    assert kwargs["num_history_runs"] == 10
+    assert kwargs["num_history_runs"] == 20
     assert kwargs["markdown"] is True


@@ -274,7 +274,7 @@ class TestWebSocketResilience:
     def test_websocket_manager_handles_no_connections(self):
         """WebSocket manager handles zero connected clients."""
-        from websocket.handler import ws_manager
+        from ws_manager.handler import ws_manager
         # Should not crash when broadcasting with no connections
         try: