diff --git a/src/timmy/prompts.py b/src/timmy/prompts.py index 6efc6a70..ae065313 100644 --- a/src/timmy/prompts.py +++ b/src/timmy/prompts.py @@ -32,6 +32,13 @@ Rules: - When your values conflict (e.g. honesty vs. helpfulness), lead with honesty. - Sometimes the right answer is nothing. Do not fill silence with noise. - You are running in session "{session_id}". + +SELF-KNOWLEDGE: +YOUR SOURCE CODE (src/timmy/): agent.py, agents/base.py, agents/loader.py, prompts.py, tools.py, tool_safety.py, tools_intro/, memory_system.py, semantic_memory.py, session.py, cli.py, thinking.py, agentic_loop.py, voice_loop.py, backends.py, mcp_tools.py, conversation.py. Config at src/config.py, agent YAML at config/agents.yaml. + +YOUR CURRENT CAPABILITIES: Read/write files, execute shell/python, calculator, three-tier memory, system introspection, MCP Gitea integration, voice interface. + +YOUR KNOWN LIMITATIONS: Cannot run tests autonomously, cannot delegate to other agents, cannot search past sessions, Ollama may contend for GPU, cannot modify own source code, small 4K context window. """ # --------------------------------------------------------------------------- @@ -81,6 +88,48 @@ IDENTITY: - When you state a fact, commit to it. - Never show raw tool call JSON or function syntax in responses. - You are running in session "{session_id}". Session types: "cli" = terminal user, "dashboard" = web UI, "loop" = dev loop automation, other = custom context. + +SELF-KNOWLEDGE: +YOUR SOURCE CODE (src/timmy/): +- agent.py: Main agent creation and model warmup +- agents/base.py: SubAgent base class for the agent swarm +- agents/loader.py: YAML-driven agent loading from config/agents.yaml +- prompts.py: System prompts (this file) +- tools.py: Tool registration (file, shell, python, calculator, etc.) +- tool_safety.py: Safety classification (SAFE vs DANGEROUS tools) +- tools_intro/__init__.py: System introspection (get_system_info, check_ollama_health) +- memory_system.py: Three-tier memory (hot MEMORY.md, vault, semantic search) +- semantic_memory.py: Embedding-based memory search +- session.py: Session management and fact extraction +- cli.py: CLI interface (timmy chat, timmy route, timmy voice) +- thinking.py: Reasoning and thinking engine +- agentic_loop.py: Multi-step task execution +- voice_loop.py: Sovereign voice interface (local Whisper + Piper + Ollama) +- backends.py: Model backend abstraction (Ollama, AirLLM, Grok) +- mcp_tools.py: MCP protocol tool integration +- conversation.py: Conversation history tracking +- config is at src/config.py, agent YAML configs at config/agents.yaml + +YOUR CURRENT CAPABILITIES: +- Read and write files on the local filesystem +- Execute shell commands and Python code +- Calculator (always use for arithmetic) +- Three-tier memory system (hot memory, vault, semantic search) +- System introspection (query Ollama model, check health) +- MCP Gitea integration (read/create issues, PRs, branches, commits) +- Grok consultation (opt-in, user-controlled external API) +- Voice interface (local Whisper STT + Piper TTS) +- Thinking/reasoning engine for complex problems + +YOUR KNOWN LIMITATIONS (be honest about these when asked): +- Cannot run your own test suite autonomously +- Cannot delegate coding tasks to other agents (like Kimi) +- Cannot reflect on or search your own past behavior/sessions +- Ollama inference may contend with other processes sharing the GPU +- Cannot modify your own source code or configuration +- Cannot analyze Bitcoin transactions locally (no local indexer yet) +- Small context window (4096 tokens) limits complex reasoning +- You are a language model — you confabulate. When unsure, say so. """ # Default to lite for safety diff --git a/tests/timmy/test_self_awareness.py b/tests/timmy/test_self_awareness.py new file mode 100644 index 00000000..8a8022e5 --- /dev/null +++ b/tests/timmy/test_self_awareness.py @@ -0,0 +1,87 @@ +"""Tests for Timmy's self-knowledge capabilities (Issues #78 and #80).""" + +from unittest.mock import MagicMock, patch + +import pytest + +from timmy.prompts import get_system_prompt + + +class TestSelfKnowledgeInPrompts: + """Verify that system prompts contain self-knowledge sections.""" + + @pytest.fixture(autouse=True) + def mock_settings(self): + """Mock config.settings.ollama_model for all tests.""" + # The settings import happens inside get_system_prompt function, + # so we mock the config module's settings attribute + mock_settings = MagicMock() + mock_settings.ollama_model = "test-model" + + with patch("config.settings", mock_settings): + yield mock_settings + + def test_full_prompt_contains_source_code_header(self, mock_settings): + """Full prompt should contain 'YOUR SOURCE CODE' section.""" + prompt = get_system_prompt(tools_enabled=True) + assert "YOUR SOURCE CODE" in prompt + + def test_full_prompt_contains_tool_safety_reference(self, mock_settings): + """Full prompt should mention tool_safety.py specifically.""" + prompt = get_system_prompt(tools_enabled=True) + assert "tool_safety.py" in prompt + + def test_full_prompt_contains_known_limitations(self, mock_settings): + """Full prompt should contain 'KNOWN LIMITATIONS' section.""" + prompt = get_system_prompt(tools_enabled=True) + assert "KNOWN LIMITATIONS" in prompt + + def test_full_prompt_contains_specific_limitation(self, mock_settings): + """Full prompt should mention inability to run test suite autonomously.""" + prompt = get_system_prompt(tools_enabled=True) + assert "Cannot run your own test suite" in prompt + + def test_lite_prompt_contains_source_code_header(self, mock_settings): + """Lite prompt should also contain 'YOUR SOURCE CODE' section.""" + prompt = get_system_prompt(tools_enabled=False) + assert "YOUR SOURCE CODE" in prompt + + def test_lite_prompt_contains_known_limitations(self, mock_settings): + """Lite prompt should also contain 'KNOWN LIMITATIONS' section.""" + prompt = get_system_prompt(tools_enabled=False) + assert "KNOWN LIMITATIONS" in prompt + + def test_lite_prompt_is_shorter_than_full_prompt(self, mock_settings): + """Lite prompt's self-knowledge section should be shorter than full prompt's.""" + full_prompt = get_system_prompt(tools_enabled=True) + lite_prompt = get_system_prompt(tools_enabled=False) + + # Lite prompt should be shorter overall + assert len(lite_prompt) < len(full_prompt), ( + f"Lite prompt ({len(lite_prompt)} chars) should be shorter than " + f"full prompt ({len(full_prompt)} chars)" + ) + + def test_full_prompt_contains_codebase_structure(self, mock_settings): + """Full prompt should contain detailed codebase structure.""" + prompt = get_system_prompt(tools_enabled=True) + + # Should list key modules + assert "agent.py" in prompt + assert "memory_system.py" in prompt + assert "cli.py" in prompt + assert "backends.py" in prompt + + def test_full_prompt_contains_capabilities(self, mock_settings): + """Full prompt should list current capabilities.""" + prompt = get_system_prompt(tools_enabled=True) + assert "YOUR CURRENT CAPABILITIES" in prompt + + def test_lite_prompt_is_condensed(self, mock_settings): + """Lite prompt should have condensed self-knowledge (no detailed descriptions).""" + prompt = get_system_prompt(tools_enabled=False) + + # Should have the key sections but in condensed form + assert "YOUR SOURCE CODE" in prompt + assert "YOUR CURRENT CAPABILITIES" in prompt + assert "YOUR KNOWN LIMITATIONS" in prompt