forked from Rockachopa/Timmy-time-dashboard
Adds a SELF-KNOWLEDGE section to both SYSTEM_PROMPT_LITE and SYSTEM_PROMPT_FULL containing: (1) a codebase map (all src/timmy/ modules with descriptions); (2) a current capabilities list (grounded, not generic); (3) known limitations (real gaps, not LLM platitudes). The lite prompt gets a condensed version; the full prompt gets the detailed one. Timmy can now answer 'what does tool_safety.py do?' and give grounded answers about his actual limitations. Adds 10 new tests; 1456 total passing.
88 lines
3.7 KiB
Python
88 lines
3.7 KiB
Python
"""Tests for Timmy's self-knowledge capabilities (Issues #78 and #80)."""
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from timmy.prompts import get_system_prompt
|
|
|
|
|
|
class TestSelfKnowledgeInPrompts:
    """Check that generated system prompts carry the self-knowledge sections.

    Throughout this class, ``get_system_prompt(tools_enabled=True)`` is treated
    as the "full" prompt and ``get_system_prompt(tools_enabled=False)`` as the
    "lite" prompt.
    """

    @pytest.fixture(autouse=True)
    def mock_settings(self):
        """Replace ``config.settings`` with a stub for the duration of each test.

        The stub is a ``MagicMock`` whose ``ollama_model`` attribute is
        ``"test-model"``. The module attribute is patched (rather than a name
        inside ``timmy.prompts``) because, per the original author's note,
        ``get_system_prompt`` imports settings inside the function body.

        Yields:
            The stub settings object that is installed while the test runs.
        """
        fake_settings = MagicMock(ollama_model="test-model")
        with patch("config.settings", new=fake_settings):
            yield fake_settings

    def test_full_prompt_contains_source_code_header(self, mock_settings):
        """The full prompt includes the 'YOUR SOURCE CODE' header."""
        assert "YOUR SOURCE CODE" in get_system_prompt(tools_enabled=True)

    def test_full_prompt_contains_tool_safety_reference(self, mock_settings):
        """The full prompt names tool_safety.py explicitly."""
        assert "tool_safety.py" in get_system_prompt(tools_enabled=True)

    def test_full_prompt_contains_known_limitations(self, mock_settings):
        """The full prompt includes the 'KNOWN LIMITATIONS' header."""
        assert "KNOWN LIMITATIONS" in get_system_prompt(tools_enabled=True)

    def test_full_prompt_contains_specific_limitation(self, mock_settings):
        """The full prompt states that the test suite cannot be self-run."""
        assert "Cannot run your own test suite" in get_system_prompt(tools_enabled=True)

    def test_lite_prompt_contains_source_code_header(self, mock_settings):
        """The lite prompt includes the 'YOUR SOURCE CODE' header as well."""
        assert "YOUR SOURCE CODE" in get_system_prompt(tools_enabled=False)

    def test_lite_prompt_contains_known_limitations(self, mock_settings):
        """The lite prompt includes the 'KNOWN LIMITATIONS' header as well."""
        assert "KNOWN LIMITATIONS" in get_system_prompt(tools_enabled=False)

    def test_lite_prompt_is_shorter_than_full_prompt(self, mock_settings):
        """The lite prompt, taken as a whole, is strictly shorter than the full one."""
        # Build the full prompt first, then the lite one, and compare total
        # character counts (not just the self-knowledge portion).
        full_prompt = get_system_prompt(tools_enabled=True)
        lite_prompt = get_system_prompt(tools_enabled=False)

        lite_is_shorter = len(lite_prompt) < len(full_prompt)
        assert lite_is_shorter, (
            f"Lite prompt ({len(lite_prompt)} chars) should be shorter than "
            f"full prompt ({len(full_prompt)} chars)"
        )

    def test_full_prompt_contains_codebase_structure(self, mock_settings):
        """The full prompt lists the key modules of the codebase by file name."""
        prompt = get_system_prompt(tools_enabled=True)

        # Checked in this order; the first missing module fails the test.
        for module_file in ("agent.py", "memory_system.py", "cli.py", "backends.py"):
            assert module_file in prompt

    def test_full_prompt_contains_capabilities(self, mock_settings):
        """The full prompt includes the 'YOUR CURRENT CAPABILITIES' header."""
        assert "YOUR CURRENT CAPABILITIES" in get_system_prompt(tools_enabled=True)

    def test_lite_prompt_is_condensed(self, mock_settings):
        """The lite prompt still carries all three self-knowledge headers."""
        prompt = get_system_prompt(tools_enabled=False)

        # Only the presence of each header is verified here.
        expected_headers = (
            "YOUR SOURCE CODE",
            "YOUR CURRENT CAPABILITIES",
            "YOUR KNOWN LIMITATIONS",
        )
        for header in expected_headers:
            assert header in prompt
|