diff --git a/src/timmy/backends.py b/src/timmy/backends.py index cf1ba8b7..4fa7b947 100644 --- a/src/timmy/backends.py +++ b/src/timmy/backends.py @@ -18,7 +18,7 @@ import time from dataclasses import dataclass from typing import Literal -from timmy.prompts import SYSTEM_PROMPT +from timmy.prompts import get_system_prompt logger = logging.getLogger(__name__) @@ -128,7 +128,7 @@ class TimmyAirLLMAgent: # ── private helpers ────────────────────────────────────────────────────── def _build_prompt(self, message: str) -> str: - context = SYSTEM_PROMPT + "\n\n" + context = get_system_prompt(tools_enabled=False, session_id="airllm") + "\n\n" # Include the last 10 turns (5 exchanges) for continuity. if self._history: context += "\n".join(self._history[-10:]) + "\n\n" @@ -388,7 +388,9 @@ class GrokBackend: def _build_messages(self, message: str) -> list[dict[str, str]]: """Build the messages array for the API call.""" - messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages = [ + {"role": "system", "content": get_system_prompt(tools_enabled=True, session_id="grok")} + ] # Include conversation history for context messages.extend(self._history[-10:]) messages.append({"role": "user", "content": message}) @@ -481,7 +483,7 @@ class ClaudeBackend: response = client.messages.create( model=self._model, max_tokens=1024, - system=SYSTEM_PROMPT, + system=get_system_prompt(tools_enabled=True, session_id="claude"), messages=messages, ) diff --git a/tests/timmy/test_backends.py b/tests/timmy/test_backends.py index cd128f78..52935f30 100644 --- a/tests/timmy/test_backends.py +++ b/tests/timmy/test_backends.py @@ -160,6 +160,61 @@ def test_print_response_stream_flag_accepted(): agent.print_response("hello", stream=False) # no error +# ── Prompt formatting tests ──────────────────────────────────────────────── + + +def test_airllm_prompt_contains_formatted_model_name(): + """AirLLM prompt should have actual model name, not literal {model_name}.""" + with ( + patch("timmy.backends.is_apple_silicon", return_value=False), + patch("config.settings") as mock_settings, + ): + mock_settings.ollama_model = "llama3.2:3b" + from timmy.backends import TimmyAirLLMAgent + + agent = TimmyAirLLMAgent(model_size="8b") + prompt = agent._build_prompt("test message") + + # Should contain the actual model name, not the placeholder + assert "{model_name}" not in prompt + assert "llama3.2:3b" in prompt + + +def test_airllm_prompt_gets_lite_tier(): + """AirLLM should get LITE tier prompt (tools_enabled=False).""" + with ( + patch("timmy.backends.is_apple_silicon", return_value=False), + patch("config.settings") as mock_settings, + ): + mock_settings.ollama_model = "test-model" + from timmy.backends import TimmyAirLLMAgent + + agent = TimmyAirLLMAgent(model_size="8b") + prompt = agent._build_prompt("test message") + + # LITE tier should NOT have TOOL USAGE section + assert "TOOL USAGE" not in prompt + # LITE tier should have the basic rules + assert "Be brief by default" in prompt + + +def test_airllm_prompt_contains_session_id(): + """AirLLM prompt should have session_id formatted, not placeholder.""" + with ( + patch("timmy.backends.is_apple_silicon", return_value=False), + patch("config.settings") as mock_settings, + ): + mock_settings.ollama_model = "test-model" + from timmy.backends import TimmyAirLLMAgent + + agent = TimmyAirLLMAgent(model_size="8b") + prompt = agent._build_prompt("test message") + + # Should contain the session_id, not the placeholder + assert '{session_id}"' not in prompt + assert 'session "airllm"' in prompt + + # ── ClaudeBackend ───────────────────────────────────────────────────────── @@ -270,3 +325,87 @@ def test_claude_backend_history_rolling_window(): backend.run(f"message {i}") assert len(backend._history) <= 20 + + +# ── ClaudeBackend prompt formatting ───────────────────────────────────────── + + +def test_claude_prompt_contains_formatted_model_name(): + """Claude system prompt should have actual model name, not literal {model_name}.""" + with patch("config.settings") as mock_settings: + mock_settings.ollama_model = "llama3.2:3b" + from timmy.backends import ClaudeBackend + + backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") + + # Mock the client to capture the system parameter + mock_client = MagicMock() + mock_content = MagicMock() + mock_content.text = "test response" + mock_response = MagicMock() + mock_response.content = [mock_content] + mock_client.messages.create.return_value = mock_response + + with patch.object(backend, "_get_client", return_value=mock_client): + backend.run("test message") + + # Get the system parameter from the create call + call_kwargs = mock_client.messages.create.call_args[1] + system_prompt = call_kwargs.get("system", "") + + # Should contain the actual model name, not the placeholder + assert "{model_name}" not in system_prompt + assert "llama3.2:3b" in system_prompt + + +def test_claude_prompt_gets_full_tier(): + """Claude should get FULL tier prompt (tools_enabled=True).""" + with patch("config.settings") as mock_settings: + mock_settings.ollama_model = "test-model" + from timmy.backends import ClaudeBackend + + backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") + + mock_client = MagicMock() + mock_content = MagicMock() + mock_content.text = "test response" + mock_response = MagicMock() + mock_response.content = [mock_content] + mock_client.messages.create.return_value = mock_response + + with patch.object(backend, "_get_client", return_value=mock_client): + backend.run("test message") + + call_kwargs = mock_client.messages.create.call_args[1] + system_prompt = call_kwargs.get("system", "") + + # FULL tier should have TOOL USAGE section + assert "TOOL USAGE" in system_prompt + # FULL tier should have the full voice and brevity section + assert "VOICE AND BREVITY" in system_prompt + + +def test_claude_prompt_contains_session_id(): + """Claude prompt should have session_id formatted, not placeholder.""" + with patch("config.settings") as mock_settings: + mock_settings.ollama_model = "test-model" + from timmy.backends import ClaudeBackend + + backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") + + mock_client = MagicMock() + mock_content = MagicMock() + mock_content.text = "test response" + mock_response = MagicMock() + mock_response.content = [mock_content] + mock_client.messages.create.return_value = mock_response + + with patch.object(backend, "_get_client", return_value=mock_client): + backend.run("test message") + + call_kwargs = mock_client.messages.create.call_args[1] + system_prompt = call_kwargs.get("system", "") + + # Should contain the session_id, not the placeholder + assert '{session_id}"' not in system_prompt + assert 'session "claude"' in system_prompt diff --git a/tests/timmy/test_grok_backend.py b/tests/timmy/test_grok_backend.py index c63ee1f3..736bd304 100644 --- a/tests/timmy/test_grok_backend.py +++ b/tests/timmy/test_grok_backend.py @@ -194,6 +194,52 @@ def test_grok_backend_build_messages(): assert messages[-1]["content"] == "new question" +def test_grok_prompt_contains_formatted_model_name(): + """Grok prompt should have actual model name, not literal {model_name}.""" + with patch("config.settings") as mock_settings: + mock_settings.ollama_model = "llama3.2:3b" + from timmy.backends import GrokBackend + + backend = GrokBackend(api_key="xai-test", model="grok-3-fast") + messages = backend._build_messages("test message") + system_prompt = messages[0]["content"] + + # Should contain the actual model name, not the placeholder + assert "{model_name}" not in system_prompt + assert "llama3.2:3b" in system_prompt + + +def test_grok_prompt_gets_full_tier(): + """Grok should get FULL tier prompt (tools_enabled=True).""" + with patch("config.settings") as mock_settings: + mock_settings.ollama_model = "test-model" + from timmy.backends import GrokBackend + + backend = GrokBackend(api_key="xai-test", model="grok-3-fast") + messages = backend._build_messages("test message") + system_prompt = messages[0]["content"] + + # FULL tier should have TOOL USAGE section + assert "TOOL USAGE" in system_prompt + # FULL tier should have the full voice and brevity section + assert "VOICE AND BREVITY" in system_prompt + + +def test_grok_prompt_contains_session_id(): + """Grok prompt should have session_id formatted, not placeholder.""" + with patch("config.settings") as mock_settings: + mock_settings.ollama_model = "test-model" + from timmy.backends import GrokBackend + + backend = GrokBackend(api_key="xai-test", model="grok-3-fast") + messages = backend._build_messages("test message") + system_prompt = messages[0]["content"] + + # Should contain the session_id, not the placeholder + assert '{session_id}"' not in system_prompt + assert 'session "grok"' in system_prompt + + # ── get_grok_backend singleton ──────────────────────────────────────────────