[loop-cycle-40] fix: use get_system_prompt() in cloud backends (#135) (#138)

## What

Cloud backends (Grok, Claude, AirLLM) were importing SYSTEM_PROMPT directly, which is always SYSTEM_PROMPT_LITE and contains unformatted {model_name} and {session_id} placeholders.

## Changes

- backends.py: Replace `from timmy.prompts import SYSTEM_PROMPT` with `from timmy.prompts import get_system_prompt`
- AirLLM: uses `get_system_prompt(tools_enabled=False, session_id="airllm")` (LITE tier, correct)
- Grok: uses `get_system_prompt(tools_enabled=True, session_id="grok")` (FULL tier)
- Claude: uses `get_system_prompt(tools_enabled=True, session_id="claude")` (FULL tier)
- 9 new tests verify formatted model names, correct tier selection, and session_id formatting

## Tests

1508 passed, 0 failed (41 new tests this cycle)

Fixes #135

Co-authored-by: Kimi Agent <kimi@timmy.local>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/138
Reviewed-by: rockachopa <alexpaynex@gmail.com>
Co-authored-by: hermes <hermes@timmy.local>
Co-committed-by: hermes <hermes@timmy.local>
2026-03-15 09:44:43 -04:00
parent d48d56ecc0
commit 48c8efb2fb
3 changed files with 191 additions and 4 deletions
--- a/tests/timmy/test_backends.py
+++ b/tests/timmy/test_backends.py
@@ -160,6 +160,61 @@ def test_print_response_stream_flag_accepted():
    agent.print_response("hello", stream=False)  # no error


+# ── Prompt formatting tests ────────────────────────────────────────────────
+
+
+def test_airllm_prompt_contains_formatted_model_name():
+    """AirLLM prompt must embed settings.ollama_model instead of a raw {model_name}."""
+    with (
+        patch("timmy.backends.is_apple_silicon", return_value=False),
+        patch("config.settings") as mock_settings,
+    ):
+        mock_settings.ollama_model = "llama3.2:3b"
+        from timmy.backends import TimmyAirLLMAgent

+        agent = TimmyAirLLMAgent(model_size="8b")
+        prompt = agent._build_prompt("test message")

+    # The placeholder must be gone and the patched ollama_model value present
+    assert "{model_name}" not in prompt
+    assert "llama3.2:3b" in prompt
+
+
+def test_airllm_prompt_gets_lite_tier():
+    """AirLLM's prompt must be the LITE tier: no TOOL USAGE section, basic rules present."""
+    with (
+        patch("timmy.backends.is_apple_silicon", return_value=False),
+        patch("config.settings") as mock_settings,
+    ):
+        mock_settings.ollama_model = "test-model"
+        from timmy.backends import TimmyAirLLMAgent

+        agent = TimmyAirLLMAgent(model_size="8b")
+        prompt = agent._build_prompt("test message")

+    # A LITE-tier prompt must not include the TOOL USAGE section
+    assert "TOOL USAGE" not in prompt
+    # ...but it must still carry the basic rules, checked via this rule's wording
+    assert "Be brief by default" in prompt
+
+
+def test_airllm_prompt_contains_session_id():
+    """AirLLM prompt must name its session "airllm", with no raw {session_id} left over."""
+    with (
+        patch("timmy.backends.is_apple_silicon", return_value=False),
+        patch("config.settings") as mock_settings,
+    ):
+        mock_settings.ollama_model = "test-model"
+        from timmy.backends import TimmyAirLLMAgent

+        agent = TimmyAirLLMAgent(model_size="8b")
+        prompt = agent._build_prompt("test message")

+    # Raw placeholder (matched with its closing quote) must be replaced by the name "airllm"
+    assert '{session_id}"' not in prompt
+    assert 'session "airllm"' in prompt
+
+
 # ── ClaudeBackend ─────────────────────────────────────────────────────────


@@ -270,3 +325,87 @@ def test_claude_backend_history_rolling_window():
            backend.run(f"message {i}")

    assert len(backend._history) <= 20
+
+
+# ── ClaudeBackend prompt formatting ─────────────────────────────────────────
+
+
+def test_claude_prompt_contains_formatted_model_name():
+    """Claude's system prompt must embed settings.ollama_model, not a raw {model_name}."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.ollama_model = "llama3.2:3b"
+        from timmy.backends import ClaudeBackend

+        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")

+        # Stub the client so the `system` kwarg passed to messages.create can be inspected
+        mock_client = MagicMock()
+        mock_content = MagicMock()
+        mock_content.text = "test response"
+        mock_response = MagicMock()
+        mock_response.content = [mock_content]
+        mock_client.messages.create.return_value = mock_response

+        with patch.object(backend, "_get_client", return_value=mock_client):
+            backend.run("test message")

+        # call_args[1] holds the keyword arguments of the recorded create() call
+        call_kwargs = mock_client.messages.create.call_args[1]
+        system_prompt = call_kwargs.get("system", "")

+    # The placeholder must be gone and the patched ollama_model value present
+    assert "{model_name}" not in system_prompt
+    assert "llama3.2:3b" in system_prompt
+
+
+def test_claude_prompt_gets_full_tier():
+    """Claude's system prompt must be the FULL tier (tools_enabled=True)."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.ollama_model = "test-model"
+        from timmy.backends import ClaudeBackend

+        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")

+        mock_client = MagicMock()
+        mock_content = MagicMock()
+        mock_content.text = "test response"
+        mock_response = MagicMock()
+        mock_response.content = [mock_content]
+        mock_client.messages.create.return_value = mock_response

+        with patch.object(backend, "_get_client", return_value=mock_client):
+            backend.run("test message")

+        call_kwargs = mock_client.messages.create.call_args[1]  # kwargs of the create() call
+        system_prompt = call_kwargs.get("system", "")

+    # A FULL-tier prompt is recognised by its TOOL USAGE section...
+    assert "TOOL USAGE" in system_prompt
+    # ...and by its VOICE AND BREVITY section
+    assert "VOICE AND BREVITY" in system_prompt
+
+
+def test_claude_prompt_contains_session_id():
+    """Claude prompt must name its session "claude", with no raw {session_id} left over."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.ollama_model = "test-model"
+        from timmy.backends import ClaudeBackend

+        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")

+        mock_client = MagicMock()
+        mock_content = MagicMock()
+        mock_content.text = "test response"
+        mock_response = MagicMock()
+        mock_response.content = [mock_content]
+        mock_client.messages.create.return_value = mock_response

+        with patch.object(backend, "_get_client", return_value=mock_client):
+            backend.run("test message")

+        call_kwargs = mock_client.messages.create.call_args[1]  # kwargs of the create() call
+        system_prompt = call_kwargs.get("system", "")

+    # Raw placeholder (matched with its closing quote) must be replaced by the name "claude"
+    assert '{session_id}"' not in system_prompt
+    assert 'session "claude"' in system_prompt
--- a/tests/timmy/test_grok_backend.py
+++ b/tests/timmy/test_grok_backend.py
@@ -194,6 +194,52 @@ def test_grok_backend_build_messages():
    assert messages[-1]["content"] == "new question"


+def test_grok_prompt_contains_formatted_model_name():
+    """Grok's system prompt must embed settings.ollama_model, not a raw {model_name}."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.ollama_model = "llama3.2:3b"
+        from timmy.backends import GrokBackend

+        backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+        messages = backend._build_messages("test message")
+        system_prompt = messages[0]["content"]  # first message is taken as the system prompt

+    # The placeholder must be gone and the patched ollama_model value present
+    assert "{model_name}" not in system_prompt
+    assert "llama3.2:3b" in system_prompt
+
+
+def test_grok_prompt_gets_full_tier():
+    """Grok's system prompt must be the FULL tier (tools_enabled=True)."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.ollama_model = "test-model"
+        from timmy.backends import GrokBackend

+        backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+        messages = backend._build_messages("test message")
+        system_prompt = messages[0]["content"]  # first message is taken as the system prompt

+    # A FULL-tier prompt is recognised by its TOOL USAGE section...
+    assert "TOOL USAGE" in system_prompt
+    # ...and by its VOICE AND BREVITY section
+    assert "VOICE AND BREVITY" in system_prompt
+
+
+def test_grok_prompt_contains_session_id():
+    """Grok prompt must name its session "grok", with no raw {session_id} left over."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.ollama_model = "test-model"
+        from timmy.backends import GrokBackend

+        backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+        messages = backend._build_messages("test message")
+        system_prompt = messages[0]["content"]  # first message is taken as the system prompt

+    # Raw placeholder (matched with its closing quote) must be replaced by the name "grok"
+    assert '{session_id}"' not in system_prompt
+    assert 'session "grok"' in system_prompt
+
+
 # ── get_grok_backend singleton ──────────────────────────────────────────────