[loop-cycle-40] fix: use get_system_prompt() in cloud backends (#135) (#138)
Some checks failed
Tests / lint (push) Successful in 2s
Tests / test (push) Failing after 1m10s

## What

Cloud backends (Grok, Claude, AirLLM) were importing SYSTEM_PROMPT directly; that constant is always the LITE tier (SYSTEM_PROMPT_LITE) and still contains the unformatted {model_name} and {session_id} placeholders.

## Changes

- backends.py: Replace `from timmy.prompts import SYSTEM_PROMPT` with `from timmy.prompts import get_system_prompt`
- AirLLM: uses `get_system_prompt(tools_enabled=False, session_id="airllm")` (LITE tier, correct)
- Grok: uses `get_system_prompt(tools_enabled=True, session_id="grok")` (FULL tier)
- Claude: uses `get_system_prompt(tools_enabled=True, session_id="claude")` (FULL tier)
- 9 new tests verify formatted model names, correct tier selection, and session_id formatting

## Tests

1508 passed, 0 failed (41 new tests this cycle)

Fixes #135

Co-authored-by: Kimi Agent <kimi@timmy.local>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/138
Reviewed-by: rockachopa <alexpaynex@gmail.com>
Co-authored-by: hermes <hermes@timmy.local>
Co-committed-by: hermes <hermes@timmy.local>
This commit was merged in pull request #138.
This commit is contained in:
2026-03-15 09:44:43 -04:00
committed by rockachopa
parent d48d56ecc0
commit 48c8efb2fb
3 changed files with 191 additions and 4 deletions

View File

@@ -18,7 +18,7 @@ import time
from dataclasses import dataclass
from typing import Literal
from timmy.prompts import SYSTEM_PROMPT
from timmy.prompts import get_system_prompt
logger = logging.getLogger(__name__)
@@ -128,7 +128,7 @@ class TimmyAirLLMAgent:
# ── private helpers ──────────────────────────────────────────────────────
def _build_prompt(self, message: str) -> str:
context = SYSTEM_PROMPT + "\n\n"
context = get_system_prompt(tools_enabled=False, session_id="airllm") + "\n\n"
# Include the last 10 turns (5 exchanges) for continuity.
if self._history:
context += "\n".join(self._history[-10:]) + "\n\n"
@@ -388,7 +388,9 @@ class GrokBackend:
def _build_messages(self, message: str) -> list[dict[str, str]]:
"""Build the messages array for the API call."""
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
messages = [
{"role": "system", "content": get_system_prompt(tools_enabled=True, session_id="grok")}
]
# Include conversation history for context
messages.extend(self._history[-10:])
messages.append({"role": "user", "content": message})
@@ -481,7 +483,7 @@ class ClaudeBackend:
response = client.messages.create(
model=self._model,
max_tokens=1024,
system=SYSTEM_PROMPT,
system=get_system_prompt(tools_enabled=True, session_id="claude"),
messages=messages,
)

View File

@@ -160,6 +160,61 @@ def test_print_response_stream_flag_accepted():
agent.print_response("hello", stream=False) # no error
# ── Prompt formatting tests ────────────────────────────────────────────────
def test_airllm_prompt_contains_formatted_model_name():
    """The AirLLM prompt must substitute the configured model name for {model_name}."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("config.settings") as settings_mock:
            settings_mock.ollama_model = "llama3.2:3b"
            from timmy.backends import TimmyAirLLMAgent

            airllm_agent = TimmyAirLLMAgent(model_size="8b")
            built = airllm_agent._build_prompt("test message")
            # The raw placeholder must be gone and the real name present.
            assert "{model_name}" not in built
            assert "llama3.2:3b" in built
def test_airllm_prompt_gets_lite_tier():
    """AirLLM must receive the LITE tier prompt (tools_enabled=False)."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("config.settings") as settings_mock:
            settings_mock.ollama_model = "test-model"
            from timmy.backends import TimmyAirLLMAgent

            airllm_agent = TimmyAirLLMAgent(model_size="8b")
            built = airllm_agent._build_prompt("test message")
            # LITE tier omits the tools section but keeps the core rules.
            assert "TOOL USAGE" not in built
            assert "Be brief by default" in built
def test_airllm_prompt_contains_session_id():
    """The AirLLM prompt must substitute the session id for {session_id}."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("config.settings") as settings_mock:
            settings_mock.ollama_model = "test-model"
            from timmy.backends import TimmyAirLLMAgent

            airllm_agent = TimmyAirLLMAgent(model_size="8b")
            built = airllm_agent._build_prompt("test message")
            # The raw placeholder must be gone and the real session id present.
            assert '{session_id}"' not in built
            assert 'session "airllm"' in built
# ── ClaudeBackend ─────────────────────────────────────────────────────────
@@ -270,3 +325,87 @@ def test_claude_backend_history_rolling_window():
backend.run(f"message {i}")
assert len(backend._history) <= 20
# ── ClaudeBackend prompt formatting ─────────────────────────────────────────
def test_claude_prompt_contains_formatted_model_name():
    """The `system` kwarg sent to Claude must carry the real model name, not {model_name}."""
    with patch("config.settings") as settings_mock:
        settings_mock.ollama_model = "llama3.2:3b"
        from timmy.backends import ClaudeBackend

        claude = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        # Fake Anthropic client so the create() kwargs can be inspected.
        fake_client = MagicMock()
        fake_client.messages.create.return_value = MagicMock(
            content=[MagicMock(text="test response")]
        )
        with patch.object(claude, "_get_client", return_value=fake_client):
            claude.run("test message")
        sent_system = fake_client.messages.create.call_args[1].get("system", "")
        # The raw placeholder must be gone and the real name present.
        assert "{model_name}" not in sent_system
        assert "llama3.2:3b" in sent_system
def test_claude_prompt_gets_full_tier():
    """Claude must receive the FULL tier prompt (tools_enabled=True)."""
    with patch("config.settings") as settings_mock:
        settings_mock.ollama_model = "test-model"
        from timmy.backends import ClaudeBackend

        claude = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        # Fake Anthropic client so the create() kwargs can be inspected.
        fake_client = MagicMock()
        fake_client.messages.create.return_value = MagicMock(
            content=[MagicMock(text="test response")]
        )
        with patch.object(claude, "_get_client", return_value=fake_client):
            claude.run("test message")
        sent_system = fake_client.messages.create.call_args[1].get("system", "")
        # FULL tier includes both the tools and the voice sections.
        assert "TOOL USAGE" in sent_system
        assert "VOICE AND BREVITY" in sent_system
def test_claude_prompt_contains_session_id():
    """The `system` kwarg sent to Claude must carry the session id, not {session_id}."""
    with patch("config.settings") as settings_mock:
        settings_mock.ollama_model = "test-model"
        from timmy.backends import ClaudeBackend

        claude = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        # Fake Anthropic client so the create() kwargs can be inspected.
        fake_client = MagicMock()
        fake_client.messages.create.return_value = MagicMock(
            content=[MagicMock(text="test response")]
        )
        with patch.object(claude, "_get_client", return_value=fake_client):
            claude.run("test message")
        sent_system = fake_client.messages.create.call_args[1].get("system", "")
        # The raw placeholder must be gone and the real session id present.
        assert '{session_id}"' not in sent_system
        assert 'session "claude"' in sent_system

View File

@@ -194,6 +194,52 @@ def test_grok_backend_build_messages():
assert messages[-1]["content"] == "new question"
def test_grok_prompt_contains_formatted_model_name():
    """Grok's system message must carry the real model name, not {model_name}."""
    with patch("config.settings") as settings_mock:
        settings_mock.ollama_model = "llama3.2:3b"
        from timmy.backends import GrokBackend

        grok = GrokBackend(api_key="xai-test", model="grok-3-fast")
        # The system message is always first in the built messages array.
        sent_system = grok._build_messages("test message")[0]["content"]
        assert "{model_name}" not in sent_system
        assert "llama3.2:3b" in sent_system
def test_grok_prompt_gets_full_tier():
    """Grok must receive the FULL tier prompt (tools_enabled=True)."""
    with patch("config.settings") as settings_mock:
        settings_mock.ollama_model = "test-model"
        from timmy.backends import GrokBackend

        grok = GrokBackend(api_key="xai-test", model="grok-3-fast")
        # The system message is always first in the built messages array.
        sent_system = grok._build_messages("test message")[0]["content"]
        # FULL tier includes both the tools and the voice sections.
        assert "TOOL USAGE" in sent_system
        assert "VOICE AND BREVITY" in sent_system
def test_grok_prompt_contains_session_id():
    """Grok's system message must carry the session id, not {session_id}."""
    with patch("config.settings") as settings_mock:
        settings_mock.ollama_model = "test-model"
        from timmy.backends import GrokBackend

        grok = GrokBackend(api_key="xai-test", model="grok-3-fast")
        # The system message is always first in the built messages array.
        sent_system = grok._build_messages("test message")[0]["content"]
        # The raw placeholder must be gone and the real session id present.
        assert '{session_id}"' not in sent_system
        assert 'session "grok"' in sent_system
# ── get_grok_backend singleton ──────────────────────────────────────────────