[loop-cycle-40] fix: use get_system_prompt() in cloud backends (#135) (#138)

## What

Cloud backends (Grok, Claude, AirLLM) were importing SYSTEM_PROMPT directly, which is always SYSTEM_PROMPT_LITE and contains unformatted {model_name} and {session_id} placeholders.

## Changes

- backends.py: Replace `from timmy.prompts import SYSTEM_PROMPT` with `from timmy.prompts import get_system_prompt`
- AirLLM: uses `get_system_prompt(tools_enabled=False, session_id="airllm")` (LITE tier, correct)
- Grok: uses `get_system_prompt(tools_enabled=True, session_id="grok")` (FULL tier)
- Claude: uses `get_system_prompt(tools_enabled=True, session_id="claude")` (FULL tier)
- 9 new tests verify formatted model names, correct tier selection, and session_id formatting

## Tests

1508 passed, 0 failed (41 new tests this cycle)

Fixes #135

Co-authored-by: Kimi Agent <kimi@timmy.local>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/138
Reviewed-by: rockachopa <alexpaynex@gmail.com>
Co-authored-by: hermes <hermes@timmy.local>
Co-committed-by: hermes <hermes@timmy.local>
This commit is contained in:
2026-03-15 09:44:43 -04:00
committed by rockachopa
parent d48d56ecc0
commit 48c8efb2fb
3 changed files with 191 additions and 4 deletions

View File

@@ -160,6 +160,61 @@ def test_print_response_stream_flag_accepted():
agent.print_response("hello", stream=False) # no error
# ── Prompt formatting tests ────────────────────────────────────────────────
def test_airllm_prompt_contains_formatted_model_name():
    """AirLLM prompt should have actual model name, not literal {model_name}."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("config.settings") as fake_settings:
            fake_settings.ollama_model = "llama3.2:3b"
            from timmy.backends import TimmyAirLLMAgent

            rendered = TimmyAirLLMAgent(model_size="8b")._build_prompt("test message")
            # The placeholder must have been substituted with the real model name.
            assert "{model_name}" not in rendered
            assert "llama3.2:3b" in rendered
def test_airllm_prompt_gets_lite_tier():
    """AirLLM should get LITE tier prompt (tools_enabled=False)."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("config.settings") as fake_settings:
            fake_settings.ollama_model = "test-model"
            from timmy.backends import TimmyAirLLMAgent

            rendered = TimmyAirLLMAgent(model_size="8b")._build_prompt("test message")
            # LITE tier: no tool instructions, but the core rules are present.
            assert "TOOL USAGE" not in rendered
            assert "Be brief by default" in rendered
def test_airllm_prompt_contains_session_id():
    """AirLLM prompt should have session_id formatted, not placeholder."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        with patch("config.settings") as fake_settings:
            fake_settings.ollama_model = "test-model"
            from timmy.backends import TimmyAirLLMAgent

            rendered = TimmyAirLLMAgent(model_size="8b")._build_prompt("test message")
            # The session_id placeholder must have been replaced with "airllm".
            assert '{session_id}"' not in rendered
            assert 'session "airllm"' in rendered
# ── ClaudeBackend ─────────────────────────────────────────────────────────
@@ -270,3 +325,87 @@ def test_claude_backend_history_rolling_window():
backend.run(f"message {i}")
assert len(backend._history) <= 20
# ── ClaudeBackend prompt formatting ─────────────────────────────────────────
def test_claude_prompt_contains_formatted_model_name():
    """Claude system prompt should have actual model name, not literal {model_name}."""
    with patch("config.settings") as fake_settings:
        fake_settings.ollama_model = "llama3.2:3b"
        from timmy.backends import ClaudeBackend

        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        # Stub the Anthropic client so run() succeeds and we can inspect the call.
        stub_client = MagicMock()
        stub_client.messages.create.return_value = MagicMock(
            content=[MagicMock(text="test response")]
        )
        with patch.object(backend, "_get_client", return_value=stub_client):
            backend.run("test message")
        # Pull the system prompt out of the keyword args passed to the API.
        sent_system = stub_client.messages.create.call_args[1].get("system", "")
        assert "{model_name}" not in sent_system
        assert "llama3.2:3b" in sent_system
def test_claude_prompt_gets_full_tier():
    """Claude should get FULL tier prompt (tools_enabled=True)."""
    with patch("config.settings") as fake_settings:
        fake_settings.ollama_model = "test-model"
        from timmy.backends import ClaudeBackend

        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        # Stub the Anthropic client so run() succeeds and we can inspect the call.
        stub_client = MagicMock()
        stub_client.messages.create.return_value = MagicMock(
            content=[MagicMock(text="test response")]
        )
        with patch.object(backend, "_get_client", return_value=stub_client):
            backend.run("test message")
        sent_system = stub_client.messages.create.call_args[1].get("system", "")
        # FULL tier carries both the tool instructions and the full voice section.
        assert "TOOL USAGE" in sent_system
        assert "VOICE AND BREVITY" in sent_system
def test_claude_prompt_contains_session_id():
    """Claude prompt should have session_id formatted, not placeholder."""
    with patch("config.settings") as fake_settings:
        fake_settings.ollama_model = "test-model"
        from timmy.backends import ClaudeBackend

        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        # Stub the Anthropic client so run() succeeds and we can inspect the call.
        stub_client = MagicMock()
        stub_client.messages.create.return_value = MagicMock(
            content=[MagicMock(text="test response")]
        )
        with patch.object(backend, "_get_client", return_value=stub_client):
            backend.run("test message")
        sent_system = stub_client.messages.create.call_args[1].get("system", "")
        # The session_id placeholder must have been replaced with "claude".
        assert '{session_id}"' not in sent_system
        assert 'session "claude"' in sent_system

View File

@@ -194,6 +194,52 @@ def test_grok_backend_build_messages():
assert messages[-1]["content"] == "new question"
def test_grok_prompt_contains_formatted_model_name():
    """Grok prompt should have actual model name, not literal {model_name}."""
    with patch("config.settings") as fake_settings:
        fake_settings.ollama_model = "llama3.2:3b"
        from timmy.backends import GrokBackend

        built = GrokBackend(api_key="xai-test", model="grok-3-fast")._build_messages(
            "test message"
        )
        # The first message is the system prompt; the placeholder must be gone.
        sent_system = built[0]["content"]
        assert "{model_name}" not in sent_system
        assert "llama3.2:3b" in sent_system
def test_grok_prompt_gets_full_tier():
    """Grok should get FULL tier prompt (tools_enabled=True)."""
    with patch("config.settings") as fake_settings:
        fake_settings.ollama_model = "test-model"
        from timmy.backends import GrokBackend

        built = GrokBackend(api_key="xai-test", model="grok-3-fast")._build_messages(
            "test message"
        )
        sent_system = built[0]["content"]
        # FULL tier carries both the tool instructions and the full voice section.
        assert "TOOL USAGE" in sent_system
        assert "VOICE AND BREVITY" in sent_system
def test_grok_prompt_contains_session_id():
    """Grok prompt should have session_id formatted, not placeholder."""
    with patch("config.settings") as fake_settings:
        fake_settings.ollama_model = "test-model"
        from timmy.backends import GrokBackend

        built = GrokBackend(api_key="xai-test", model="grok-3-fast")._build_messages(
            "test message"
        )
        sent_system = built[0]["content"]
        # The session_id placeholder must have been replaced with "grok".
        assert '{session_id}"' not in sent_system
        assert 'session "grok"' in sent_system
# ── get_grok_backend singleton ──────────────────────────────────────────────