forked from Rockachopa/Timmy-time-dashboard
## What
Cloud backends (Grok, Claude, AirLLM) were importing `SYSTEM_PROMPT` directly. That constant is always `SYSTEM_PROMPT_LITE`, and it still contains the unformatted `{model_name}` and `{session_id}` placeholders.
## Changes
- backends.py: Replace `from timmy.prompts import SYSTEM_PROMPT` with `from timmy.prompts import get_system_prompt`
- AirLLM: uses `get_system_prompt(tools_enabled=False, session_id="airllm")` (LITE tier, correct)
- Grok: uses `get_system_prompt(tools_enabled=True, session_id="grok")` (FULL tier)
- Claude: uses `get_system_prompt(tools_enabled=True, session_id="claude")` (FULL tier)
- 9 new tests verify formatted model names, correct tier selection, and session_id formatting
## Tests
1508 passed, 0 failed (41 new tests this cycle)
Fixes #135
Co-authored-by: Kimi Agent <kimi@timmy.local>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/138
Reviewed-by: rockachopa <alexpaynex@gmail.com>
Co-authored-by: hermes <hermes@timmy.local>
Co-committed-by: hermes <hermes@timmy.local>
This commit is contained in:
@@ -160,6 +160,61 @@ def test_print_response_stream_flag_accepted():
|
||||
agent.print_response("hello", stream=False) # no error
|
||||
|
||||
|
||||
# ── Prompt formatting tests ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_airllm_prompt_contains_formatted_model_name():
    """The AirLLM prompt must render the configured model name.

    A literal ``{model_name}`` in the built prompt means the template was
    never formatted.
    """
    configured_model = "llama3.2:3b"
    with (
        patch("timmy.backends.is_apple_silicon", return_value=False),
        patch("config.settings") as mock_settings,
    ):
        mock_settings.ollama_model = configured_model
        from timmy.backends import TimmyAirLLMAgent

        rendered = TimmyAirLLMAgent(model_size="8b")._build_prompt("test message")

        # The placeholder must be gone and the real model name present.
        assert "{model_name}" not in rendered
        assert configured_model in rendered
|
||||
|
||||
|
||||
def test_airllm_prompt_gets_lite_tier():
    """The AirLLM prompt must be the LITE tier (tools_enabled=False)."""
    with (
        patch("timmy.backends.is_apple_silicon", return_value=False),
        patch("config.settings") as mock_settings,
    ):
        mock_settings.ollama_model = "test-model"
        from timmy.backends import TimmyAirLLMAgent

        rendered = TimmyAirLLMAgent(model_size="8b")._build_prompt("test message")

        # The tool-usage section belongs to the FULL tier only ...
        assert "TOOL USAGE" not in rendered
        # ... while the basic rules are expected in the LITE tier.
        assert "Be brief by default" in rendered
|
||||
|
||||
|
||||
def test_airllm_prompt_contains_session_id():
    """The AirLLM prompt must render the session id, not the placeholder.

    The built prompt is expected to mention ``session "airllm"`` and to
    contain no unformatted ``{session_id}`` anywhere.
    """
    with (
        patch("timmy.backends.is_apple_silicon", return_value=False),
        patch("config.settings") as mock_settings,
    ):
        mock_settings.ollama_model = "test-model"
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")
        prompt = agent._build_prompt("test message")

        # Check the bare placeholder (no trailing quote) so that any
        # unformatted occurrence is caught, matching the {model_name} tests.
        assert "{session_id}" not in prompt
        assert 'session "airllm"' in prompt
|
||||
|
||||
|
||||
# ── ClaudeBackend ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -270,3 +325,87 @@ def test_claude_backend_history_rolling_window():
|
||||
backend.run(f"message {i}")
|
||||
|
||||
assert len(backend._history) <= 20
|
||||
|
||||
|
||||
# ── ClaudeBackend prompt formatting ─────────────────────────────────────────
|
||||
|
||||
|
||||
def test_claude_prompt_contains_formatted_model_name():
    """The system prompt sent by ClaudeBackend must render the model name.

    The backend's client is replaced by a mock so the ``system`` keyword
    passed to ``messages.create`` can be inspected.
    """
    # Stub client whose messages.create() returns a single text block.
    reply = MagicMock(content=[MagicMock(text="test response")])
    fake_client = MagicMock()
    fake_client.messages.create.return_value = reply

    with patch("config.settings") as mock_settings:
        mock_settings.ollama_model = "llama3.2:3b"
        from timmy.backends import ClaudeBackend

        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        with patch.object(backend, "_get_client", return_value=fake_client):
            backend.run("test message")

        # Read the system prompt from the keyword arguments of the create call.
        sent_kwargs = fake_client.messages.create.call_args.kwargs
        sent_system = sent_kwargs.get("system", "")

        # The placeholder must be gone and the real model name present.
        assert "{model_name}" not in sent_system
        assert "llama3.2:3b" in sent_system
|
||||
|
||||
|
||||
def test_claude_prompt_gets_full_tier():
    """The system prompt sent by ClaudeBackend must be the FULL tier.

    FULL tier corresponds to ``tools_enabled=True``; it is recognised here by
    its "TOOL USAGE" and "VOICE AND BREVITY" sections.
    """
    # Stub client whose messages.create() returns a single text block.
    reply = MagicMock(content=[MagicMock(text="test response")])
    fake_client = MagicMock()
    fake_client.messages.create.return_value = reply

    with patch("config.settings") as mock_settings:
        mock_settings.ollama_model = "test-model"
        from timmy.backends import ClaudeBackend

        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
        with patch.object(backend, "_get_client", return_value=fake_client):
            backend.run("test message")

        sent_kwargs = fake_client.messages.create.call_args.kwargs
        sent_system = sent_kwargs.get("system", "")

        # Both section headings are markers of the FULL tier prompt.
        assert "TOOL USAGE" in sent_system
        assert "VOICE AND BREVITY" in sent_system
|
||||
|
||||
|
||||
def test_claude_prompt_contains_session_id():
    """The system prompt sent by ClaudeBackend must render the session id.

    The prompt is expected to mention ``session "claude"`` and to contain no
    unformatted ``{session_id}`` anywhere.
    """
    with patch("config.settings") as mock_settings:
        mock_settings.ollama_model = "test-model"
        from timmy.backends import ClaudeBackend

        backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")

        # Mock the client so the system parameter can be captured.
        mock_client = MagicMock()
        mock_content = MagicMock()
        mock_content.text = "test response"
        mock_response = MagicMock()
        mock_response.content = [mock_content]
        mock_client.messages.create.return_value = mock_response

        with patch.object(backend, "_get_client", return_value=mock_client):
            backend.run("test message")

        call_kwargs = mock_client.messages.create.call_args[1]
        system_prompt = call_kwargs.get("system", "")

        # Check the bare placeholder (no trailing quote) so that any
        # unformatted occurrence is caught, matching the {model_name} tests.
        assert "{session_id}" not in system_prompt
        assert 'session "claude"' in system_prompt
|
||||
|
||||
@@ -194,6 +194,52 @@ def test_grok_backend_build_messages():
|
||||
assert messages[-1]["content"] == "new question"
|
||||
|
||||
|
||||
def test_grok_prompt_contains_formatted_model_name():
    """The Grok system message must render the configured model name.

    The system prompt is read from the first message returned by
    ``_build_messages``.
    """
    configured_model = "llama3.2:3b"
    with patch("config.settings") as mock_settings:
        mock_settings.ollama_model = configured_model
        from timmy.backends import GrokBackend

        grok = GrokBackend(api_key="xai-test", model="grok-3-fast")
        first_message = grok._build_messages("test message")[0]
        rendered = first_message["content"]

        # The placeholder must be gone and the real model name present.
        assert "{model_name}" not in rendered
        assert configured_model in rendered
|
||||
|
||||
|
||||
def test_grok_prompt_gets_full_tier():
    """The Grok system message must be the FULL tier (tools_enabled=True).

    FULL tier is recognised here by its "TOOL USAGE" and "VOICE AND BREVITY"
    sections.
    """
    with patch("config.settings") as mock_settings:
        mock_settings.ollama_model = "test-model"
        from timmy.backends import GrokBackend

        grok = GrokBackend(api_key="xai-test", model="grok-3-fast")
        first_message = grok._build_messages("test message")[0]
        rendered = first_message["content"]

        # Both section headings are markers of the FULL tier prompt.
        assert "TOOL USAGE" in rendered
        assert "VOICE AND BREVITY" in rendered
|
||||
|
||||
|
||||
def test_grok_prompt_contains_session_id():
    """The Grok system message must render the session id, not the placeholder.

    The prompt is expected to mention ``session "grok"`` and to contain no
    unformatted ``{session_id}`` anywhere.
    """
    with patch("config.settings") as mock_settings:
        mock_settings.ollama_model = "test-model"
        from timmy.backends import GrokBackend

        backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
        messages = backend._build_messages("test message")
        system_prompt = messages[0]["content"]

        # Check the bare placeholder (no trailing quote) so that any
        # unformatted occurrence is caught, matching the {model_name} tests.
        assert "{session_id}" not in system_prompt
        assert 'session "grok"' in system_prompt
|
||||
|
||||
|
||||
# ── get_grok_backend singleton ──────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user