"""Tests for src/timmy/backends.py — AirLLM wrapper and helpers.""" import sys from unittest.mock import MagicMock, patch import pytest # ── is_apple_silicon ────────────────────────────────────────────────────────── def test_is_apple_silicon_true_on_arm_darwin(): with ( patch("timmy.backends.platform.system", return_value="Darwin"), patch("timmy.backends.platform.machine", return_value="arm64"), ): from timmy.backends import is_apple_silicon assert is_apple_silicon() is True def test_is_apple_silicon_false_on_linux(): with ( patch("timmy.backends.platform.system", return_value="Linux"), patch("timmy.backends.platform.machine", return_value="x86_64"), ): from timmy.backends import is_apple_silicon assert is_apple_silicon() is False def test_is_apple_silicon_false_on_intel_mac(): with ( patch("timmy.backends.platform.system", return_value="Darwin"), patch("timmy.backends.platform.machine", return_value="x86_64"), ): from timmy.backends import is_apple_silicon assert is_apple_silicon() is False # ── airllm_available ───────────────────────────────────────────────────────── def test_airllm_available_true_when_stub_in_sys_modules(): # conftest already stubs 'airllm' — importable → True. from timmy.backends import airllm_available assert airllm_available() is True def test_airllm_available_false_when_not_importable(): # Temporarily remove the stub to simulate airllm not installed. saved = sys.modules.pop("airllm", None) try: from timmy.backends import airllm_available assert airllm_available() is False finally: if saved is not None: sys.modules["airllm"] = saved # ── TimmyAirLLMAgent construction ──────────────────────────────────────────── def test_airllm_agent_raises_on_unknown_size(): from timmy.backends import TimmyAirLLMAgent with pytest.raises(ValueError, match="Unknown model size"): TimmyAirLLMAgent(model_size="3b") def test_airllm_agent_uses_automodel_on_non_apple(): """Non-Apple-Silicon path uses AutoModel.from_pretrained.""" with patch("timmy.backends.is_apple_silicon", return_value=False): from timmy.backends import TimmyAirLLMAgent TimmyAirLLMAgent(model_size="8b") # sys.modules["airllm"] is a MagicMock; AutoModel.from_pretrained was called. assert sys.modules["airllm"].AutoModel.from_pretrained.called def test_airllm_agent_uses_mlx_on_apple_silicon(): """Apple Silicon path uses AirLLMMLX, not AutoModel.""" with patch("timmy.backends.is_apple_silicon", return_value=True): from timmy.backends import TimmyAirLLMAgent TimmyAirLLMAgent(model_size="8b") assert sys.modules["airllm"].AirLLMMLX.called def test_airllm_agent_resolves_correct_model_id_for_70b(): with patch("timmy.backends.is_apple_silicon", return_value=False): from timmy.backends import _AIRLLM_MODELS, TimmyAirLLMAgent TimmyAirLLMAgent(model_size="70b") sys.modules["airllm"].AutoModel.from_pretrained.assert_called_with(_AIRLLM_MODELS["70b"]) # ── TimmyAirLLMAgent.print_response ────────────────────────────────────────── def _make_agent(model_size: str = "8b") -> "TimmyAirLLMAgent": # noqa: F821 """Helper: create an agent with a fully mocked underlying model.""" with patch("timmy.backends.is_apple_silicon", return_value=False): from timmy.backends import TimmyAirLLMAgent agent = TimmyAirLLMAgent(model_size=model_size) # Replace the underlying model with a clean mock that returns predictable output. mock_model = MagicMock() mock_tokenizer = MagicMock() # tokenizer() returns a dict-like object with an "input_ids" tensor mock. input_ids_mock = MagicMock() input_ids_mock.shape = [1, 10] # shape[1] = prompt token count = 10 token_dict = {"input_ids": input_ids_mock} mock_tokenizer.return_value = token_dict # generate() returns a list of token sequences. mock_tokenizer.decode.return_value = "Sir, affirmative." mock_model.tokenizer = mock_tokenizer mock_model.generate.return_value = [list(range(15))] # 15 tokens total agent._model = mock_model return agent def test_print_response_calls_generate(): agent = _make_agent() agent.print_response("What is sovereignty?", stream=True) agent._model.generate.assert_called_once() def test_print_response_decodes_only_generated_tokens(): agent = _make_agent() agent.print_response("Hello", stream=False) # decode should be called with tokens starting at index 10 (prompt length). decode_call = agent._model.tokenizer.decode.call_args token_slice = decode_call[0][0] assert list(token_slice) == list(range(10, 15)) def test_print_response_updates_history(): agent = _make_agent() agent.print_response("First message") assert any("First message" in turn for turn in agent._history) assert any("Timmy:" in turn for turn in agent._history) def test_print_response_history_included_in_second_prompt(): agent = _make_agent() agent.print_response("First") # Build the prompt for the second call — history should appear. prompt = agent._build_prompt("Second") assert "First" in prompt assert "Second" in prompt def test_print_response_stream_flag_accepted(): """stream=False should not raise — it's accepted for API compatibility.""" agent = _make_agent() agent.print_response("hello", stream=False) # no error # ── Prompt formatting tests ──────────────────────────────────────────────── def test_airllm_prompt_contains_formatted_model_name(): """AirLLM prompt should have actual model name, not literal {model_name}.""" with ( patch("timmy.backends.is_apple_silicon", return_value=False), patch("config.settings") as mock_settings, ): mock_settings.ollama_model = "llama3.2:3b" from timmy.backends import TimmyAirLLMAgent agent = TimmyAirLLMAgent(model_size="8b") prompt = agent._build_prompt("test message") # Should contain the actual model name, not the placeholder assert "{model_name}" not in prompt assert "llama3.2:3b" in prompt def test_airllm_prompt_gets_lite_tier(): """AirLLM should get LITE tier prompt (tools_enabled=False).""" with ( patch("timmy.backends.is_apple_silicon", return_value=False), patch("config.settings") as mock_settings, ): mock_settings.ollama_model = "test-model" from timmy.backends import TimmyAirLLMAgent agent = TimmyAirLLMAgent(model_size="8b") prompt = agent._build_prompt("test message") # LITE tier should NOT have TOOL USAGE section assert "TOOL USAGE" not in prompt # LITE tier should have the basic rules assert "Be brief by default" in prompt def test_airllm_prompt_contains_session_id(): """AirLLM prompt should have session_id formatted, not placeholder.""" with ( patch("timmy.backends.is_apple_silicon", return_value=False), patch("config.settings") as mock_settings, ): mock_settings.ollama_model = "test-model" from timmy.backends import TimmyAirLLMAgent agent = TimmyAirLLMAgent(model_size="8b") prompt = agent._build_prompt("test message") # Should contain the session_id, not the placeholder assert '{session_id}"' not in prompt assert 'session "airllm"' in prompt # ── ClaudeBackend ───────────────────────────────────────────────────────── def test_claude_available_false_when_no_key(): """claude_available() returns False when ANTHROPIC_API_KEY is empty.""" with patch("config.settings") as mock_settings: mock_settings.anthropic_api_key = "" from timmy.backends import claude_available assert claude_available() is False def test_claude_available_true_when_key_set(): """claude_available() returns True when ANTHROPIC_API_KEY is set.""" with patch("config.settings") as mock_settings: mock_settings.anthropic_api_key = "sk-ant-test-key" from timmy.backends import claude_available assert claude_available() is True def test_claude_backend_init_with_explicit_params(): """ClaudeBackend can be created with explicit api_key and model.""" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") assert backend._api_key == "sk-ant-test" assert "haiku" in backend._model def test_claude_backend_init_resolves_short_names(): """ClaudeBackend resolves short model names to full IDs.""" from timmy.backends import CLAUDE_MODELS, ClaudeBackend backend = ClaudeBackend(api_key="sk-test", model="sonnet") assert backend._model == CLAUDE_MODELS["sonnet"] def test_claude_backend_init_passes_through_full_model_id(): """ClaudeBackend passes through full model IDs unchanged.""" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-test", model="claude-haiku-4-5-20251001") assert backend._model == "claude-haiku-4-5-20251001" def test_claude_backend_run_no_key_returns_error(): """run() gracefully returns error message when no API key.""" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="", model="haiku") result = backend.run("hello") assert "not configured" in result.content def test_claude_backend_run_success(): """run() returns content from the Anthropic API on success.""" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") mock_content = MagicMock() mock_content.text = "Sir, affirmative. I am Timmy." mock_response = MagicMock() mock_response.content = [mock_content] mock_client = MagicMock() mock_client.messages.create.return_value = mock_response with patch.object(backend, "_get_client", return_value=mock_client): result = backend.run("Who are you?") assert "Timmy" in result.content assert len(backend._history) == 2 # user + assistant def test_claude_backend_run_handles_api_error(): """run() returns a graceful error when the API raises.""" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") mock_client = MagicMock() mock_client.messages.create.side_effect = ConnectionError("network down") with patch.object(backend, "_get_client", return_value=mock_client): result = backend.run("hello") assert "unavailable" in result.content def test_claude_backend_history_rolling_window(): """History should be capped at 20 entries (10 exchanges).""" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") mock_content = MagicMock() mock_content.text = "OK." mock_response = MagicMock() mock_response.content = [mock_content] mock_client = MagicMock() mock_client.messages.create.return_value = mock_response with patch.object(backend, "_get_client", return_value=mock_client): for i in range(15): backend.run(f"message {i}") assert len(backend._history) <= 20 # ── ClaudeBackend prompt formatting ───────────────────────────────────────── def test_claude_prompt_contains_formatted_model_name(): """Claude system prompt should have actual model name, not literal {model_name}.""" with patch("config.settings") as mock_settings: mock_settings.ollama_model = "llama3.2:3b" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") # Mock the client to capture the system parameter mock_client = MagicMock() mock_content = MagicMock() mock_content.text = "test response" mock_response = MagicMock() mock_response.content = [mock_content] mock_client.messages.create.return_value = mock_response with patch.object(backend, "_get_client", return_value=mock_client): backend.run("test message") # Get the system parameter from the create call call_kwargs = mock_client.messages.create.call_args[1] system_prompt = call_kwargs.get("system", "") # Should contain the actual model name, not the placeholder assert "{model_name}" not in system_prompt assert "llama3.2:3b" in system_prompt def test_claude_prompt_gets_full_tier(): """Claude should get FULL tier prompt (tools_enabled=True).""" with patch("config.settings") as mock_settings: mock_settings.ollama_model = "test-model" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") mock_client = MagicMock() mock_content = MagicMock() mock_content.text = "test response" mock_response = MagicMock() mock_response.content = [mock_content] mock_client.messages.create.return_value = mock_response with patch.object(backend, "_get_client", return_value=mock_client): backend.run("test message") call_kwargs = mock_client.messages.create.call_args[1] system_prompt = call_kwargs.get("system", "") # FULL tier should have TOOL USAGE section assert "TOOL USAGE" in system_prompt # FULL tier should have the full voice and brevity section assert "VOICE AND BREVITY" in system_prompt def test_claude_prompt_contains_session_id(): """Claude prompt should have session_id formatted, not placeholder.""" with patch("config.settings") as mock_settings: mock_settings.ollama_model = "test-model" from timmy.backends import ClaudeBackend backend = ClaudeBackend(api_key="sk-ant-test", model="haiku") mock_client = MagicMock() mock_content = MagicMock() mock_content.text = "test response" mock_response = MagicMock() mock_response.content = [mock_content] mock_client.messages.create.return_value = mock_response with patch.object(backend, "_get_client", return_value=mock_client): backend.run("test message") call_kwargs = mock_client.messages.create.call_args[1] system_prompt = call_kwargs.get("system", "") # Should contain the session_id, not the placeholder assert '{session_id}"' not in system_prompt assert 'session "claude"' in system_prompt