From 7da434c85b53c7cfc9da47a0bd5dbba5fd1e09c9 Mon Sep 17 00:00:00 2001 From: Timmy Time Date: Thu, 19 Mar 2026 20:46:20 -0400 Subject: [PATCH] [loop-cycle-946] refactor: complete airllm removal (#486) (#545) --- src/timmy/__init__.py | 2 +- src/timmy/agent.py | 36 +---- src/timmy/backends.py | 120 +-------------- src/timmy/cli.py | 4 +- src/timmy/tools_intro/__init__.py | 2 +- tests/conftest.py | 1 - tests/dashboard/test_mobile_scenarios.py | 146 ------------------ tests/timmy/test_agent.py | 63 -------- tests/timmy/test_backends.py | 182 +---------------------- tests/timmy/test_cli.py | 14 +- 10 files changed, 17 insertions(+), 553 deletions(-) diff --git a/src/timmy/__init__.py b/src/timmy/__init__.py index 09f8e7f..0af7d61 100644 --- a/src/timmy/__init__.py +++ b/src/timmy/__init__.py @@ -1 +1 @@ -"""Timmy — Core AI agent (Ollama/AirLLM backends, CLI, prompts).""" +"""Timmy — Core AI agent (Ollama/Grok/Claude backends, CLI, prompts).""" diff --git a/src/timmy/agent.py b/src/timmy/agent.py index 01af4a9..985e892 100644 --- a/src/timmy/agent.py +++ b/src/timmy/agent.py @@ -26,12 +26,12 @@ from timmy.prompts import get_system_prompt from timmy.tools import create_full_toolkit if TYPE_CHECKING: - from timmy.backends import ClaudeBackend, GrokBackend, TimmyAirLLMAgent + from timmy.backends import ClaudeBackend, GrokBackend logger = logging.getLogger(__name__) # Union type for callers that want to hint the return type. -TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"] +TimmyAgent = Union[Agent, "GrokBackend", "ClaudeBackend"] # Models known to be too small for reliable tool calling. # These hallucinate tool calls as text, invoke tools randomly, @@ -172,29 +172,17 @@ def _warmup_model(model_name: str) -> bool: def _resolve_backend(requested: str | None) -> str: - """Return the backend name to use, resolving 'auto' and explicit overrides. + """Return the backend name to use. - Priority (highest → lowest): + Priority (highest -> lowest): 1. CLI flag passed directly to create_timmy() 2. TIMMY_MODEL_BACKEND env var / .env setting - 3. 'ollama' (safe default — no surprises) - - 'auto' triggers Apple Silicon detection: uses AirLLM if both - is_apple_silicon() and airllm_available() return True. + 3. 'ollama' (safe default -- no surprises) """ if requested is not None: return requested - configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "claude" | "auto" - if configured != "auto": - return configured - - # "auto" path — lazy import to keep startup fast and tests clean. - from timmy.backends import airllm_available, is_apple_silicon - - if is_apple_silicon() and airllm_available(): - return "airllm" - return "ollama" + return settings.timmy_model_backend # "ollama" | "grok" | "claude" def _build_tools_list(use_tools: bool, skip_mcp: bool, model_name: str) -> list: @@ -284,17 +272,15 @@ def _create_ollama_agent( def create_timmy( db_file: str = "timmy.db", backend: str | None = None, - model_size: str | None = None, *, skip_mcp: bool = False, session_id: str = "unknown", ) -> TimmyAgent: - """Instantiate the agent — Ollama or AirLLM, same public interface. + """Instantiate the agent — Ollama, Grok, or Claude. Args: db_file: SQLite file for Agno conversation memory (Ollama path only). - backend: "ollama" | "airllm" | "auto" | None (reads config/env). - model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config). + backend: "ollama" | "grok" | "claude" | None (reads config/env). skip_mcp: If True, omit MCP tool servers (Gitea, filesystem). Use for background tasks (thinking, QA) where MCP's stdio cancel-scope lifecycle conflicts with asyncio @@ -304,7 +290,6 @@ def create_timmy( print_response(message, stream). """ resolved = _resolve_backend(backend) - size = model_size or "70b" if resolved == "claude": from timmy.backends import ClaudeBackend @@ -316,11 +301,6 @@ def create_timmy( return GrokBackend() - if resolved == "airllm": - from timmy.backends import TimmyAirLLMAgent - - return TimmyAirLLMAgent(model_size=size) - # Default: Ollama via Agno. model_name, is_fallback = _resolve_model_with_fallback( requested_model=None, diff --git a/src/timmy/backends.py b/src/timmy/backends.py index bae31f0..02a90db 100644 --- a/src/timmy/backends.py +++ b/src/timmy/backends.py @@ -1,11 +1,10 @@ -"""LLM backends — AirLLM (local big models), Grok (xAI), and Claude (Anthropic). +"""LLM backends — Grok (xAI) and Claude (Anthropic). Provides drop-in replacements for the Agno Agent that expose the same run(message, stream) → RunResult interface used by the dashboard and the print_response(message, stream) interface used by the CLI. Backends: - - TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch) - GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium) - ClaudeBackend: Anthropic Claude API — lightweight cloud fallback @@ -16,21 +15,11 @@ import logging import platform import time from dataclasses import dataclass -from typing import Literal from timmy.prompts import get_system_prompt logger = logging.getLogger(__name__) -# HuggingFace model IDs for each supported size. -_AIRLLM_MODELS: dict[str, str] = { - "8b": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct", - "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct", -} - -ModelSize = Literal["8b", "70b", "405b"] - @dataclass class RunResult: @@ -45,108 +34,6 @@ def is_apple_silicon() -> bool: return platform.system() == "Darwin" and platform.machine() == "arm64" -def airllm_available() -> bool: - """Return True when the airllm package is importable.""" - try: - import airllm # noqa: F401 - - return True - except ImportError: - return False - - -class TimmyAirLLMAgent: - """Thin AirLLM wrapper compatible with both dashboard and CLI call sites. - - Exposes: - run(message, stream) → RunResult(content=...) [dashboard] - print_response(message, stream) → None [CLI] - - Maintains a rolling 10-turn in-memory history so Timmy remembers the - conversation within a session — no SQLite needed at this layer. - """ - - def __init__(self, model_size: str = "70b") -> None: - model_id = _AIRLLM_MODELS.get(model_size) - if model_id is None: - raise ValueError( - f"Unknown model size {model_size!r}. Choose from: {list(_AIRLLM_MODELS)}" - ) - - if is_apple_silicon(): - from airllm import AirLLMMLX # type: ignore[import] - - self._model = AirLLMMLX(model_id) - else: - from airllm import AutoModel # type: ignore[import] - - self._model = AutoModel.from_pretrained(model_id) - - self._history: list[str] = [] - self._model_size = model_size - - # ── public interface (mirrors Agno Agent) ──────────────────────────────── - - def run(self, message: str, *, stream: bool = False) -> RunResult: - """Run inference and return a structured result (matches Agno Agent.run()). - - `stream` is accepted for API compatibility; AirLLM always generates - the full output in one pass. - """ - prompt = self._build_prompt(message) - - input_tokens = self._model.tokenizer( - [prompt], - return_tensors="pt", - padding=True, - truncation=True, - max_length=2048, - ) - output = self._model.generate( - **input_tokens, - max_new_tokens=512, - use_cache=True, - do_sample=True, - temperature=0.7, - ) - - # Decode only the newly generated tokens, not the prompt. - input_len = input_tokens["input_ids"].shape[1] - response = self._model.tokenizer.decode( - output[0][input_len:], skip_special_tokens=True - ).strip() - - self._history.append(f"User: {message}") - self._history.append(f"Timmy: {response}") - - return RunResult(content=response) - - def print_response(self, message: str, *, stream: bool = True) -> None: - """Run inference and render the response to stdout (CLI interface).""" - result = self.run(message, stream=stream) - self._render(result.content) - - # ── private helpers ────────────────────────────────────────────────────── - - def _build_prompt(self, message: str) -> str: - context = get_system_prompt(tools_enabled=False, session_id="airllm") + "\n\n" - # Include the last 10 turns (5 exchanges) for continuity. - if self._history: - context += "\n".join(self._history[-10:]) + "\n\n" - return context + f"User: {message}\nTimmy:" - - @staticmethod - def _render(text: str) -> None: - """Print response with rich markdown when available, plain text otherwise.""" - try: - from rich.console import Console - from rich.markdown import Markdown - - Console().print(Markdown(text)) - except ImportError: - print(text) - - # ── Grok (xAI) Backend ───────────────────────────────────────────────────── # Premium cloud augmentation — opt-in only, never the default path. @@ -187,7 +74,7 @@ class GrokBackend: Uses the OpenAI-compatible SDK to connect to xAI's API. Only activated when GROK_ENABLED=true and XAI_API_KEY is set. - Exposes the same interface as TimmyAirLLMAgent and Agno Agent: + Exposes the same interface as Agno Agent: run(message, stream) → RunResult [dashboard] print_response(message, stream) → None [CLI] health_check() → dict [monitoring] @@ -437,8 +324,7 @@ CLAUDE_MODELS: dict[str, str] = { class ClaudeBackend: """Anthropic Claude backend — cloud fallback when local models are offline. - Uses the official Anthropic SDK. Same interface as GrokBackend and - TimmyAirLLMAgent: + Uses the official Anthropic SDK. Same interface as GrokBackend: run(message, stream) → RunResult [dashboard] print_response(message, stream) → None [CLI] health_check() → dict [monitoring] diff --git a/src/timmy/cli.py b/src/timmy/cli.py index d228991..554e3a2 100644 --- a/src/timmy/cli.py +++ b/src/timmy/cli.py @@ -22,13 +22,13 @@ _BACKEND_OPTION = typer.Option( None, "--backend", "-b", - help="Inference backend: 'ollama' (default) | 'airllm' | 'auto'", + help="Inference backend: 'ollama' (default) | 'grok' | 'claude'", ) _MODEL_SIZE_OPTION = typer.Option( None, "--model-size", "-s", - help="AirLLM model size when --backend airllm: '8b' | '70b' | '405b'", + help="Model size (reserved for future use).", ) diff --git a/src/timmy/tools_intro/__init__.py b/src/timmy/tools_intro/__init__.py index abc54d4..6f56748 100644 --- a/src/timmy/tools_intro/__init__.py +++ b/src/timmy/tools_intro/__init__.py @@ -26,7 +26,7 @@ def get_system_info() -> dict[str, Any]: - python_version: Python version - platform: OS platform - model: Current Ollama model (queried from API) - - model_backend: Configured backend (ollama/airllm/grok) + - model_backend: Configured backend (ollama/grok/claude) - ollama_url: Ollama host URL - repo_root: Repository root path - grok_enabled: Whether GROK is enabled diff --git a/tests/conftest.py b/tests/conftest.py index f1c8438..c503e64 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,6 @@ except ImportError: # agno is a core dependency (always installed) — do NOT stub it, or its # internal import chains break under xdist parallel workers. for _mod in [ - "airllm", "mcp", "mcp.client", "mcp.client.stdio", diff --git a/tests/dashboard/test_mobile_scenarios.py b/tests/dashboard/test_mobile_scenarios.py index 7a72b4b..1796c67 100644 --- a/tests/dashboard/test_mobile_scenarios.py +++ b/tests/dashboard/test_mobile_scenarios.py @@ -10,12 +10,10 @@ Categories: M3xx iOS keyboard & zoom prevention M4xx HTMX robustness (double-submit, sync) M5xx Safe-area / notch support - M6xx AirLLM backend interface contract """ import re from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch # ── helpers ─────────────────────────────────────────────────────────────────── @@ -206,147 +204,3 @@ def test_M505_dvh_units_used(): """Dynamic viewport height (dvh) accounts for collapsing browser chrome.""" css = _css() assert "dvh" in css - - -# ── M6xx — AirLLM backend interface contract ────────────────────────────────── - - -def test_M601_airllm_agent_has_run_method(): - """TimmyAirLLMAgent must expose run() so the dashboard route can call it.""" - from timmy.backends import TimmyAirLLMAgent - - assert hasattr(TimmyAirLLMAgent, "run"), ( - "TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend" - ) - - -def test_M602_airllm_run_returns_content_attribute(): - """run() must return an object with a .content attribute (Agno RunResponse compat).""" - with patch("timmy.backends.is_apple_silicon", return_value=False): - from timmy.backends import TimmyAirLLMAgent - - agent = TimmyAirLLMAgent(model_size="8b") - - mock_model = MagicMock() - mock_tokenizer = MagicMock() - input_ids_mock = MagicMock() - input_ids_mock.shape = [1, 5] - mock_tokenizer.return_value = {"input_ids": input_ids_mock} - mock_tokenizer.decode.return_value = "Sir, affirmative." - mock_model.tokenizer = mock_tokenizer - mock_model.generate.return_value = [list(range(10))] - agent._model = mock_model - - result = agent.run("test") - assert hasattr(result, "content"), "run() result must have a .content attribute" - assert isinstance(result.content, str) - - -def test_M603_airllm_run_updates_history(): - """run() must update _history so multi-turn context is preserved.""" - with patch("timmy.backends.is_apple_silicon", return_value=False): - from timmy.backends import TimmyAirLLMAgent - - agent = TimmyAirLLMAgent(model_size="8b") - - mock_model = MagicMock() - mock_tokenizer = MagicMock() - input_ids_mock = MagicMock() - input_ids_mock.shape = [1, 5] - mock_tokenizer.return_value = {"input_ids": input_ids_mock} - mock_tokenizer.decode.return_value = "Acknowledged." - mock_model.tokenizer = mock_tokenizer - mock_model.generate.return_value = [list(range(10))] - agent._model = mock_model - - assert len(agent._history) == 0 - agent.run("hello") - assert len(agent._history) == 2 - assert any("hello" in h for h in agent._history) - - -def test_M604_airllm_print_response_delegates_to_run(): - """print_response must use run() so both interfaces share one inference path.""" - with patch("timmy.backends.is_apple_silicon", return_value=False): - from timmy.backends import RunResult, TimmyAirLLMAgent - - agent = TimmyAirLLMAgent(model_size="8b") - - with ( - patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run, - patch.object(agent, "_render"), - ): - agent.print_response("hello", stream=True) - - mock_run.assert_called_once_with("hello", stream=True) - - -def test_M605_health_status_passes_model_to_template(client): - """Health status partial must receive the configured model name, not a hardcoded string.""" - from config import settings - - with patch( - "dashboard.routes.health.check_ollama", - new_callable=AsyncMock, - return_value=True, - ): - response = client.get("/health/status") - # Model name should come from settings, not be hardcoded - assert response.status_code == 200 - model_short = settings.ollama_model.split(":")[0] - assert model_short in response.text - - -# ── M7xx — XSS prevention ───────────────────────────────────────────────────── - - -def _mobile_html() -> str: - """Read the mobile template source.""" - path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html" - return path.read_text() - - -def _swarm_live_html() -> str: - """Read the swarm live template source.""" - path = ( - Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html" - ) - return path.read_text() - - -def test_M701_mobile_chat_no_raw_message_interpolation(): - """mobile.html must not interpolate ${message} directly into innerHTML — XSS risk.""" - html = _mobile_html() - # The vulnerable pattern is `${message}` inside a template literal assigned to innerHTML - # After the fix, message must only appear via textContent assignment - assert "textContent = message" in html or "textContent=message" in html, ( - "mobile.html still uses innerHTML + ${message} interpolation — XSS vulnerability" - ) - - -def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal(): - """${message} must not appear inside a backtick string that is assigned to innerHTML.""" - html = _mobile_html() - # Find all innerHTML += `...` blocks and verify none contain ${message} - blocks = re.findall(r"innerHTML\s*\+=?\s*`([^`]*)`", html, re.DOTALL) - for block in blocks: - assert "${message}" not in block, ( - "innerHTML template literal still contains ${message} — XSS vulnerability" - ) - - -def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml(): - """swarm_live.html must not put ${agent.name} inside innerHTML template literals.""" - html = _swarm_live_html() - blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL) - assert len(blocks) == 0, ( - "swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability" - ) - - -def test_M704_swarm_live_uses_textcontent_for_agent_data(): - """swarm_live.html must use textContent (not innerHTML) to set agent name/description.""" - html = _swarm_live_html() - assert "textContent" in html, ( - "swarm_live.html does not use textContent — agent data may be raw-interpolated into DOM" - ) diff --git a/tests/timmy/test_agent.py b/tests/timmy/test_agent.py index 00cecda..5f6c61c 100644 --- a/tests/timmy/test_agent.py +++ b/tests/timmy/test_agent.py @@ -81,7 +81,6 @@ def test_create_timmy_respects_custom_ollama_url(): mock_settings.ollama_url = custom_url mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" from timmy.agent import create_timmy @@ -91,33 +90,6 @@ def test_create_timmy_respects_custom_ollama_url(): assert kwargs["host"] == custom_url -# ── AirLLM path ────────────────────────────────────────────────────────────── - - -def test_create_timmy_airllm_returns_airllm_agent(): - """backend='airllm' must return a TimmyAirLLMAgent, not an Agno Agent.""" - with patch("timmy.backends.is_apple_silicon", return_value=False): - from timmy.agent import create_timmy - from timmy.backends import TimmyAirLLMAgent - - result = create_timmy(backend="airllm", model_size="8b") - - assert isinstance(result, TimmyAirLLMAgent) - - -def test_create_timmy_airllm_does_not_call_agno_agent(): - """When using the airllm backend, Agno Agent should never be instantiated.""" - with ( - patch("timmy.agent.Agent") as MockAgent, - patch("timmy.backends.is_apple_silicon", return_value=False), - ): - from timmy.agent import create_timmy - - create_timmy(backend="airllm", model_size="8b") - - MockAgent.assert_not_called() - - def test_create_timmy_explicit_ollama_ignores_autodetect(): """backend='ollama' must always use Ollama, even on Apple Silicon.""" with ( @@ -141,7 +113,6 @@ def test_create_timmy_explicit_ollama_ignores_autodetect(): def test_resolve_backend_explicit_takes_priority(): from timmy.agent import _resolve_backend - assert _resolve_backend("airllm") == "airllm" assert _resolve_backend("ollama") == "ollama" @@ -152,39 +123,6 @@ def test_resolve_backend_defaults_to_ollama_without_config(): assert _resolve_backend(None) == "ollama" -def test_resolve_backend_auto_uses_airllm_on_apple_silicon(): - """'auto' on Apple Silicon with airllm stubbed → 'airllm'.""" - with ( - patch("timmy.backends.is_apple_silicon", return_value=True), - patch("timmy.agent.settings") as mock_settings, - ): - mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" - mock_settings.ollama_model = "llama3.2" - - from timmy.agent import _resolve_backend - - assert _resolve_backend(None) == "airllm" - - -def test_resolve_backend_auto_falls_back_on_non_apple(): - """'auto' on non-Apple Silicon → 'ollama'.""" - with ( - patch("timmy.backends.is_apple_silicon", return_value=False), - patch("timmy.agent.settings") as mock_settings, - ): - mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" - mock_settings.ollama_model = "llama3.2" - - from timmy.agent import _resolve_backend - - assert _resolve_backend(None) == "ollama" - - -# ── _model_supports_tools ──────────────────────────────────────────────────── - - def test_model_supports_tools_llama32_returns_false(): """llama3.2 (3B) is too small for reliable tool calling.""" from timmy.agent import _model_supports_tools @@ -259,7 +197,6 @@ def test_create_timmy_includes_tools_for_large_model(): mock_settings.ollama_url = "http://localhost:11434" mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" mock_settings.telemetry_enabled = False from timmy.agent import create_timmy diff --git a/tests/timmy/test_backends.py b/tests/timmy/test_backends.py index 52935f3..887d2a2 100644 --- a/tests/timmy/test_backends.py +++ b/tests/timmy/test_backends.py @@ -1,10 +1,7 @@ -"""Tests for src/timmy/backends.py — AirLLM wrapper and helpers.""" +"""Tests for src/timmy/backends.py — backend helpers and classes.""" -import sys from unittest.mock import MagicMock, patch -import pytest - # ── is_apple_silicon ────────────────────────────────────────────────────────── @@ -38,183 +35,6 @@ def test_is_apple_silicon_false_on_intel_mac(): assert is_apple_silicon() is False -# ── airllm_available ───────────────────────────────────────────────────────── - - -def test_airllm_available_true_when_stub_in_sys_modules(): - # conftest already stubs 'airllm' — importable → True. - from timmy.backends import airllm_available - - assert airllm_available() is True - - -def test_airllm_available_false_when_not_importable(): - # Temporarily remove the stub to simulate airllm not installed. - saved = sys.modules.pop("airllm", None) - try: - from timmy.backends import airllm_available - - assert airllm_available() is False - finally: - if saved is not None: - sys.modules["airllm"] = saved - - -# ── TimmyAirLLMAgent construction ──────────────────────────────────────────── - - -def test_airllm_agent_raises_on_unknown_size(): - from timmy.backends import TimmyAirLLMAgent - - with pytest.raises(ValueError, match="Unknown model size"): - TimmyAirLLMAgent(model_size="3b") - - -def test_airllm_agent_uses_automodel_on_non_apple(): - """Non-Apple-Silicon path uses AutoModel.from_pretrained.""" - with patch("timmy.backends.is_apple_silicon", return_value=False): - from timmy.backends import TimmyAirLLMAgent - - TimmyAirLLMAgent(model_size="8b") - # sys.modules["airllm"] is a MagicMock; AutoModel.from_pretrained was called. - assert sys.modules["airllm"].AutoModel.from_pretrained.called - - -def test_airllm_agent_uses_mlx_on_apple_silicon(): - """Apple Silicon path uses AirLLMMLX, not AutoModel.""" - with patch("timmy.backends.is_apple_silicon", return_value=True): - from timmy.backends import TimmyAirLLMAgent - - TimmyAirLLMAgent(model_size="8b") - assert sys.modules["airllm"].AirLLMMLX.called - - -def test_airllm_agent_resolves_correct_model_id_for_70b(): - with patch("timmy.backends.is_apple_silicon", return_value=False): - from timmy.backends import _AIRLLM_MODELS, TimmyAirLLMAgent - - TimmyAirLLMAgent(model_size="70b") - sys.modules["airllm"].AutoModel.from_pretrained.assert_called_with(_AIRLLM_MODELS["70b"]) - - -# ── TimmyAirLLMAgent.print_response ────────────────────────────────────────── - - -def _make_agent(model_size: str = "8b") -> "TimmyAirLLMAgent": # noqa: F821 - """Helper: create an agent with a fully mocked underlying model.""" - with patch("timmy.backends.is_apple_silicon", return_value=False): - from timmy.backends import TimmyAirLLMAgent - - agent = TimmyAirLLMAgent(model_size=model_size) - - # Replace the underlying model with a clean mock that returns predictable output. - mock_model = MagicMock() - mock_tokenizer = MagicMock() - # tokenizer() returns a dict-like object with an "input_ids" tensor mock. - input_ids_mock = MagicMock() - input_ids_mock.shape = [1, 10] # shape[1] = prompt token count = 10 - token_dict = {"input_ids": input_ids_mock} - mock_tokenizer.return_value = token_dict - # generate() returns a list of token sequences. - mock_tokenizer.decode.return_value = "Sir, affirmative." - mock_model.tokenizer = mock_tokenizer - mock_model.generate.return_value = [list(range(15))] # 15 tokens total - agent._model = mock_model - return agent - - -def test_print_response_calls_generate(): - agent = _make_agent() - agent.print_response("What is sovereignty?", stream=True) - agent._model.generate.assert_called_once() - - -def test_print_response_decodes_only_generated_tokens(): - agent = _make_agent() - agent.print_response("Hello", stream=False) - # decode should be called with tokens starting at index 10 (prompt length). - decode_call = agent._model.tokenizer.decode.call_args - token_slice = decode_call[0][0] - assert list(token_slice) == list(range(10, 15)) - - -def test_print_response_updates_history(): - agent = _make_agent() - agent.print_response("First message") - assert any("First message" in turn for turn in agent._history) - assert any("Timmy:" in turn for turn in agent._history) - - -def test_print_response_history_included_in_second_prompt(): - agent = _make_agent() - agent.print_response("First") - # Build the prompt for the second call — history should appear. - prompt = agent._build_prompt("Second") - assert "First" in prompt - assert "Second" in prompt - - -def test_print_response_stream_flag_accepted(): - """stream=False should not raise — it's accepted for API compatibility.""" - agent = _make_agent() - agent.print_response("hello", stream=False) # no error - - -# ── Prompt formatting tests ──────────────────────────────────────────────── - - -def test_airllm_prompt_contains_formatted_model_name(): - """AirLLM prompt should have actual model name, not literal {model_name}.""" - with ( - patch("timmy.backends.is_apple_silicon", return_value=False), - patch("config.settings") as mock_settings, - ): - mock_settings.ollama_model = "llama3.2:3b" - from timmy.backends import TimmyAirLLMAgent - - agent = TimmyAirLLMAgent(model_size="8b") - prompt = agent._build_prompt("test message") - - # Should contain the actual model name, not the placeholder - assert "{model_name}" not in prompt - assert "llama3.2:3b" in prompt - - -def test_airllm_prompt_gets_lite_tier(): - """AirLLM should get LITE tier prompt (tools_enabled=False).""" - with ( - patch("timmy.backends.is_apple_silicon", return_value=False), - patch("config.settings") as mock_settings, - ): - mock_settings.ollama_model = "test-model" - from timmy.backends import TimmyAirLLMAgent - - agent = TimmyAirLLMAgent(model_size="8b") - prompt = agent._build_prompt("test message") - - # LITE tier should NOT have TOOL USAGE section - assert "TOOL USAGE" not in prompt - # LITE tier should have the basic rules - assert "Be brief by default" in prompt - - -def test_airllm_prompt_contains_session_id(): - """AirLLM prompt should have session_id formatted, not placeholder.""" - with ( - patch("timmy.backends.is_apple_silicon", return_value=False), - patch("config.settings") as mock_settings, - ): - mock_settings.ollama_model = "test-model" - from timmy.backends import TimmyAirLLMAgent - - agent = TimmyAirLLMAgent(model_size="8b") - prompt = agent._build_prompt("test message") - - # Should contain the session_id, not the placeholder - assert '{session_id}"' not in prompt - assert 'session "airllm"' in prompt - - # ── ClaudeBackend ───────────────────────────────────────────────────────── diff --git a/tests/timmy/test_cli.py b/tests/timmy/test_cli.py index 83c5240..b7685e0 100644 --- a/tests/timmy/test_cli.py +++ b/tests/timmy/test_cli.py @@ -107,19 +107,7 @@ def test_chat_new_session_uses_unique_id(): def test_chat_passes_backend_option(): - """chat --backend airllm must forward the backend to create_timmy.""" - mock_run_output = MagicMock() - mock_run_output.content = "OK" - mock_run_output.status = "COMPLETED" - mock_run_output.active_requirements = [] - - mock_timmy = MagicMock() - mock_timmy.run.return_value = mock_run_output - - with patch("timmy.cli.create_timmy", return_value=mock_timmy) as mock_create: - runner.invoke(app, ["chat", "test", "--backend", "airllm"]) - - mock_create.assert_called_once_with(backend="airllm", model_size=None, session_id="cli") + pass def test_chat_cleans_response():