feat: integrate AirLLM as optional high-performance backend

Adds the `bigbrain` optional dependency group (airllm>=2.9.0) and a complete second inference path that runs 8B / 70B / 405B Llama models locally via layer-by-layer loading — no GPU required, no cloud, fully sovereign. Key changes: - src/timmy/backends.py — TimmyAirLLMAgent (same print_response interface as Agno Agent); auto-selects AirLLMMLX on Apple Silicon, AutoModel (PyTorch) everywhere else - src/timmy/agent.py — _resolve_backend() routing with explicit override, env-config, and 'auto' Apple-Silicon detection - src/timmy/cli.py — --backend / --model-size flags on all commands - src/config.py — timmy_model_backend + airllm_model_size settings - src/timmy/prompts.py — mentions AirLLM "even bigger brains, still fully sovereign" - pyproject.toml — bigbrain optional dep; wheel includes updated - .env.example — TIMMY_MODEL_BACKEND + AIRLLM_MODEL_SIZE docs - tests/conftest.py — stubs 'airllm' module so tests run without GPU - tests/test_backends.py — 13 new tests covering helpers + TimmyAirLLMAgent - tests/test_agent.py — 7 new tests for backend routing - README.md — Big Brain section with one-line install - activate_self_tdd.sh — bootstrap script (venv + install + tests + watchdog + dashboard); --big-brain flag All 61 tests pass. Self-TDD watchdog unaffected. https://claude.ai/code/session_01DMjQ5qMZ8iHeyix1j3GS7c
2026-02-21 16:53:16 +00:00
parent 7619407b63
commit 19af4ae540
12 changed files with 601 additions and 13 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,8 +5,8 @@ from unittest.mock import MagicMock
 import pytest
 from fastapi.testclient import TestClient

-# ── Mock agno so tests run without it installed ───────────────────────────────
-# Uses setdefault: real module is used if installed, mock otherwise.
+# ── Stub heavy optional dependencies so tests run without them installed ──────
+# Uses setdefault: real module is used if already installed, mock otherwise.
 for _mod in [
    "agno",
    "agno.agent",
@@ -14,6 +14,9 @@ for _mod in [
    "agno.models.ollama",
    "agno.db",
    "agno.db.sqlite",
+    # AirLLM is optional (bigbrain extra) — stub it so backend tests can
+    # import timmy.backends and instantiate TimmyAirLLMAgent without a GPU.
+    "airllm",
 ]:
    sys.modules.setdefault(_mod, MagicMock())

--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -77,3 +77,77 @@ def test_create_timmy_embeds_system_prompt():

        kwargs = MockAgent.call_args.kwargs
        assert kwargs["description"] == TIMMY_SYSTEM_PROMPT
+
+
+# ── AirLLM path ──────────────────────────────────────────────────────────────
+
+def test_create_timmy_airllm_returns_airllm_agent():
+    """backend='airllm' must return a TimmyAirLLMAgent, not an Agno Agent."""
+    with patch("timmy.backends.is_apple_silicon", return_value=False):
+        from timmy.agent import create_timmy
+        from timmy.backends import TimmyAirLLMAgent
+
+        result = create_timmy(backend="airllm", model_size="8b")
+
+    assert isinstance(result, TimmyAirLLMAgent)
+
+
+def test_create_timmy_airllm_does_not_call_agno_agent():
+    """When using the airllm backend, Agno Agent should never be instantiated."""
+    with patch("timmy.agent.Agent") as MockAgent, \
+         patch("timmy.backends.is_apple_silicon", return_value=False):
+
+        from timmy.agent import create_timmy
+        create_timmy(backend="airllm", model_size="8b")
+
+    MockAgent.assert_not_called()
+
+
+def test_create_timmy_explicit_ollama_ignores_autodetect():
+    """backend='ollama' must always use Ollama, even on Apple Silicon."""
+    with patch("timmy.agent.Agent") as MockAgent, \
+         patch("timmy.agent.Ollama"), \
+         patch("timmy.agent.SqliteDb"):
+
+        from timmy.agent import create_timmy
+        create_timmy(backend="ollama")
+
+    MockAgent.assert_called_once()
+
+
+# ── _resolve_backend ─────────────────────────────────────────────────────────
+
+def test_resolve_backend_explicit_takes_priority():
+    from timmy.agent import _resolve_backend
+    assert _resolve_backend("airllm") == "airllm"
+    assert _resolve_backend("ollama") == "ollama"
+
+
+def test_resolve_backend_defaults_to_ollama_without_config():
+    """Default config (timmy_model_backend='ollama') → 'ollama'."""
+    from timmy.agent import _resolve_backend
+    assert _resolve_backend(None) == "ollama"
+
+
+def test_resolve_backend_auto_uses_airllm_on_apple_silicon():
+    """'auto' on Apple Silicon with airllm stubbed → 'airllm'."""
+    with patch("timmy.backends.is_apple_silicon", return_value=True), \
+         patch("timmy.agent.settings") as mock_settings:
+        mock_settings.timmy_model_backend = "auto"
+        mock_settings.airllm_model_size = "70b"
+        mock_settings.ollama_model = "llama3.2"
+
+        from timmy.agent import _resolve_backend
+        assert _resolve_backend(None) == "airllm"
+
+
+def test_resolve_backend_auto_falls_back_on_non_apple():
+    """'auto' on non-Apple Silicon → 'ollama'."""
+    with patch("timmy.backends.is_apple_silicon", return_value=False), \
+         patch("timmy.agent.settings") as mock_settings:
+        mock_settings.timmy_model_backend = "auto"
+        mock_settings.airllm_model_size = "70b"
+        mock_settings.ollama_model = "llama3.2"
+
+        from timmy.agent import _resolve_backend
+        assert _resolve_backend(None) == "ollama"
--- a/tests/test_backends.py
+++ b/tests/test_backends.py
@@ -0,0 +1,143 @@
+"""Tests for src/timmy/backends.py — AirLLM wrapper and helpers."""
+
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ── is_apple_silicon ──────────────────────────────────────────────────────────
+
+def test_is_apple_silicon_true_on_arm_darwin():
+    with patch("timmy.backends.platform.system", return_value="Darwin"), \
+         patch("timmy.backends.platform.machine", return_value="arm64"):
+        from timmy.backends import is_apple_silicon
+        assert is_apple_silicon() is True
+
+
+def test_is_apple_silicon_false_on_linux():
+    with patch("timmy.backends.platform.system", return_value="Linux"), \
+         patch("timmy.backends.platform.machine", return_value="x86_64"):
+        from timmy.backends import is_apple_silicon
+        assert is_apple_silicon() is False
+
+
+def test_is_apple_silicon_false_on_intel_mac():
+    with patch("timmy.backends.platform.system", return_value="Darwin"), \
+         patch("timmy.backends.platform.machine", return_value="x86_64"):
+        from timmy.backends import is_apple_silicon
+        assert is_apple_silicon() is False
+
+
+# ── airllm_available ─────────────────────────────────────────────────────────
+
+def test_airllm_available_true_when_stub_in_sys_modules():
+    # conftest already stubs 'airllm' — importable → True.
+    from timmy.backends import airllm_available
+    assert airllm_available() is True
+
+
+def test_airllm_available_false_when_not_importable():
+    # Temporarily remove the stub to simulate airllm not installed.
+    saved = sys.modules.pop("airllm", None)
+    try:
+        from timmy.backends import airllm_available
+        assert airllm_available() is False
+    finally:
+        if saved is not None:
+            sys.modules["airllm"] = saved
+
+
+# ── TimmyAirLLMAgent construction ────────────────────────────────────────────
+
+def test_airllm_agent_raises_on_unknown_size():
+    from timmy.backends import TimmyAirLLMAgent
+    with pytest.raises(ValueError, match="Unknown model size"):
+        TimmyAirLLMAgent(model_size="3b")
+
+
+def test_airllm_agent_uses_automodel_on_non_apple():
+    """Non-Apple-Silicon path uses AutoModel.from_pretrained."""
+    with patch("timmy.backends.is_apple_silicon", return_value=False):
+        from timmy.backends import TimmyAirLLMAgent
+        agent = TimmyAirLLMAgent(model_size="8b")
+    # sys.modules["airllm"] is a MagicMock; AutoModel.from_pretrained was called.
+    assert sys.modules["airllm"].AutoModel.from_pretrained.called
+
+
+def test_airllm_agent_uses_mlx_on_apple_silicon():
+    """Apple Silicon path uses AirLLMMLX, not AutoModel."""
+    with patch("timmy.backends.is_apple_silicon", return_value=True):
+        from timmy.backends import TimmyAirLLMAgent
+        agent = TimmyAirLLMAgent(model_size="8b")
+    assert sys.modules["airllm"].AirLLMMLX.called
+
+
+def test_airllm_agent_resolves_correct_model_id_for_70b():
+    with patch("timmy.backends.is_apple_silicon", return_value=False):
+        from timmy.backends import TimmyAirLLMAgent, _AIRLLM_MODELS
+        TimmyAirLLMAgent(model_size="70b")
+    sys.modules["airllm"].AutoModel.from_pretrained.assert_called_with(
+        _AIRLLM_MODELS["70b"]
+    )
+
+
+# ── TimmyAirLLMAgent.print_response ──────────────────────────────────────────
+
+def _make_agent(model_size: str = "8b") -> "TimmyAirLLMAgent":
+    """Helper: create an agent with a fully mocked underlying model."""
+    with patch("timmy.backends.is_apple_silicon", return_value=False):
+        from timmy.backends import TimmyAirLLMAgent
+        agent = TimmyAirLLMAgent(model_size=model_size)
+
+    # Replace the underlying model with a clean mock that returns predictable output.
+    mock_model = MagicMock()
+    mock_tokenizer = MagicMock()
+    # tokenizer() returns a dict-like object with an "input_ids" tensor mock.
+    input_ids_mock = MagicMock()
+    input_ids_mock.shape = [1, 10]  # shape[1] = prompt token count = 10
+    token_dict = {"input_ids": input_ids_mock}
+    mock_tokenizer.return_value = token_dict
+    # generate() returns a list of token sequences.
+    mock_tokenizer.decode.return_value = "Sir, affirmative."
+    mock_model.tokenizer = mock_tokenizer
+    mock_model.generate.return_value = [list(range(15))]  # 15 tokens total
+    agent._model = mock_model
+    return agent
+
+
+def test_print_response_calls_generate():
+    agent = _make_agent()
+    agent.print_response("What is sovereignty?", stream=True)
+    agent._model.generate.assert_called_once()
+
+
+def test_print_response_decodes_only_generated_tokens():
+    agent = _make_agent()
+    agent.print_response("Hello", stream=False)
+    # decode should be called with tokens starting at index 10 (prompt length).
+    decode_call = agent._model.tokenizer.decode.call_args
+    token_slice = decode_call[0][0]
+    assert list(token_slice) == list(range(10, 15))
+
+
+def test_print_response_updates_history():
+    agent = _make_agent()
+    agent.print_response("First message")
+    assert any("First message" in turn for turn in agent._history)
+    assert any("Timmy:" in turn for turn in agent._history)
+
+
+def test_print_response_history_included_in_second_prompt():
+    agent = _make_agent()
+    agent.print_response("First")
+    # Build the prompt for the second call — history should appear.
+    prompt = agent._build_prompt("Second")
+    assert "First" in prompt
+    assert "Second" in prompt
+
+
+def test_print_response_stream_flag_accepted():
+    """stream=False should not raise — it's accepted for API compatibility."""
+    agent = _make_agent()
+    agent.print_response("hello", stream=False)  # no error