[claude] Document and validate AirLLM Apple Silicon requirements (#1284) (#1298)

2026-03-24 01:52:16 +00:00
parent fc0a94202f
commit 00ef4fbd22
5 changed files with 178 additions and 3 deletions
--- a/.env.example
+++ b/.env.example
@@ -27,8 +27,12 @@
 # ── AirLLM / big-brain backend ───────────────────────────────────────────────
 # Inference backend: "ollama" (default) | "airllm" | "auto"
-#   "auto" → uses AirLLM on Apple Silicon if installed, otherwise Ollama.
+#   "ollama"  → always use Ollama (safe everywhere, any OS)
-#   Requires: pip install ".[bigbrain]"
+#   "airllm"  → AirLLM layer-by-layer loading (Apple Silicon M1/M2/M3/M4 only)
 #               Requires 16 GB RAM minimum (32 GB recommended).
 #               Automatically falls back to Ollama on Intel Mac, Linux, or Windows.
 #               Install extra: pip install "airllm[mlx]"
 #   "auto"    → use AirLLM on Apple Silicon if installed, otherwise Ollama
 # TIMMY_MODEL_BACKEND=ollama
 # AirLLM model size (default: 70b).
--- a/README.md
+++ b/README.md
@@ -9,6 +9,21 @@ API access with Bitcoin Lightning — all from a browser, no cloud AI required.
 ---
 ## System Requirements
 | Path | Hardware | RAM | Disk |
 |------|----------|-----|------|
 | **Ollama** (default) | Any OS — x86-64 or ARM | 8 GB min | 5–10 GB (model files) |
 | **AirLLM** (Apple Silicon) | M1, M2, M3, or M4 Mac | 16 GB min (32 GB recommended) | ~15 GB free |
 **Ollama path** runs on any modern machine — macOS, Linux, or Windows.  No GPU required.
 **AirLLM path** uses layer-by-layer loading for 70B+ models without a GPU.  Requires Apple
 Silicon and the `bigbrain` extras (`pip install ".[bigbrain]"`).  On Intel Mac, Linux, or
 Windows the app automatically falls back to Ollama — no crash, no config change needed.
 ---
 ## Quick Start
 ```bash
--- a/src/config.py
+++ b/src/config.py
@@ -94,8 +94,9 @@ class Settings(BaseSettings):
    # ── Backend selection ────────────────────────────────────────────────────
    # "ollama"  — always use Ollama (default, safe everywhere)
    # "airllm"  — AirLLM layer-by-layer loading (Apple Silicon only; degrades to Ollama)
    # "auto"    — pick best available local backend, fall back to Ollama
-    timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama"
+    timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
    # ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
    # Grok is a premium augmentation layer — local-first ethos preserved.
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -301,6 +301,26 @@ def create_timmy(
        return GrokBackend()
    if resolved == "airllm":
        # AirLLM requires Apple Silicon.  On any other platform (Intel Mac, Linux,
        # Windows) or when the package is not installed, log a warning and fall
        # back to Ollama.
        from timmy.backends import is_apple_silicon
        if not is_apple_silicon():
            logger.warning(
                "TIMMY_MODEL_BACKEND=airllm requested but not running on Apple Silicon "
                "— falling back to Ollama"
            )
        else:
            try:
                import airllm  # noqa: F401
            except ImportError:
                logger.warning(
                    "AirLLM not installed — falling back to Ollama. "
                    "Install with: pip install 'airllm[mlx]'"
                )
        # Fall through to Ollama in all cases, including Apple Silicon with the
        # airllm package importable (AirLLM integration is only scaffolded so far)
    # Default: Ollama via Agno.
    model_name, is_fallback = _resolve_model_with_fallback(
        requested_model=None,
--- a/tests/unit/test_airllm_backend.py
+++ b/tests/unit/test_airllm_backend.py
@@ -0,0 +1,135 @@
 """Unit tests for AirLLM backend graceful degradation.
 Verifies that setting TIMMY_MODEL_BACKEND=airllm on non-Apple-Silicon hardware
 (Intel Mac, Linux, Windows) or when the airllm package is not installed
 falls back to the Ollama backend without crashing.
 Refs #1284
 """
 import sys
 from unittest.mock import MagicMock, patch
 import pytest
 # Module-level pytest mark: applies the `unit` marker to every test in this file.
 pytestmark = pytest.mark.unit
 class TestIsAppleSilicon:
    """is_apple_silicon() is True for Darwin/arm64 and False for the other hosts tried here."""
    @staticmethod
    def _detect(system_name, machine_name):
        """Call is_apple_silicon() with platform.system/machine stubbed to the given values."""
        from timmy.backends import is_apple_silicon
        with (
            patch("platform.system", return_value=system_name),
            patch("platform.machine", return_value=machine_name),
        ):
            return is_apple_silicon()
    def test_returns_true_on_arm64_darwin(self):
        # macOS reporting an arm64 machine is the one positive case.
        assert self._detect("Darwin", "arm64") is True
    def test_returns_false_on_intel_mac(self):
        # Same OS, but an x86_64 machine string.
        assert self._detect("Darwin", "x86_64") is False
    def test_returns_false_on_linux(self):
        assert self._detect("Linux", "x86_64") is False
    def test_returns_false_on_windows(self):
        # Windows reports its 64-bit x86 architecture as "AMD64".
        assert self._detect("Windows", "AMD64") is False
 class TestAirLLMGracefulDegradation:
    """create_timmy(backend='airllm') falls back to Ollama on unsupported platforms.
    Each test patches the Ollama construction helpers in ``timmy.agent`` and
    checks that the stubbed Ollama agent is what create_timmy() hands back.
    """
    def _make_fake_ollama_agent(self):
        """Return a lightweight stub that satisfies the Agno Agent interface."""
        agent = MagicMock()
        agent.run = MagicMock(return_value=MagicMock(content="ok"))
        return agent
    def test_falls_back_to_ollama_on_non_apple_silicon(self, caplog):
        """On Intel/Linux, airllm backend logs a warning and creates an Ollama agent."""
        import logging
        from timmy.agent import create_timmy
        fake_agent = self._make_fake_ollama_agent()
        with (
            patch("timmy.backends.is_apple_silicon", return_value=False),
            patch("timmy.agent._create_ollama_agent", return_value=fake_agent) as mock_create,
            patch("timmy.agent._resolve_model_with_fallback", return_value=("qwen3:8b", False)),
            patch("timmy.agent._check_model_available", return_value=True),
            patch("timmy.agent._build_tools_list", return_value=[]),
            patch("timmy.agent._build_prompt", return_value="test prompt"),
            caplog.at_level(logging.WARNING, logger="timmy.agent"),
        ):
            result = create_timmy(backend="airllm")
        assert result is fake_agent
        mock_create.assert_called_once()
        assert "Apple Silicon" in caplog.text
    def test_falls_back_to_ollama_when_airllm_not_installed(self, caplog, monkeypatch):
        """When the airllm package is missing, log a warning and use Ollama."""
        import logging
        from timmy.agent import create_timmy
        fake_agent = self._make_fake_ollama_agent()
        # Simulate a missing airllm package: a None entry in sys.modules makes
        # `import airllm` raise ImportError, without swapping out
        # builtins.__import__ for every other import made during the call.
        # monkeypatch restores just this one key at teardown.
        monkeypatch.setitem(sys.modules, "airllm", None)
        with (
            patch("timmy.backends.is_apple_silicon", return_value=True),
            patch("timmy.agent._create_ollama_agent", return_value=fake_agent) as mock_create,
            patch("timmy.agent._resolve_model_with_fallback", return_value=("qwen3:8b", False)),
            patch("timmy.agent._check_model_available", return_value=True),
            patch("timmy.agent._build_tools_list", return_value=[]),
            patch("timmy.agent._build_prompt", return_value="test prompt"),
            caplog.at_level(logging.WARNING, logger="timmy.agent"),
        ):
            result = create_timmy(backend="airllm")
        assert result is fake_agent
        mock_create.assert_called_once()
        # Match the missing-package warning specifically: a bare "airllm" check
        # would also pass on the "not running on Apple Silicon" warning, which
        # contains "TIMMY_MODEL_BACKEND=airllm".
        assert "AirLLM not installed" in caplog.text
    def test_airllm_backend_does_not_raise(self):
        """create_timmy(backend='airllm') never raises — it degrades gracefully."""
        from timmy.agent import create_timmy
        fake_agent = self._make_fake_ollama_agent()
        with (
            patch("timmy.backends.is_apple_silicon", return_value=False),
            patch("timmy.agent._create_ollama_agent", return_value=fake_agent),
            patch("timmy.agent._resolve_model_with_fallback", return_value=("qwen3:8b", False)),
            patch("timmy.agent._check_model_available", return_value=True),
            patch("timmy.agent._build_tools_list", return_value=[]),
            patch("timmy.agent._build_prompt", return_value="test prompt"),
        ):
            # Should not raise under any circumstances
            result = create_timmy(backend="airllm")
        assert result is not None