diff --git a/src/config.py b/src/config.py
index 54cc7118..4f87bfe4 100644
--- a/src/config.py
+++ b/src/config.py
@@ -41,13 +41,12 @@ class Settings(BaseSettings):
     # "airllm"  — always use AirLLM (requires pip install ".[bigbrain]")
     # "auto"    — use AirLLM on Apple Silicon if airllm is installed,
     #             fall back to Ollama otherwise
-    timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "airllm"
+    timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
 
     # AirLLM model size when backend is airllm or auto.
     # Larger = smarter, but needs more RAM / disk.
     # 8b  ~16 GB  |  70b  ~140 GB  |  405b  ~810 GB
-    # Optimization: Llama-3.3-70B-Instruct-MLX is recommended for Mac M3 36GB.
-    airllm_model_size: str = "70b"
+    airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
 
     # ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
     # Grok is a premium augmentation layer — local-first ethos preserved.
diff --git a/src/timmy/backends.py b/src/timmy/backends.py
index 127f4d7a..91c6f363 100644
--- a/src/timmy/backends.py
+++ b/src/timmy/backends.py
@@ -23,10 +23,9 @@ from timmy.prompts import SYSTEM_PROMPT
 logger = logging.getLogger(__name__)
 
 # HuggingFace model IDs for each supported size.
-# 70b is optimized for Mac M3 36GB using Llama-3.3-70B-Instruct-MLX.
 _AIRLLM_MODELS: dict[str, str] = {
     "8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
-    "70b": "meta-llama/Llama-3.3-70B-Instruct-MLX",
+    "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
     "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct",
 }
 
@@ -129,10 +128,7 @@ class TimmyAirLLMAgent:
     # ── private helpers ──────────────────────────────────────────────────────
 
     def _build_prompt(self, message: str) -> str:
-        from timmy.prompts import get_system_prompt
-        # AirLLM uses 70b by default which is tool-capable
-        system_prompt = get_system_prompt(tools_enabled=True)
-        context = system_prompt + "\n\n"
+        context = SYSTEM_PROMPT + "\n\n"
         # Include the last 10 turns (5 exchanges) for continuity.
         if self._history:
             context += "\n".join(self._history[-10:]) + "\n\n"