diff --git a/src/config.py b/src/config.py index 54cc7118..4f87bfe4 100644 --- a/src/config.py +++ b/src/config.py @@ -41,13 +41,12 @@ class Settings(BaseSettings): # "airllm" — always use AirLLM (requires pip install ".[bigbrain]") # "auto" — use AirLLM on Apple Silicon if airllm is installed, # fall back to Ollama otherwise - timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "airllm" + timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama" # AirLLM model size when backend is airllm or auto. # Larger = smarter, but needs more RAM / disk. # 8b ~16 GB | 70b ~140 GB | 405b ~810 GB - # Optimization: Llama-3.3-70B-Instruct-MLX is recommended for Mac M3 36GB. - airllm_model_size: str = "70b" + airllm_model_size: Literal["8b", "70b", "405b"] = "70b" # ── Grok (xAI) — opt-in premium cloud backend ──────────────────────── # Grok is a premium augmentation layer — local-first ethos preserved. diff --git a/src/timmy/backends.py b/src/timmy/backends.py index 127f4d7a..91c6f363 100644 --- a/src/timmy/backends.py +++ b/src/timmy/backends.py @@ -23,10 +23,9 @@ from timmy.prompts import SYSTEM_PROMPT logger = logging.getLogger(__name__) # HuggingFace model IDs for each supported size. -# 70b is optimized for Mac M3 36GB using Llama-3.3-70B-Instruct-MLX. _AIRLLM_MODELS: dict[str, str] = { "8b": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "70b": "meta-llama/Llama-3.3-70B-Instruct-MLX", + "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct", "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct", } @@ -129,10 +128,7 @@ class TimmyAirLLMAgent: # ── private helpers ────────────────────────────────────────────────────── def _build_prompt(self, message: str) -> str: - from timmy.prompts import get_system_prompt - # AirLLM uses 70b by default which is tool-capable - system_prompt = get_system_prompt(tools_enabled=True) - context = system_prompt + "\n\n" + context = SYSTEM_PROMPT + "\n\n" # Include the last 10 turns (5 exchanges) for continuity. if self._history: context += "\n".join(self._history[-10:]) + "\n\n"