feat: update default backend to AirLLM and optimize for Mac M3 36GB
This commit is contained in:
@@ -41,12 +41,13 @@ class Settings(BaseSettings):
|
||||
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
|
||||
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
|
||||
# fall back to Ollama otherwise
|
||||
timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
|
||||
timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "airllm"
|
||||
|
||||
# AirLLM model size when backend is airllm or auto.
|
||||
# Larger = smarter, but needs more RAM / disk.
|
||||
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
|
||||
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
|
||||
# Optimization: Llama-3.3-70B-Instruct-MLX is recommended for Mac M3 36GB.
|
||||
airllm_model_size: str = "70b"
|
||||
|
||||
# ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
|
||||
# Grok is a premium augmentation layer — local-first ethos preserved.
|
||||
|
||||
@@ -23,9 +23,10 @@ from timmy.prompts import SYSTEM_PROMPT
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# HuggingFace model IDs for each supported size.
|
||||
# The 70b entry uses Llama-3.3-70B-Instruct-MLX, which is optimized for a Mac M3 with 36 GB of RAM.
|
||||
_AIRLLM_MODELS: dict[str, str] = {
|
||||
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
||||
"70b": "meta-llama/Llama-3.3-70B-Instruct-MLX",
|
||||
"405b": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
||||
}
|
||||
|
||||
@@ -128,7 +129,10 @@ class TimmyAirLLMAgent:
|
||||
# ── private helpers ──────────────────────────────────────────────────────
|
||||
|
||||
def _build_prompt(self, message: str) -> str:
|
||||
context = SYSTEM_PROMPT + "\n\n"
|
||||
from timmy.prompts import get_system_prompt
|
||||
# AirLLM defaults to the 70b model, which is tool-capable.
|
||||
system_prompt = get_system_prompt(tools_enabled=True)
|
||||
context = system_prompt + "\n\n"
|
||||
# Include the last 10 turns (5 exchanges) for continuity.
|
||||
if self._history:
|
||||
context += "\n".join(self._history[-10:]) + "\n\n"
|
||||
|
||||
Reference in New Issue
Block a user