feat: integrate AirLLM as optional high-performance backend

Adds the `bigbrain` optional dependency group (airllm>=2.9.0) and a complete second inference path that runs 8B / 70B / 405B Llama models locally via layer-by-layer loading — no GPU required, no cloud, fully sovereign.

Key changes:
- src/timmy/backends.py — TimmyAirLLMAgent (same print_response interface as Agno Agent); auto-selects AirLLMMLX on Apple Silicon, AutoModel (PyTorch) everywhere else
- src/timmy/agent.py — _resolve_backend() routing with explicit override, env-config, and 'auto' Apple-Silicon detection
- src/timmy/cli.py — --backend / --model-size flags on all commands
- src/config.py — timmy_model_backend + airllm_model_size settings
- src/timmy/prompts.py — mentions AirLLM "even bigger brains, still fully sovereign"
- pyproject.toml — bigbrain optional dep; wheel includes updated
- .env.example — TIMMY_MODEL_BACKEND + AIRLLM_MODEL_SIZE docs
- tests/conftest.py — stubs 'airllm' module so tests run without GPU
- tests/test_backends.py — 13 new tests covering helpers + TimmyAirLLMAgent
- tests/test_agent.py — 7 new tests for backend routing
- README.md — Big Brain section with one-line install
- activate_self_tdd.sh — bootstrap script (venv + install + tests + watchdog + dashboard); --big-brain flag

All 61 tests pass. Self-TDD watchdog unaffected.

https://claude.ai/code/session_01DMjQ5qMZ8iHeyix1j3GS7c
2026-02-21 16:53:16 +00:00
parent 7619407b63
commit 19af4ae540
12 changed files with 601 additions and 13 deletions
--- a/src/timmy/cli.py
+++ b/src/timmy/cli.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import typer

 from timmy.agent import create_timmy
@@ -5,25 +7,50 @@ from timmy.prompts import TIMMY_STATUS_PROMPT

 app = typer.Typer(help="Timmy — sovereign AI agent")

+# Shared option definitions (reused across commands for consistency).
+_BACKEND_OPTION = typer.Option(
+    None,
+    "--backend",
+    "-b",
+    help="Inference backend: 'ollama' (default) | 'airllm' | 'auto'",
+)
+_MODEL_SIZE_OPTION = typer.Option(
+    None,
+    "--model-size",
+    "-s",
+    help="AirLLM model size when --backend airllm: '8b' | '70b' | '405b'",
+)
+

@app.command()
-def think(topic: str = typer.Argument(..., help="Topic to reason about")):
+def think(
+    topic: str = typer.Argument(..., help="Topic to reason about"),
+    backend: Optional[str] = _BACKEND_OPTION,
+    model_size: Optional[str] = _MODEL_SIZE_OPTION,
+):
    """Ask Timmy to think carefully about a topic."""
-    timmy = create_timmy()
+    timmy = create_timmy(backend=backend, model_size=model_size)
    timmy.print_response(f"Think carefully about: {topic}", stream=True)


@app.command()
-def chat(message: str = typer.Argument(..., help="Message to send")):
+def chat(
+    message: str = typer.Argument(..., help="Message to send"),
+    backend: Optional[str] = _BACKEND_OPTION,
+    model_size: Optional[str] = _MODEL_SIZE_OPTION,
+):
    """Send a message to Timmy."""
-    timmy = create_timmy()
+    timmy = create_timmy(backend=backend, model_size=model_size)
    timmy.print_response(message, stream=True)


@app.command()
-def status():
+def status(
+    backend: Optional[str] = _BACKEND_OPTION,
+    model_size: Optional[str] = _MODEL_SIZE_OPTION,
+):
    """Print Timmy's operational status."""
-    timmy = create_timmy()
+    timmy = create_timmy(backend=backend, model_size=model_size)
    timmy.print_response(TIMMY_STATUS_PROMPT, stream=False)