forked from Rockachopa/Timmy-time-dashboard
Adds the `bigbrain` optional dependency group (airllm>=2.9.0) and a
complete second inference path that runs 8B / 70B / 405B Llama models
locally via layer-by-layer loading — no GPU required, no cloud, fully
sovereign.
Key changes:
- src/timmy/backends.py — TimmyAirLLMAgent (same print_response interface
as Agno Agent); auto-selects AirLLMMLX on Apple
Silicon, AutoModel (PyTorch) everywhere else
- src/timmy/agent.py — _resolve_backend() routing with explicit override,
env-config, and 'auto' Apple-Silicon detection
- src/timmy/cli.py — --backend / --model-size flags on all commands
- src/config.py — timmy_model_backend + airllm_model_size settings
- src/timmy/prompts.py — mentions AirLLM "even bigger brains, still fully
sovereign"
- pyproject.toml — bigbrain optional dep; wheel includes updated
- .env.example — TIMMY_MODEL_BACKEND + AIRLLM_MODEL_SIZE docs
- tests/conftest.py — stubs 'airllm' module so tests run without GPU
- tests/test_backends.py — 13 new tests covering helpers + TimmyAirLLMAgent
- tests/test_agent.py — 7 new tests for backend routing
- README.md — Big Brain section with one-line install
- activate_self_tdd.sh — bootstrap script (venv + install + tests +
watchdog + dashboard); --big-brain flag
All 61 tests pass. Self-TDD watchdog unaffected.
https://claude.ai/code/session_01DMjQ5qMZ8iHeyix1j3GS7c
74 lines
2.3 KiB
Python
from typing import TYPE_CHECKING, Union
|
|
|
|
from agno.agent import Agent
|
|
from agno.db.sqlite import SqliteDb
|
|
from agno.models.ollama import Ollama
|
|
|
|
from config import settings
|
|
from timmy.prompts import TIMMY_SYSTEM_PROMPT
|
|
|
|
if TYPE_CHECKING:
    # Visible to static type checkers only; at runtime the AirLLM backend is
    # imported lazily inside create_timmy() so 'airllm' stays optional.
    from timmy.backends import TimmyAirLLMAgent

# Union type for callers that want to hint the return type.
# The string forward reference keeps TimmyAirLLMAgent out of runtime imports.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
|
|
|
|
|
|
def _resolve_backend(requested: str | None) -> str:
|
|
"""Return the backend name to use, resolving 'auto' and explicit overrides.
|
|
|
|
Priority (highest → lowest):
|
|
1. CLI flag passed directly to create_timmy()
|
|
2. TIMMY_MODEL_BACKEND env var / .env setting
|
|
3. 'ollama' (safe default — no surprises)
|
|
|
|
'auto' triggers Apple Silicon detection: uses AirLLM if both
|
|
is_apple_silicon() and airllm_available() return True.
|
|
"""
|
|
if requested is not None:
|
|
return requested
|
|
|
|
configured = settings.timmy_model_backend # "ollama" | "airllm" | "auto"
|
|
if configured != "auto":
|
|
return configured
|
|
|
|
# "auto" path — lazy import to keep startup fast and tests clean.
|
|
from timmy.backends import airllm_available, is_apple_silicon
|
|
if is_apple_silicon() and airllm_available():
|
|
return "airllm"
|
|
return "ollama"
|
|
|
|
|
|
def create_timmy(
    db_file: str = "timmy.db",
    backend: str | None = None,
    model_size: str | None = None,
) -> TimmyAgent:
    """Instantiate Timmy — Ollama or AirLLM, same public interface either way.

    Args:
        db_file: SQLite file for Agno conversation memory (Ollama path only).
        backend: "ollama" | "airllm" | "auto" | None (reads config/env).
        model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config).

    Returns an Agno Agent (Ollama) or TimmyAirLLMAgent — both expose
    print_response(message, stream).
    """
    chosen_backend = _resolve_backend(backend)
    chosen_size = model_size or settings.airllm_model_size

    if chosen_backend != "airllm":
        # Default: Ollama via Agno.
        return Agent(
            name="Timmy",
            model=Ollama(id=settings.ollama_model),
            db=SqliteDb(db_file=db_file),
            description=TIMMY_SYSTEM_PROMPT,
            add_history_to_context=True,
            num_history_runs=10,
            markdown=True,
        )

    # AirLLM path — imported lazily so the heavy dependency stays optional.
    from timmy.backends import TimmyAirLLMAgent

    return TimmyAirLLMAgent(model_size=chosen_size)
|