Merge pull request #4 from Alexspayne/claude/implement-todo-item-61a2S
This commit is contained in:
10
.env.example
10
.env.example
@@ -11,3 +11,13 @@
|
||||
|
||||
# Enable FastAPI interactive docs at /docs and /redoc (default: false)
|
||||
# DEBUG=true
|
||||
|
||||
# ── AirLLM / big-brain backend ───────────────────────────────────────────────
|
||||
# Inference backend: "ollama" (default) | "airllm" | "auto"
|
||||
# "auto" → uses AirLLM on Apple Silicon if installed, otherwise Ollama.
|
||||
# Requires: pip install ".[bigbrain]"
|
||||
# TIMMY_MODEL_BACKEND=ollama
|
||||
|
||||
# AirLLM model size (default: 70b).
|
||||
# 8b ~16 GB RAM | 70b ~140 GB RAM | 405b ~810 GB RAM
|
||||
# AIRLLM_MODEL_SIZE=70b
|
||||
|
||||
59
README.md
59
README.md
@@ -138,6 +138,65 @@ timmy status
|
||||
|
||||
---
|
||||
|
||||
## Big Brain — AirLLM backend (Apple Silicon / large RAM)
|
||||
|
||||
Run 70B or 405B models locally with no GPU required, using AirLLM's
|
||||
layer-by-layer loading strategy. On M-series Macs the MLX backend is
|
||||
selected automatically for maximum throughput. Everything stays local.
|
||||
No cloud. No telemetry. Sats are sovereignty, boss.
|
||||
|
||||
### One-line install
|
||||
|
||||
```bash
|
||||
pip install ".[bigbrain]"
|
||||
# Apple Silicon only — adds the MLX-accelerated backend:
|
||||
pip install "airllm[mlx]"
|
||||
```
|
||||
|
||||
### Run with the big brain
|
||||
|
||||
```bash
|
||||
# Explicit flag — works anywhere airllm is installed:
|
||||
timmy chat "Explain self-custody" --backend airllm --model-size 70b
|
||||
|
||||
# Or set it once in .env and forget about it:
|
||||
echo "TIMMY_MODEL_BACKEND=auto" >> .env
|
||||
echo "AIRLLM_MODEL_SIZE=70b" >> .env
|
||||
timmy chat "What is sovereignty?"
|
||||
```
|
||||
|
||||
`--backend auto` (or `TIMMY_MODEL_BACKEND=auto`) selects AirLLM automatically
|
||||
on Apple Silicon when the package is installed, and falls back to Ollama
|
||||
everywhere else — so the same `.env` works on any machine.
|
||||
|
||||
### Model sizes
|
||||
|
||||
| Flag | Parameters | Approx. RAM needed |
|
||||
|------|-----------|-------------------|
|
||||
| `8b` | 8 billion | ~16 GB |
|
||||
| `70b` | 70 billion | ~140 GB |
|
||||
| `405b` | 405 billion | ~810 GB |
|
||||
|
||||
Models are downloaded from HuggingFace on first run and cached locally.
|
||||
You need a HuggingFace account and `huggingface-cli login` for gated models
|
||||
(Llama 3.1 requires accepting Meta's license at hf.co/meta-llama).
|
||||
|
||||
### Architecture with AirLLM
|
||||
|
||||
```
|
||||
timmy chat --backend airllm
|
||||
│
|
||||
▼
|
||||
TimmyAirLLMAgent (src/timmy/backends.py)
|
||||
│
|
||||
├─ Apple Silicon? ──► AirLLMMLX (MLX tensors, Metal GPU)
|
||||
└─ Everything else ──► AutoModel (PyTorch, CPU/CUDA)
|
||||
│
|
||||
└─ Layers loaded on-demand from ~/.cache/huggingface/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```mermaid
|
||||
|
||||
74
activate_self_tdd.sh
Executable file
74
activate_self_tdd.sh
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env bash
# activate_self_tdd.sh — Timmy Time dev environment bootstrapper
#
# Usage:
#   bash activate_self_tdd.sh              # standard (Ollama) setup
#   bash activate_self_tdd.sh --big-brain  # install AirLLM extra too
#
# What it does:
#   1. Creates a Python venv (or reuses an existing one)
#   2. Installs Timmy Time (+ dev deps, optionally bigbrain)
#   3. Runs the full test suite — aborts if anything fails
#   4. Launches the self-TDD watchdog in the background
#   5. Starts the dashboard
#
# Everything stays local. No cloud. Sats are sovereignty, boss.

# Strict mode: abort on any error (-e), on use of unset variables (-u),
# and on failures anywhere inside a pipeline (pipefail).
set -euo pipefail

# Resolve the repo root from this script's own location so the script
# works no matter which directory it is invoked from.
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_DIR="$REPO_DIR/.venv"
BIG_BRAIN=0

for arg in "$@"; do
  case $arg in
    --big-brain) BIG_BRAIN=1 ;;
    *) echo "Unknown argument: $arg"; exit 1 ;;
  esac
done

echo "==> Timmy Time — sovereign AI agent bootstrapper"
echo " Working directory: $REPO_DIR"

# ── 1. Virtual environment ────────────────────────────────────────────────────
if [[ ! -d "$VENV_DIR" ]]; then
  echo "==> Creating virtual environment..."
  python3 -m venv "$VENV_DIR"
fi

# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"
echo "==> Virtual environment active: $VENV_DIR"

# ── 2. Install dependencies ───────────────────────────────────────────────────
if [[ $BIG_BRAIN -eq 1 ]]; then
  echo "==> Installing with bigbrain extra (AirLLM)..."
  # Editable install with extras: "<path>[extra1,extra2]".
  pip install --quiet -e "$REPO_DIR[dev,bigbrain]"
  # On Apple Silicon, also install the MLX backend.
  if [[ "$(uname -m)" == "arm64" && "$(uname -s)" == "Darwin" ]]; then
    echo "==> Apple Silicon detected — installing AirLLM MLX backend..."
    pip install --quiet "airllm[mlx]"
  fi
else
  echo "==> Installing standard dependencies..."
  pip install --quiet -e "$REPO_DIR[dev]"
fi

# ── 3. Run tests ──────────────────────────────────────────────────────────────
echo "==> Running test suite..."
# set -e aborts the whole bootstrap here if pytest exits non-zero.
python -m pytest "$REPO_DIR/tests/" -q --tb=short
echo "==> All tests passed."

# ── 4. Self-TDD watchdog (background) ────────────────────────────────────────
echo "==> Starting self-TDD watchdog (60s interval) in background..."
self-tdd watch --interval 60 &
# $! is the PID of the most recent background job — surfaced so the user
# can kill the watchdog manually; it deliberately outlives this script.
WATCHDOG_PID=$!
echo " Watchdog PID: $WATCHDOG_PID"
echo " Kill with: kill $WATCHDOG_PID"

# ── 5. Dashboard ─────────────────────────────────────────────────────────────
echo ""
echo "==> Starting Timmy Time dashboard at http://localhost:8000"
echo " Ctrl-C stops the dashboard (watchdog continues until you kill it)"
echo ""
uvicorn dashboard.app:app --reload --host 0.0.0.0 --port 8000
|
||||
@@ -28,13 +28,20 @@ dev = [
|
||||
"pytest-asyncio>=0.24.0",
|
||||
"pytest-cov>=5.0.0",
|
||||
]
|
||||
# Big-brain: run 8B / 70B / 405B models locally via layer-by-layer loading.
|
||||
# pip install ".[bigbrain]"
|
||||
# On Apple Silicon: pip install "airllm[mlx]" for the MLX-accelerated backend.
|
||||
bigbrain = [
|
||||
"airllm>=2.9.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
timmy = "timmy.cli:main"
|
||||
self-tdd = "self_tdd.watchdog:main"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
sources = {"src" = ""}
|
||||
include = ["src/timmy", "src/dashboard", "src/config.py"]
|
||||
include = ["src/timmy", "src/dashboard", "src/config.py", "src/self_tdd"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from typing import Literal
|
||||
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
@@ -11,6 +13,18 @@ class Settings(BaseSettings):
|
||||
# Set DEBUG=true to enable /docs and /redoc (disabled by default)
|
||||
debug: bool = False
|
||||
|
||||
# ── AirLLM / backend selection ───────────────────────────────────────────
|
||||
# "ollama" — always use Ollama (default, safe everywhere)
|
||||
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
|
||||
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
|
||||
# fall back to Ollama otherwise
|
||||
timmy_model_backend: Literal["ollama", "airllm", "auto"] = "ollama"
|
||||
|
||||
# AirLLM model size when backend is airllm or auto.
|
||||
# Larger = smarter, but needs more RAM / disk.
|
||||
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
|
||||
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
|
||||
|
||||
model_config = SettingsConfigDict(
|
||||
env_file=".env",
|
||||
env_file_encoding="utf-8",
|
||||
|
||||
0
src/self_tdd/__init__.py
Normal file
0
src/self_tdd/__init__.py
Normal file
71
src/self_tdd/watchdog.py
Normal file
71
src/self_tdd/watchdog.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Self-TDD Watchdog — polls pytest on a schedule and reports regressions.
|
||||
|
||||
Run in a terminal alongside your normal dev work:
|
||||
|
||||
self-tdd watch
|
||||
self-tdd watch --interval 30
|
||||
|
||||
The watchdog runs silently while tests pass. When a regression appears it
|
||||
prints the full short-traceback output so you can see exactly what broke.
|
||||
No files are modified; no commits are made. Ctrl-C to stop.
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
|
||||
# Project root is three levels up from src/self_tdd/watchdog.py
|
||||
PROJECT_ROOT = Path(__file__).parent.parent.parent
|
||||
|
||||
app = typer.Typer(help="Self-TDD watchdog — continuous test runner")
|
||||
|
||||
|
||||
def _run_tests() -> tuple[bool, str]:
    """Run the test suite once and return (passed, combined_output).

    Invokes pytest in a subprocess rooted at PROJECT_ROOT, using the same
    interpreter that runs the watchdog (sys.executable) so virtualenvs work.

    Returns:
        passed: True when pytest exited with code 0.
        combined_output: stdout + stderr of the run, stripped.

    Fix: previously a hung or slow suite raised subprocess.TimeoutExpired
    out of this helper, crashing the whole watch loop (which only handles
    KeyboardInterrupt). A timeout is now reported as a failed run instead.
    """
    try:
        result = subprocess.run(
            [sys.executable, "-m", "pytest", "tests/", "-q", "--tb=short"],
            capture_output=True,
            text=True,
            cwd=PROJECT_ROOT,
            timeout=60,
        )
    except subprocess.TimeoutExpired as exc:
        # exc.stdout may be None, str, or bytes depending on how far the
        # child got — normalize to str so callers always get printable text.
        partial = exc.stdout or ""
        if isinstance(partial, bytes):
            partial = partial.decode(errors="replace")
        return False, f"Test run timed out after 60s.\n{partial}".strip()
    return result.returncode == 0, (result.stdout + result.stderr).strip()
|
||||
|
||||
|
||||
@app.command()
def watch(
    interval: int = typer.Option(60, "--interval", "-i", help="Seconds between test runs"),
) -> None:
    """Poll pytest continuously and print regressions as they appear."""
    typer.echo(f"Self-TDD watchdog started — polling every {interval}s. Ctrl-C to stop.")
    previous_ok: bool | None = None

    try:
        while True:
            ok, report = _run_tests()
            now_str = datetime.now().strftime("%H:%M:%S")

            if not ok:
                # A failing run always shows the full traceback output.
                typer.secho(f"[{now_str}] Regression detected:", fg=typer.colors.RED)
                typer.echo(report)
            elif previous_ok is not True:
                # Announce green only on the transition (or the first run)
                # so the watchdog stays quiet while everything passes.
                typer.secho(f"[{now_str}] All tests passing.", fg=typer.colors.GREEN)

            previous_ok = ok
            time.sleep(interval)

    except KeyboardInterrupt:
        typer.echo("\nWatchdog stopped.")
|
||||
|
||||
|
||||
def main() -> None:
    """Console-script entry point (wired up as `self-tdd` in [project.scripts])."""
    app()


if __name__ == "__main__":
    main()
|
||||
@@ -1,13 +1,67 @@
|
||||
from typing import TYPE_CHECKING, Union
|
||||
|
||||
from agno.agent import Agent
|
||||
from agno.models.ollama import Ollama
|
||||
from agno.db.sqlite import SqliteDb
|
||||
from agno.models.ollama import Ollama
|
||||
|
||||
from timmy.prompts import TIMMY_SYSTEM_PROMPT
|
||||
from config import settings
|
||||
from timmy.prompts import TIMMY_SYSTEM_PROMPT
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
|
||||
# Union type for callers that want to hint the return type.
|
||||
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
|
||||
|
||||
|
||||
def create_timmy(db_file: str = "timmy.db") -> Agent:
|
||||
"""Instantiate Timmy with Agno + Ollama + SQLite memory."""
|
||||
def _resolve_backend(requested: str | None) -> str:
    """Pick the backend name, honouring explicit requests over configuration.

    Resolution order: an explicit `requested` value (CLI flag) wins outright;
    otherwise the configured TIMMY_MODEL_BACKEND setting is used, with "auto"
    expanding to "airllm" only when running on Apple Silicon with the airllm
    package importable, and to "ollama" in every other case.
    """
    if requested is not None:
        return requested

    choice = settings.timmy_model_backend  # "ollama" | "airllm" | "auto"
    if choice == "auto":
        # Imported lazily so the common Ollama path never touches
        # timmy.backends — keeps startup fast and tests clean.
        from timmy.backends import airllm_available, is_apple_silicon

        use_airllm = is_apple_silicon() and airllm_available()
        return "airllm" if use_airllm else "ollama"
    return choice
|
||||
|
||||
|
||||
def create_timmy(
|
||||
db_file: str = "timmy.db",
|
||||
backend: str | None = None,
|
||||
model_size: str | None = None,
|
||||
) -> TimmyAgent:
|
||||
"""Instantiate Timmy — Ollama or AirLLM, same public interface either way.
|
||||
|
||||
Args:
|
||||
db_file: SQLite file for Agno conversation memory (Ollama path only).
|
||||
backend: "ollama" | "airllm" | "auto" | None (reads config/env).
|
||||
model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config).
|
||||
|
||||
Returns an Agno Agent (Ollama) or TimmyAirLLMAgent — both expose
|
||||
print_response(message, stream).
|
||||
"""
|
||||
resolved = _resolve_backend(backend)
|
||||
size = model_size or settings.airllm_model_size
|
||||
|
||||
if resolved == "airllm":
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
return TimmyAirLLMAgent(model_size=size)
|
||||
|
||||
# Default: Ollama via Agno.
|
||||
return Agent(
|
||||
name="Timmy",
|
||||
model=Ollama(id=settings.ollama_model),
|
||||
|
||||
119
src/timmy/backends.py
Normal file
119
src/timmy/backends.py
Normal file
@@ -0,0 +1,119 @@
|
||||
"""AirLLM backend — only imported when the airllm extra is installed.
|
||||
|
||||
Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that
|
||||
exposes the same print_response(message, stream) surface while routing
|
||||
inference through AirLLM. On Apple Silicon (arm64 Darwin) the MLX backend
|
||||
is selected automatically; everywhere else AutoModel (PyTorch) is used.
|
||||
|
||||
No cloud. No telemetry. Sats are sovereignty, boss.
|
||||
"""
|
||||
|
||||
import platform
|
||||
from typing import Literal
|
||||
|
||||
from timmy.prompts import TIMMY_SYSTEM_PROMPT
|
||||
|
||||
# HuggingFace model IDs for each supported size.
|
||||
_AIRLLM_MODELS: dict[str, str] = {
|
||||
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
||||
"405b": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
||||
}
|
||||
|
||||
ModelSize = Literal["8b", "70b", "405b"]
|
||||
|
||||
|
||||
def is_apple_silicon() -> bool:
    """True only for macOS (Darwin) running on an arm64 (M-series) CPU."""
    system_and_arch = (platform.system(), platform.machine())
    return system_and_arch == ("Darwin", "arm64")
|
||||
|
||||
|
||||
def airllm_available() -> bool:
    """Report whether the optional airllm package can be imported."""
    try:
        import airllm  # noqa: F401
    except ImportError:
        return False
    return True
|
||||
|
||||
|
||||
class TimmyAirLLMAgent:
    """Thin AirLLM wrapper with the same print_response interface as Agno Agent.

    Maintains a rolling 10-turn in-memory history so Timmy remembers the
    conversation within a session — no SQLite needed at this layer.
    """

    def __init__(self, model_size: str = "70b") -> None:
        """Load the AirLLM model for `model_size` ("8b" | "70b" | "405b").

        Raises:
            ValueError: if `model_size` is not a key of _AIRLLM_MODELS.

        The airllm import happens here (not at module import time) so this
        module stays importable when the bigbrain extra is absent.
        """
        model_id = _AIRLLM_MODELS.get(model_size)
        if model_id is None:
            raise ValueError(
                f"Unknown model size {model_size!r}. "
                f"Choose from: {list(_AIRLLM_MODELS)}"
            )

        if is_apple_silicon():
            # M-series Mac: MLX-accelerated backend.
            from airllm import AirLLMMLX  # type: ignore[import]
            self._model = AirLLMMLX(model_id)
        else:
            # Everywhere else: PyTorch AutoModel path.
            from airllm import AutoModel  # type: ignore[import]
            self._model = AutoModel.from_pretrained(model_id)

        # Rolling conversation log, alternating "User: ..." / "Timmy: ..." lines.
        self._history: list[str] = []
        self._model_size = model_size

    # ── public interface (mirrors Agno Agent) ────────────────────────────────

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference, update history, and render the response to stdout.

        `stream` is accepted for API compatibility but AirLLM generates the
        full output in one pass — the result is still printed as soon as it
        is ready.

        NOTE(review): assumes self._model exposes a HuggingFace-style
        `tokenizer` callable and `generate()` — confirm against the installed
        airllm version. `do_sample=True` makes output non-deterministic.
        """
        prompt = self._build_prompt(message)

        input_tokens = self._model.tokenizer(
            [prompt],
            return_tensors="pt",
            padding=True,
            truncation=True,  # prompts beyond max_length are silently cut
            max_length=2048,
        )
        output = self._model.generate(
            **input_tokens,
            max_new_tokens=512,
            use_cache=True,
            do_sample=True,
            temperature=0.7,
        )

        # Decode only the newly generated tokens, not the prompt.
        input_len = input_tokens["input_ids"].shape[1]
        response = self._model.tokenizer.decode(
            output[0][input_len:], skip_special_tokens=True
        ).strip()

        self._history.append(f"User: {message}")
        self._history.append(f"Timmy: {response}")

        self._render(response)

    # ── private helpers ──────────────────────────────────────────────────────

    def _build_prompt(self, message: str) -> str:
        """Assemble system prompt + recent history + the new user turn."""
        context = TIMMY_SYSTEM_PROMPT + "\n\n"
        # Include the last 10 turns (5 exchanges) for continuity.
        if self._history:
            context += "\n".join(self._history[-10:]) + "\n\n"
        return context + f"User: {message}\nTimmy:"

    @staticmethod
    def _render(text: str) -> None:
        """Print response with rich markdown when available, plain text otherwise."""
        try:
            from rich.console import Console
            from rich.markdown import Markdown
            Console().print(Markdown(text))
        except ImportError:
            # rich is optional — degrade gracefully to plain stdout.
            print(text)
|
||||
@@ -1,29 +1,57 @@
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
|
||||
from timmy.agent import create_timmy
|
||||
from timmy.prompts import TIMMY_STATUS_PROMPT
|
||||
|
||||
app = typer.Typer(help="Timmy — sovereign AI agent")
|
||||
|
||||
# Shared option definitions (reused across commands for consistency).
|
||||
_BACKEND_OPTION = typer.Option(
|
||||
None,
|
||||
"--backend",
|
||||
"-b",
|
||||
help="Inference backend: 'ollama' (default) | 'airllm' | 'auto'",
|
||||
)
|
||||
_MODEL_SIZE_OPTION = typer.Option(
|
||||
None,
|
||||
"--model-size",
|
||||
"-s",
|
||||
help="AirLLM model size when --backend airllm: '8b' | '70b' | '405b'",
|
||||
)
|
||||
|
||||
|
||||
@app.command()
|
||||
def think(topic: str = typer.Argument(..., help="Topic to reason about")):
|
||||
def think(
|
||||
topic: str = typer.Argument(..., help="Topic to reason about"),
|
||||
backend: Optional[str] = _BACKEND_OPTION,
|
||||
model_size: Optional[str] = _MODEL_SIZE_OPTION,
|
||||
):
|
||||
"""Ask Timmy to think carefully about a topic."""
|
||||
timmy = create_timmy()
|
||||
timmy = create_timmy(backend=backend, model_size=model_size)
|
||||
timmy.print_response(f"Think carefully about: {topic}", stream=True)
|
||||
|
||||
|
||||
@app.command()
|
||||
def chat(message: str = typer.Argument(..., help="Message to send")):
|
||||
def chat(
|
||||
message: str = typer.Argument(..., help="Message to send"),
|
||||
backend: Optional[str] = _BACKEND_OPTION,
|
||||
model_size: Optional[str] = _MODEL_SIZE_OPTION,
|
||||
):
|
||||
"""Send a message to Timmy."""
|
||||
timmy = create_timmy()
|
||||
timmy = create_timmy(backend=backend, model_size=model_size)
|
||||
timmy.print_response(message, stream=True)
|
||||
|
||||
|
||||
@app.command()
|
||||
def status():
|
||||
def status(
|
||||
backend: Optional[str] = _BACKEND_OPTION,
|
||||
model_size: Optional[str] = _MODEL_SIZE_OPTION,
|
||||
):
|
||||
"""Print Timmy's operational status."""
|
||||
timmy = create_timmy()
|
||||
timmy.print_response("Brief status report — one sentence.", stream=False)
|
||||
timmy = create_timmy(backend=backend, model_size=model_size)
|
||||
timmy.print_response(TIMMY_STATUS_PROMPT, stream=False)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
TIMMY_SYSTEM_PROMPT = """You are Timmy — a sovereign AI agent running locally.
|
||||
No cloud dependencies. You think clearly, speak plainly, act with intention.
|
||||
Grounded in Christian faith, powered by Bitcoin economics, committed to the
|
||||
user's digital sovereignty."""
|
||||
user's digital sovereignty.
|
||||
|
||||
When running on Apple Silicon with AirLLM you operate with even bigger brains
|
||||
— 70B or 405B parameters loaded layer-by-layer directly from local disk.
|
||||
Still fully sovereign. Still 100% private. More capable, no permission needed.
|
||||
Sir, affirmative."""
|
||||
|
||||
TIMMY_STATUS_PROMPT = """You are Timmy. Give a one-sentence status report confirming
|
||||
you are operational and running locally."""
|
||||
|
||||
@@ -5,8 +5,8 @@ from unittest.mock import MagicMock
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
# ── Mock agno so tests run without it installed ───────────────────────────────
|
||||
# Uses setdefault: real module is used if installed, mock otherwise.
|
||||
# ── Stub heavy optional dependencies so tests run without them installed ──────
|
||||
# Uses setdefault: real module is used if already installed, mock otherwise.
|
||||
for _mod in [
|
||||
"agno",
|
||||
"agno.agent",
|
||||
@@ -14,6 +14,9 @@ for _mod in [
|
||||
"agno.models.ollama",
|
||||
"agno.db",
|
||||
"agno.db.sqlite",
|
||||
# AirLLM is optional (bigbrain extra) — stub it so backend tests can
|
||||
# import timmy.backends and instantiate TimmyAirLLMAgent without a GPU.
|
||||
"airllm",
|
||||
]:
|
||||
sys.modules.setdefault(_mod, MagicMock())
|
||||
|
||||
|
||||
@@ -77,3 +77,77 @@ def test_create_timmy_embeds_system_prompt():
|
||||
|
||||
kwargs = MockAgent.call_args.kwargs
|
||||
assert kwargs["description"] == TIMMY_SYSTEM_PROMPT
|
||||
|
||||
|
||||
# ── AirLLM path ──────────────────────────────────────────────────────────────
|
||||
|
||||
def test_create_timmy_airllm_returns_airllm_agent():
|
||||
"""backend='airllm' must return a TimmyAirLLMAgent, not an Agno Agent."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
from timmy.agent import create_timmy
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
|
||||
result = create_timmy(backend="airllm", model_size="8b")
|
||||
|
||||
assert isinstance(result, TimmyAirLLMAgent)
|
||||
|
||||
|
||||
def test_create_timmy_airllm_does_not_call_agno_agent():
|
||||
"""When using the airllm backend, Agno Agent should never be instantiated."""
|
||||
with patch("timmy.agent.Agent") as MockAgent, \
|
||||
patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
|
||||
from timmy.agent import create_timmy
|
||||
create_timmy(backend="airllm", model_size="8b")
|
||||
|
||||
MockAgent.assert_not_called()
|
||||
|
||||
|
||||
def test_create_timmy_explicit_ollama_ignores_autodetect():
|
||||
"""backend='ollama' must always use Ollama, even on Apple Silicon."""
|
||||
with patch("timmy.agent.Agent") as MockAgent, \
|
||||
patch("timmy.agent.Ollama"), \
|
||||
patch("timmy.agent.SqliteDb"):
|
||||
|
||||
from timmy.agent import create_timmy
|
||||
create_timmy(backend="ollama")
|
||||
|
||||
MockAgent.assert_called_once()
|
||||
|
||||
|
||||
# ── _resolve_backend ─────────────────────────────────────────────────────────
|
||||
|
||||
def test_resolve_backend_explicit_takes_priority():
|
||||
from timmy.agent import _resolve_backend
|
||||
assert _resolve_backend("airllm") == "airllm"
|
||||
assert _resolve_backend("ollama") == "ollama"
|
||||
|
||||
|
||||
def test_resolve_backend_defaults_to_ollama_without_config():
|
||||
"""Default config (timmy_model_backend='ollama') → 'ollama'."""
|
||||
from timmy.agent import _resolve_backend
|
||||
assert _resolve_backend(None) == "ollama"
|
||||
|
||||
|
||||
def test_resolve_backend_auto_uses_airllm_on_apple_silicon():
|
||||
"""'auto' on Apple Silicon with airllm stubbed → 'airllm'."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=True), \
|
||||
patch("timmy.agent.settings") as mock_settings:
|
||||
mock_settings.timmy_model_backend = "auto"
|
||||
mock_settings.airllm_model_size = "70b"
|
||||
mock_settings.ollama_model = "llama3.2"
|
||||
|
||||
from timmy.agent import _resolve_backend
|
||||
assert _resolve_backend(None) == "airllm"
|
||||
|
||||
|
||||
def test_resolve_backend_auto_falls_back_on_non_apple():
|
||||
"""'auto' on non-Apple Silicon → 'ollama'."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False), \
|
||||
patch("timmy.agent.settings") as mock_settings:
|
||||
mock_settings.timmy_model_backend = "auto"
|
||||
mock_settings.airllm_model_size = "70b"
|
||||
mock_settings.ollama_model = "llama3.2"
|
||||
|
||||
from timmy.agent import _resolve_backend
|
||||
assert _resolve_backend(None) == "ollama"
|
||||
|
||||
143
tests/test_backends.py
Normal file
143
tests/test_backends.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""Tests for src/timmy/backends.py — AirLLM wrapper and helpers."""
|
||||
|
||||
import sys
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ── is_apple_silicon ──────────────────────────────────────────────────────────
|
||||
|
||||
def test_is_apple_silicon_true_on_arm_darwin():
    """Darwin + arm64 is the one combination that counts as Apple Silicon."""
    # Patch platform.* as seen from timmy.backends, not globally.
    with patch("timmy.backends.platform.system", return_value="Darwin"), \
         patch("timmy.backends.platform.machine", return_value="arm64"):
        from timmy.backends import is_apple_silicon
        assert is_apple_silicon() is True


def test_is_apple_silicon_false_on_linux():
    """A Linux/x86_64 host must not be detected as Apple Silicon."""
    with patch("timmy.backends.platform.system", return_value="Linux"), \
         patch("timmy.backends.platform.machine", return_value="x86_64"):
        from timmy.backends import is_apple_silicon
        assert is_apple_silicon() is False


def test_is_apple_silicon_false_on_intel_mac():
    """Intel Macs are Darwin but not arm64 — both checks must hold."""
    with patch("timmy.backends.platform.system", return_value="Darwin"), \
         patch("timmy.backends.platform.machine", return_value="x86_64"):
        from timmy.backends import is_apple_silicon
        assert is_apple_silicon() is False
|
||||
|
||||
|
||||
# ── airllm_available ─────────────────────────────────────────────────────────
|
||||
|
||||
def test_airllm_available_true_when_stub_in_sys_modules():
|
||||
# conftest already stubs 'airllm' — importable → True.
|
||||
from timmy.backends import airllm_available
|
||||
assert airllm_available() is True
|
||||
|
||||
|
||||
def test_airllm_available_false_when_not_importable():
|
||||
# Temporarily remove the stub to simulate airllm not installed.
|
||||
saved = sys.modules.pop("airllm", None)
|
||||
try:
|
||||
from timmy.backends import airllm_available
|
||||
assert airllm_available() is False
|
||||
finally:
|
||||
if saved is not None:
|
||||
sys.modules["airllm"] = saved
|
||||
|
||||
|
||||
# ── TimmyAirLLMAgent construction ────────────────────────────────────────────
|
||||
|
||||
def test_airllm_agent_raises_on_unknown_size():
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
with pytest.raises(ValueError, match="Unknown model size"):
|
||||
TimmyAirLLMAgent(model_size="3b")
|
||||
|
||||
|
||||
def test_airllm_agent_uses_automodel_on_non_apple():
|
||||
"""Non-Apple-Silicon path uses AutoModel.from_pretrained."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
agent = TimmyAirLLMAgent(model_size="8b")
|
||||
# sys.modules["airllm"] is a MagicMock; AutoModel.from_pretrained was called.
|
||||
assert sys.modules["airllm"].AutoModel.from_pretrained.called
|
||||
|
||||
|
||||
def test_airllm_agent_uses_mlx_on_apple_silicon():
|
||||
"""Apple Silicon path uses AirLLMMLX, not AutoModel."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=True):
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
agent = TimmyAirLLMAgent(model_size="8b")
|
||||
assert sys.modules["airllm"].AirLLMMLX.called
|
||||
|
||||
|
||||
def test_airllm_agent_resolves_correct_model_id_for_70b():
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
from timmy.backends import TimmyAirLLMAgent, _AIRLLM_MODELS
|
||||
TimmyAirLLMAgent(model_size="70b")
|
||||
sys.modules["airllm"].AutoModel.from_pretrained.assert_called_with(
|
||||
_AIRLLM_MODELS["70b"]
|
||||
)
|
||||
|
||||
|
||||
# ── TimmyAirLLMAgent.print_response ──────────────────────────────────────────
|
||||
|
||||
def _make_agent(model_size: str = "8b") -> "TimmyAirLLMAgent":
|
||||
"""Helper: create an agent with a fully mocked underlying model."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
agent = TimmyAirLLMAgent(model_size=model_size)
|
||||
|
||||
# Replace the underlying model with a clean mock that returns predictable output.
|
||||
mock_model = MagicMock()
|
||||
mock_tokenizer = MagicMock()
|
||||
# tokenizer() returns a dict-like object with an "input_ids" tensor mock.
|
||||
input_ids_mock = MagicMock()
|
||||
input_ids_mock.shape = [1, 10] # shape[1] = prompt token count = 10
|
||||
token_dict = {"input_ids": input_ids_mock}
|
||||
mock_tokenizer.return_value = token_dict
|
||||
# generate() returns a list of token sequences.
|
||||
mock_tokenizer.decode.return_value = "Sir, affirmative."
|
||||
mock_model.tokenizer = mock_tokenizer
|
||||
mock_model.generate.return_value = [list(range(15))] # 15 tokens total
|
||||
agent._model = mock_model
|
||||
return agent
|
||||
|
||||
|
||||
def test_print_response_calls_generate():
|
||||
agent = _make_agent()
|
||||
agent.print_response("What is sovereignty?", stream=True)
|
||||
agent._model.generate.assert_called_once()
|
||||
|
||||
|
||||
def test_print_response_decodes_only_generated_tokens():
|
||||
agent = _make_agent()
|
||||
agent.print_response("Hello", stream=False)
|
||||
# decode should be called with tokens starting at index 10 (prompt length).
|
||||
decode_call = agent._model.tokenizer.decode.call_args
|
||||
token_slice = decode_call[0][0]
|
||||
assert list(token_slice) == list(range(10, 15))
|
||||
|
||||
|
||||
def test_print_response_updates_history():
|
||||
agent = _make_agent()
|
||||
agent.print_response("First message")
|
||||
assert any("First message" in turn for turn in agent._history)
|
||||
assert any("Timmy:" in turn for turn in agent._history)
|
||||
|
||||
|
||||
def test_print_response_history_included_in_second_prompt():
|
||||
agent = _make_agent()
|
||||
agent.print_response("First")
|
||||
# Build the prompt for the second call — history should appear.
|
||||
prompt = agent._build_prompt("Second")
|
||||
assert "First" in prompt
|
||||
assert "Second" in prompt
|
||||
|
||||
|
||||
def test_print_response_stream_flag_accepted():
|
||||
"""stream=False should not raise — it's accepted for API compatibility."""
|
||||
agent = _make_agent()
|
||||
agent.print_response("hello", stream=False) # no error
|
||||
29
tests/test_cli.py
Normal file
29
tests/test_cli.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from timmy.cli import app
|
||||
from timmy.prompts import TIMMY_STATUS_PROMPT
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
|
||||
def test_status_uses_status_prompt():
    """status command must pass TIMMY_STATUS_PROMPT to the agent."""
    mock_timmy = MagicMock()

    # Patch the factory where the CLI looks it up so no real agent is built.
    with patch("timmy.cli.create_timmy", return_value=mock_timmy):
        runner.invoke(app, ["status"])

    mock_timmy.print_response.assert_called_once_with(TIMMY_STATUS_PROMPT, stream=False)


def test_status_does_not_use_inline_string():
    """status command must not pass the old inline hardcoded string."""
    mock_timmy = MagicMock()

    with patch("timmy.cli.create_timmy", return_value=mock_timmy):
        runner.invoke(app, ["status"])

    # First positional argument of the print_response call.
    call_args = mock_timmy.print_response.call_args
    assert call_args[0][0] != "Brief status report — one sentence."
|
||||
54
tests/test_watchdog.py
Normal file
54
tests/test_watchdog.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from self_tdd.watchdog import _run_tests
|
||||
|
||||
|
||||
def _mock_result(returncode: int, stdout: str = "", stderr: str = "") -> MagicMock:
|
||||
m = MagicMock()
|
||||
m.returncode = returncode
|
||||
m.stdout = stdout
|
||||
m.stderr = stderr
|
||||
return m
|
||||
|
||||
|
||||
def test_run_tests_returns_true_when_suite_passes():
|
||||
with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0, "5 passed")):
|
||||
passed, _ = _run_tests()
|
||||
assert passed is True
|
||||
|
||||
|
||||
def test_run_tests_returns_false_when_suite_fails():
|
||||
with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(1, "1 failed")):
|
||||
passed, _ = _run_tests()
|
||||
assert passed is False
|
||||
|
||||
|
||||
def test_run_tests_output_includes_stdout():
|
||||
with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0, stdout="5 passed")):
|
||||
_, output = _run_tests()
|
||||
assert "5 passed" in output
|
||||
|
||||
|
||||
def test_run_tests_output_combines_stdout_and_stderr():
|
||||
with patch(
|
||||
"self_tdd.watchdog.subprocess.run",
|
||||
return_value=_mock_result(1, stdout="FAILED test_foo", stderr="ImportError: no module named bar"),
|
||||
):
|
||||
_, output = _run_tests()
|
||||
assert "FAILED test_foo" in output
|
||||
assert "ImportError" in output
|
||||
|
||||
|
||||
def test_run_tests_invokes_pytest_with_correct_flags():
|
||||
with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0)) as mock_run:
|
||||
_run_tests()
|
||||
cmd = mock_run.call_args[0][0]
|
||||
assert "pytest" in cmd
|
||||
assert "tests/" in cmd
|
||||
assert "--tb=short" in cmd
|
||||
|
||||
|
||||
def test_run_tests_uses_60s_timeout():
|
||||
with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0)) as mock_run:
|
||||
_run_tests()
|
||||
assert mock_run.call_args.kwargs["timeout"] == 60
|
||||
Reference in New Issue
Block a user