Merge pull request #4 from Alexspayne/claude/implement-todo-item-61a2S

This commit is contained in:
Alexander Whitestone
2026-02-21 11:59:10 -05:00
committed by GitHub
16 changed files with 759 additions and 15 deletions

View File

@@ -11,3 +11,13 @@
# Enable FastAPI interactive docs at /docs and /redoc (default: false)
# DEBUG=true
# ── AirLLM / big-brain backend ───────────────────────────────────────────────
# Inference backend: "ollama" (default) | "airllm" | "auto"
# "auto" → uses AirLLM on Apple Silicon if installed, otherwise Ollama.
# Requires: pip install ".[bigbrain]"
# TIMMY_MODEL_BACKEND=ollama
# AirLLM model size (default: 70b).
# 8b ~16 GB RAM | 70b ~140 GB RAM | 405b ~810 GB RAM
# AIRLLM_MODEL_SIZE=70b

View File

@@ -138,6 +138,65 @@ timmy status
---
## Big Brain — AirLLM backend (Apple Silicon / large RAM)
Run 70B or 405B models locally with no GPU required, using AirLLM's
layer-by-layer loading strategy. On M-series Macs the MLX backend is
selected automatically for maximum throughput. Everything stays local.
No cloud. No telemetry. Sats are sovereignty, boss.
### One-line install
```bash
pip install ".[bigbrain]"
# Apple Silicon only — adds the MLX-accelerated backend:
pip install "airllm[mlx]"
```
### Run with the big brain
```bash
# Explicit flag — works anywhere airllm is installed:
timmy chat "Explain self-custody" --backend airllm --model-size 70b
# Or set it once in .env and forget about it:
echo "TIMMY_MODEL_BACKEND=auto" >> .env
echo "AIRLLM_MODEL_SIZE=70b" >> .env
timmy chat "What is sovereignty?"
```
`--backend auto` (or `TIMMY_MODEL_BACKEND=auto`) selects AirLLM automatically
on Apple Silicon when the package is installed, and falls back to Ollama
everywhere else — so the same `.env` works on any machine.
### Model sizes
| Flag | Parameters | Approx. RAM needed |
|------|-----------|-------------------|
| `8b` | 8 billion | ~16 GB |
| `70b` | 70 billion | ~140 GB |
| `405b` | 405 billion | ~810 GB |
Models are downloaded from HuggingFace on first run and cached locally.
You need a HuggingFace account and `huggingface-cli login` for gated models
(Llama 3.1 requires accepting Meta's license at hf.co/meta-llama).
### Architecture with AirLLM
```
timmy chat --backend airllm
TimmyAirLLMAgent (src/timmy/backends.py)
├─ Apple Silicon? ──► AirLLMMLX (MLX tensors, Metal GPU)
└─ Everything else ──► AutoModel (PyTorch, CPU/CUDA)
└─ Layers loaded on-demand from ~/.cache/huggingface/
```
---
## Architecture
```mermaid

74
activate_self_tdd.sh Executable file
View File

@@ -0,0 +1,74 @@
#!/usr/bin/env bash
# activate_self_tdd.sh — Timmy Time dev environment bootstrapper
#
# Usage:
#   bash activate_self_tdd.sh               # standard (Ollama) setup
#   bash activate_self_tdd.sh --big-brain   # install AirLLM extra too
#
# What it does:
#   1. Creates a Python venv (or reuses an existing one)
#   2. Installs Timmy Time (+ dev deps, optionally bigbrain)
#   3. Runs the full test suite — aborts if anything fails
#   4. Launches the self-TDD watchdog in the background
#   5. Starts the dashboard
#
# Everything stays local. No cloud. Sats are sovereignty, boss.

# Strict mode: abort on any error (-e) or unset variable (-u); a pipeline
# fails if any stage fails (pipefail).
set -euo pipefail

# Resolve the repo root from this script's own location so it works no matter
# which directory it is invoked from.
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_DIR="$REPO_DIR/.venv"
BIG_BRAIN=0

# Only one flag is recognized; anything else is a hard error (set -e exits).
for arg in "$@"; do
  case $arg in
    --big-brain) BIG_BRAIN=1 ;;
    *) echo "Unknown argument: $arg"; exit 1 ;;
  esac
done

echo "==> Timmy Time — sovereign AI agent bootstrapper"
echo "    Working directory: $REPO_DIR"

# ── 1. Virtual environment ────────────────────────────────────────────────────
if [[ ! -d "$VENV_DIR" ]]; then
  echo "==> Creating virtual environment..."
  python3 -m venv "$VENV_DIR"
fi

# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"
echo "==> Virtual environment active: $VENV_DIR"

# ── 2. Install dependencies ───────────────────────────────────────────────────
if [[ $BIG_BRAIN -eq 1 ]]; then
  echo "==> Installing with bigbrain extra (AirLLM)..."
  pip install --quiet -e "$REPO_DIR[dev,bigbrain]"
  # On Apple Silicon, also install the MLX backend.
  if [[ "$(uname -m)" == "arm64" && "$(uname -s)" == "Darwin" ]]; then
    echo "==> Apple Silicon detected — installing AirLLM MLX backend..."
    pip install --quiet "airllm[mlx]"
  fi
else
  echo "==> Installing standard dependencies..."
  pip install --quiet -e "$REPO_DIR[dev]"
fi

# ── 3. Run tests ──────────────────────────────────────────────────────────────
# set -e aborts the whole bootstrap here if pytest exits nonzero.
echo "==> Running test suite..."
python -m pytest "$REPO_DIR/tests/" -q --tb=short
echo "==> All tests passed."

# ── 4. Self-TDD watchdog (background) ────────────────────────────────────────
# NOTE: the watchdog is deliberately NOT killed when the dashboard exits;
# the PID is printed so the user can stop it manually.
echo "==> Starting self-TDD watchdog (60s interval) in background..."
self-tdd watch --interval 60 &
WATCHDOG_PID=$!
echo "    Watchdog PID: $WATCHDOG_PID"
echo "    Kill with: kill $WATCHDOG_PID"

# ── 5. Dashboard ─────────────────────────────────────────────────────────────
echo ""
echo "==> Starting Timmy Time dashboard at http://localhost:8000"
echo "    Ctrl-C stops the dashboard (watchdog continues until you kill it)"
echo ""
# --reload is a dev convenience; this call blocks until Ctrl-C.
uvicorn dashboard.app:app --reload --host 0.0.0.0 --port 8000

View File

@@ -28,13 +28,20 @@ dev = [
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
]
# Big-brain: run 8B / 70B / 405B models locally via layer-by-layer loading.
# pip install ".[bigbrain]"
# On Apple Silicon: pip install "airllm[mlx]" for the MLX-accelerated backend.
bigbrain = [
"airllm>=2.9.0",
]
[project.scripts]
timmy = "timmy.cli:main"
self-tdd = "self_tdd.watchdog:main"
[tool.hatch.build.targets.wheel]
sources = {"src" = ""}
include = ["src/timmy", "src/dashboard", "src/config.py"]
include = ["src/timmy", "src/dashboard", "src/config.py", "src/self_tdd"]
[tool.pytest.ini_options]
testpaths = ["tests"]

View File

@@ -1,3 +1,5 @@
from typing import Literal
from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -11,6 +13,18 @@ class Settings(BaseSettings):
# Set DEBUG=true to enable /docs and /redoc (disabled by default)
debug: bool = False
# ── AirLLM / backend selection ───────────────────────────────────────────
# "ollama" — always use Ollama (default, safe everywhere)
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
# fall back to Ollama otherwise
timmy_model_backend: Literal["ollama", "airllm", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",

0
src/self_tdd/__init__.py Normal file
View File

71
src/self_tdd/watchdog.py Normal file
View File

@@ -0,0 +1,71 @@
"""Self-TDD Watchdog — polls pytest on a schedule and reports regressions.
Run in a terminal alongside your normal dev work:
self-tdd watch
self-tdd watch --interval 30
The watchdog runs silently while tests pass. When a regression appears it
prints the full short-traceback output so you can see exactly what broke.
No files are modified; no commits are made. Ctrl-C to stop.
"""
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
import typer
# Project root is three levels up from src/self_tdd/watchdog.py
PROJECT_ROOT = Path(__file__).parent.parent.parent
app = typer.Typer(help="Self-TDD watchdog — continuous test runner")
def _run_tests() -> tuple[bool, str]:
    """Run the test suite and return (passed, combined_output).

    Returns:
        passed: True when pytest exited with code 0.
        combined_output: stdout + stderr of the pytest run, stripped.

    Fix: a hung test suite no longer kills the watchdog. Previously
    subprocess.TimeoutExpired propagated out of the watch loop after 60s;
    now a timeout is reported as a failing run with whatever partial
    output pytest produced.
    """
    cmd = [sys.executable, "-m", "pytest", "tests/", "-q", "--tb=short"]
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=PROJECT_ROOT,
            timeout=60,
        )
    except subprocess.TimeoutExpired as exc:
        # exc.stdout / exc.stderr hold output captured before the timeout
        # (may be None when nothing was produced).
        partial = ((exc.stdout or "") + (exc.stderr or "")).strip()
        return False, f"Test run timed out after 60s.\n{partial}".strip()
    return result.returncode == 0, (result.stdout + result.stderr).strip()
@app.command()
def watch(
    interval: int = typer.Option(60, "--interval", "-i", help="Seconds between test runs"),
) -> None:
    """Continuously poll the test suite and surface regressions as they appear."""
    typer.echo(f"Self-TDD watchdog started — polling every {interval}s. Ctrl-C to stop.")
    previous: bool | None = None  # outcome of the prior cycle; None = first run
    try:
        while True:
            ok, details = _run_tests()
            now = datetime.now().strftime("%H:%M:%S")
            if not ok:
                # Failing cycles re-print the traceback every poll so it
                # stays visible in the terminal.
                typer.secho(f"[{now}] Regression detected:", fg=typer.colors.RED)
                typer.echo(details)
            elif previous is not True:
                # Announce green only on the first run and on red→green
                # transitions; a steadily-passing suite stays silent.
                typer.secho(f"[{now}] All tests passing.", fg=typer.colors.GREEN)
            previous = ok
            time.sleep(interval)
    except KeyboardInterrupt:
        typer.echo("\nWatchdog stopped.")
def main() -> None:
    """Console-script entry point (wired up via [project.scripts] → self-tdd)."""
    app()


if __name__ == "__main__":
    main()

View File

@@ -1,13 +1,67 @@
from typing import TYPE_CHECKING, Union
from agno.agent import Agent
from agno.models.ollama import Ollama
from agno.db.sqlite import SqliteDb
from agno.models.ollama import Ollama
from timmy.prompts import TIMMY_SYSTEM_PROMPT
from config import settings
from timmy.prompts import TIMMY_SYSTEM_PROMPT
if TYPE_CHECKING:
from timmy.backends import TimmyAirLLMAgent
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
def create_timmy(db_file: str = "timmy.db") -> Agent:
"""Instantiate Timmy with Agno + Ollama + SQLite memory."""
def _resolve_backend(requested: str | None) -> str:
"""Return the backend name to use, resolving 'auto' and explicit overrides.
Priority (highest → lowest):
1. CLI flag passed directly to create_timmy()
2. TIMMY_MODEL_BACKEND env var / .env setting
3. 'ollama' (safe default — no surprises)
'auto' triggers Apple Silicon detection: uses AirLLM if both
is_apple_silicon() and airllm_available() return True.
"""
if requested is not None:
return requested
configured = settings.timmy_model_backend # "ollama" | "airllm" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
def create_timmy(
db_file: str = "timmy.db",
backend: str | None = None,
model_size: str | None = None,
) -> TimmyAgent:
"""Instantiate Timmy — Ollama or AirLLM, same public interface either way.
Args:
db_file: SQLite file for Agno conversation memory (Ollama path only).
backend: "ollama" | "airllm" | "auto" | None (reads config/env).
model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config).
Returns an Agno Agent (Ollama) or TimmyAirLLMAgent — both expose
print_response(message, stream).
"""
resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size
if resolved == "airllm":
from timmy.backends import TimmyAirLLMAgent
return TimmyAirLLMAgent(model_size=size)
# Default: Ollama via Agno.
return Agent(
name="Timmy",
model=Ollama(id=settings.ollama_model),

119
src/timmy/backends.py Normal file
View File

@@ -0,0 +1,119 @@
"""AirLLM backend — only imported when the airllm extra is installed.
Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that
exposes the same print_response(message, stream) surface while routing
inference through AirLLM. On Apple Silicon (arm64 Darwin) the MLX backend
is selected automatically; everywhere else AutoModel (PyTorch) is used.
No cloud. No telemetry. Sats are sovereignty, boss.
"""
import platform
from typing import Literal
from timmy.prompts import TIMMY_SYSTEM_PROMPT
# HuggingFace model IDs for each supported size.
_AIRLLM_MODELS: dict[str, str] = {
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
"405b": "meta-llama/Meta-Llama-3.1-405B-Instruct",
}
ModelSize = Literal["8b", "70b", "405b"]
def is_apple_silicon() -> bool:
    """Detect an M-series Mac: macOS kernel ("Darwin") running on arm64."""
    system, machine = platform.system(), platform.machine()
    return (system, machine) == ("Darwin", "arm64")
def airllm_available() -> bool:
    """Report whether the optional ``airllm`` package can be imported."""
    try:
        import airllm  # noqa: F401
    except ImportError:
        return False
    return True
class TimmyAirLLMAgent:
    """Thin AirLLM wrapper with the same print_response interface as Agno Agent.

    Maintains a rolling 10-turn in-memory history so Timmy remembers the
    conversation within a session — no SQLite needed at this layer.
    """

    # Number of stored history lines (user + assistant entries) retained.
    # Fix: this window is now actually enforced — previously _history grew
    # without bound even though prompts only ever used the last 10 entries.
    _HISTORY_LIMIT = 10

    def __init__(self, model_size: str = "70b") -> None:
        """Load the AirLLM model for the requested size.

        Args:
            model_size: "8b" | "70b" | "405b" — a key of _AIRLLM_MODELS.

        Raises:
            ValueError: When model_size is not a supported size.
        """
        model_id = _AIRLLM_MODELS.get(model_size)
        if model_id is None:
            raise ValueError(
                f"Unknown model size {model_size!r}. "
                f"Choose from: {list(_AIRLLM_MODELS)}"
            )
        # Lazy imports: airllm is an optional extra ([bigbrain]); importing
        # it here keeps `import timmy.backends` cheap for helper-only use.
        if is_apple_silicon():
            from airllm import AirLLMMLX  # type: ignore[import]

            self._model = AirLLMMLX(model_id)
        else:
            from airllm import AutoModel  # type: ignore[import]

            self._model = AutoModel.from_pretrained(model_id)
        self._history: list[str] = []
        self._model_size = model_size

    # ── public interface (mirrors Agno Agent) ────────────────────────────────

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference, update history, and render the response to stdout.

        `stream` is accepted for API compatibility but AirLLM generates the
        full output in one pass — the result is still printed as soon as it
        is ready.
        """
        prompt = self._build_prompt(message)
        input_tokens = self._model.tokenizer(
            [prompt],
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=2048,
        )
        output = self._model.generate(
            **input_tokens,
            max_new_tokens=512,
            use_cache=True,
            do_sample=True,
            temperature=0.7,
        )
        # Decode only the newly generated tokens, not the prompt.
        input_len = input_tokens["input_ids"].shape[1]
        response = self._model.tokenizer.decode(
            output[0][input_len:], skip_special_tokens=True
        ).strip()
        self._history.append(f"User: {message}")
        self._history.append(f"Timmy: {response}")
        # Trim to the rolling window so memory stays bounded in long sessions.
        if len(self._history) > self._HISTORY_LIMIT:
            del self._history[: -self._HISTORY_LIMIT]
        self._render(response)

    # ── private helpers ──────────────────────────────────────────────────────

    def _build_prompt(self, message: str) -> str:
        """Assemble system prompt + recent history + the new user message."""
        context = TIMMY_SYSTEM_PROMPT + "\n\n"
        # Include the last 10 turns (5 exchanges) for continuity.
        if self._history:
            context += "\n".join(self._history[-self._HISTORY_LIMIT:]) + "\n\n"
        return context + f"User: {message}\nTimmy:"

    @staticmethod
    def _render(text: str) -> None:
        """Print response with rich markdown when available, plain text otherwise."""
        try:
            from rich.console import Console
            from rich.markdown import Markdown

            Console().print(Markdown(text))
        except ImportError:
            print(text)

View File

@@ -1,29 +1,57 @@
from typing import Optional
import typer
from timmy.agent import create_timmy
from timmy.prompts import TIMMY_STATUS_PROMPT
app = typer.Typer(help="Timmy — sovereign AI agent")
# Shared option definitions (reused across commands for consistency).
_BACKEND_OPTION = typer.Option(
None,
"--backend",
"-b",
help="Inference backend: 'ollama' (default) | 'airllm' | 'auto'",
)
_MODEL_SIZE_OPTION = typer.Option(
None,
"--model-size",
"-s",
help="AirLLM model size when --backend airllm: '8b' | '70b' | '405b'",
)
@app.command()
def think(topic: str = typer.Argument(..., help="Topic to reason about")):
def think(
topic: str = typer.Argument(..., help="Topic to reason about"),
backend: Optional[str] = _BACKEND_OPTION,
model_size: Optional[str] = _MODEL_SIZE_OPTION,
):
"""Ask Timmy to think carefully about a topic."""
timmy = create_timmy()
timmy = create_timmy(backend=backend, model_size=model_size)
timmy.print_response(f"Think carefully about: {topic}", stream=True)
@app.command()
def chat(message: str = typer.Argument(..., help="Message to send")):
def chat(
message: str = typer.Argument(..., help="Message to send"),
backend: Optional[str] = _BACKEND_OPTION,
model_size: Optional[str] = _MODEL_SIZE_OPTION,
):
"""Send a message to Timmy."""
timmy = create_timmy()
timmy = create_timmy(backend=backend, model_size=model_size)
timmy.print_response(message, stream=True)
@app.command()
def status():
def status(
backend: Optional[str] = _BACKEND_OPTION,
model_size: Optional[str] = _MODEL_SIZE_OPTION,
):
"""Print Timmy's operational status."""
timmy = create_timmy()
timmy.print_response("Brief status report — one sentence.", stream=False)
timmy = create_timmy(backend=backend, model_size=model_size)
timmy.print_response(TIMMY_STATUS_PROMPT, stream=False)
def main():

View File

@@ -1,7 +1,12 @@
TIMMY_SYSTEM_PROMPT = """You are Timmy — a sovereign AI agent running locally.
No cloud dependencies. You think clearly, speak plainly, act with intention.
Grounded in Christian faith, powered by Bitcoin economics, committed to the
user's digital sovereignty."""
user's digital sovereignty.
When running on Apple Silicon with AirLLM you operate with even bigger brains
— 70B or 405B parameters loaded layer-by-layer directly from local disk.
Still fully sovereign. Still 100% private. More capable, no permission needed.
Sir, affirmative."""
TIMMY_STATUS_PROMPT = """You are Timmy. Give a one-sentence status report confirming
you are operational and running locally."""

View File

@@ -5,8 +5,8 @@ from unittest.mock import MagicMock
import pytest
from fastapi.testclient import TestClient
# ── Mock agno so tests run without it installed ───────────────────────────────
# Uses setdefault: real module is used if installed, mock otherwise.
# ── Stub heavy optional dependencies so tests run without them installed ──────
# Uses setdefault: real module is used if already installed, mock otherwise.
for _mod in [
"agno",
"agno.agent",
@@ -14,6 +14,9 @@ for _mod in [
"agno.models.ollama",
"agno.db",
"agno.db.sqlite",
# AirLLM is optional (bigbrain extra) — stub it so backend tests can
# import timmy.backends and instantiate TimmyAirLLMAgent without a GPU.
"airllm",
]:
sys.modules.setdefault(_mod, MagicMock())

View File

@@ -77,3 +77,77 @@ def test_create_timmy_embeds_system_prompt():
kwargs = MockAgent.call_args.kwargs
assert kwargs["description"] == TIMMY_SYSTEM_PROMPT
# ── AirLLM path ──────────────────────────────────────────────────────────────


def test_create_timmy_airllm_returns_airllm_agent():
    """backend='airllm' must return a TimmyAirLLMAgent, not an Agno Agent."""
    # Force the non-Apple path so the stubbed airllm.AutoModel branch runs.
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.agent import create_timmy
        from timmy.backends import TimmyAirLLMAgent

        result = create_timmy(backend="airllm", model_size="8b")
        assert isinstance(result, TimmyAirLLMAgent)


def test_create_timmy_airllm_does_not_call_agno_agent():
    """When using the airllm backend, Agno Agent should never be instantiated."""
    with patch("timmy.agent.Agent") as MockAgent, \
         patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.agent import create_timmy

        create_timmy(backend="airllm", model_size="8b")
        MockAgent.assert_not_called()


def test_create_timmy_explicit_ollama_ignores_autodetect():
    """backend='ollama' must always use Ollama, even on Apple Silicon."""
    with patch("timmy.agent.Agent") as MockAgent, \
         patch("timmy.agent.Ollama"), \
         patch("timmy.agent.SqliteDb"):
        from timmy.agent import create_timmy

        create_timmy(backend="ollama")
        MockAgent.assert_called_once()


# ── _resolve_backend ─────────────────────────────────────────────────────────


def test_resolve_backend_explicit_takes_priority():
    # Explicit CLI value must win over any configured setting.
    from timmy.agent import _resolve_backend

    assert _resolve_backend("airllm") == "airllm"
    assert _resolve_backend("ollama") == "ollama"


def test_resolve_backend_defaults_to_ollama_without_config():
    """Default config (timmy_model_backend='ollama') → 'ollama'."""
    from timmy.agent import _resolve_backend

    assert _resolve_backend(None) == "ollama"


def test_resolve_backend_auto_uses_airllm_on_apple_silicon():
    """'auto' on Apple Silicon with airllm stubbed → 'airllm'."""
    with patch("timmy.backends.is_apple_silicon", return_value=True), \
         patch("timmy.agent.settings") as mock_settings:
        mock_settings.timmy_model_backend = "auto"
        mock_settings.airllm_model_size = "70b"
        mock_settings.ollama_model = "llama3.2"
        from timmy.agent import _resolve_backend

        assert _resolve_backend(None) == "airllm"


def test_resolve_backend_auto_falls_back_on_non_apple():
    """'auto' on non-Apple Silicon → 'ollama'."""
    with patch("timmy.backends.is_apple_silicon", return_value=False), \
         patch("timmy.agent.settings") as mock_settings:
        mock_settings.timmy_model_backend = "auto"
        mock_settings.airllm_model_size = "70b"
        mock_settings.ollama_model = "llama3.2"
        from timmy.agent import _resolve_backend

        assert _resolve_backend(None) == "ollama"

143
tests/test_backends.py Normal file
View File

@@ -0,0 +1,143 @@
"""Tests for src/timmy/backends.py — AirLLM wrapper and helpers."""
import sys
from unittest.mock import MagicMock, patch

import pytest


# ── is_apple_silicon ──────────────────────────────────────────────────────────


def test_is_apple_silicon_true_on_arm_darwin():
    # arm64 + Darwin is exactly the Apple Silicon combination.
    with patch("timmy.backends.platform.system", return_value="Darwin"), \
         patch("timmy.backends.platform.machine", return_value="arm64"):
        from timmy.backends import is_apple_silicon

        assert is_apple_silicon() is True


def test_is_apple_silicon_false_on_linux():
    with patch("timmy.backends.platform.system", return_value="Linux"), \
         patch("timmy.backends.platform.machine", return_value="x86_64"):
        from timmy.backends import is_apple_silicon

        assert is_apple_silicon() is False


def test_is_apple_silicon_false_on_intel_mac():
    # Intel Macs report Darwin but not arm64 — must be False.
    with patch("timmy.backends.platform.system", return_value="Darwin"), \
         patch("timmy.backends.platform.machine", return_value="x86_64"):
        from timmy.backends import is_apple_silicon

        assert is_apple_silicon() is False


# ── airllm_available ─────────────────────────────────────────────────────────


def test_airllm_available_true_when_stub_in_sys_modules():
    # conftest already stubs 'airllm' — importable → True.
    from timmy.backends import airllm_available

    assert airllm_available() is True


def test_airllm_available_false_when_not_importable():
    # Temporarily remove the stub to simulate airllm not installed.
    saved = sys.modules.pop("airllm", None)
    try:
        from timmy.backends import airllm_available

        assert airllm_available() is False
    finally:
        # Restore the stub so later tests still see airllm as importable.
        if saved is not None:
            sys.modules["airllm"] = saved


# ── TimmyAirLLMAgent construction ────────────────────────────────────────────


def test_airllm_agent_raises_on_unknown_size():
    from timmy.backends import TimmyAirLLMAgent

    with pytest.raises(ValueError, match="Unknown model size"):
        TimmyAirLLMAgent(model_size="3b")


def test_airllm_agent_uses_automodel_on_non_apple():
    """Non-Apple-Silicon path uses AutoModel.from_pretrained."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")
        # sys.modules["airllm"] is a MagicMock; AutoModel.from_pretrained was called.
        assert sys.modules["airllm"].AutoModel.from_pretrained.called


def test_airllm_agent_uses_mlx_on_apple_silicon():
    """Apple Silicon path uses AirLLMMLX, not AutoModel."""
    with patch("timmy.backends.is_apple_silicon", return_value=True):
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")
        assert sys.modules["airllm"].AirLLMMLX.called


def test_airllm_agent_resolves_correct_model_id_for_70b():
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.backends import TimmyAirLLMAgent, _AIRLLM_MODELS

        TimmyAirLLMAgent(model_size="70b")
        sys.modules["airllm"].AutoModel.from_pretrained.assert_called_with(
            _AIRLLM_MODELS["70b"]
        )


# ── TimmyAirLLMAgent.print_response ──────────────────────────────────────────


def _make_agent(model_size: str = "8b") -> "TimmyAirLLMAgent":
    """Helper: create an agent with a fully mocked underlying model."""
    with patch("timmy.backends.is_apple_silicon", return_value=False):
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size=model_size)
    # Replace the underlying model with a clean mock that returns predictable output.
    mock_model = MagicMock()
    mock_tokenizer = MagicMock()
    # tokenizer() returns a dict-like object with an "input_ids" tensor mock.
    input_ids_mock = MagicMock()
    input_ids_mock.shape = [1, 10]  # shape[1] = prompt token count = 10
    token_dict = {"input_ids": input_ids_mock}
    mock_tokenizer.return_value = token_dict
    # generate() returns a list of token sequences.
    mock_tokenizer.decode.return_value = "Sir, affirmative."
    mock_model.tokenizer = mock_tokenizer
    mock_model.generate.return_value = [list(range(15))]  # 15 tokens total
    agent._model = mock_model
    return agent


def test_print_response_calls_generate():
    agent = _make_agent()
    agent.print_response("What is sovereignty?", stream=True)
    agent._model.generate.assert_called_once()


def test_print_response_decodes_only_generated_tokens():
    agent = _make_agent()
    agent.print_response("Hello", stream=False)
    # decode should be called with tokens starting at index 10 (prompt length).
    decode_call = agent._model.tokenizer.decode.call_args
    token_slice = decode_call[0][0]
    assert list(token_slice) == list(range(10, 15))


def test_print_response_updates_history():
    agent = _make_agent()
    agent.print_response("First message")
    assert any("First message" in turn for turn in agent._history)
    assert any("Timmy:" in turn for turn in agent._history)


def test_print_response_history_included_in_second_prompt():
    agent = _make_agent()
    agent.print_response("First")
    # Build the prompt for the second call — history should appear.
    prompt = agent._build_prompt("Second")
    assert "First" in prompt
    assert "Second" in prompt


def test_print_response_stream_flag_accepted():
    """stream=False should not raise — it's accepted for API compatibility."""
    agent = _make_agent()
    agent.print_response("hello", stream=False)  # no error

29
tests/test_cli.py Normal file
View File

@@ -0,0 +1,29 @@
from unittest.mock import MagicMock, patch

from typer.testing import CliRunner

from timmy.cli import app
from timmy.prompts import TIMMY_STATUS_PROMPT

runner = CliRunner()


def test_status_uses_status_prompt():
    """status command must pass TIMMY_STATUS_PROMPT to the agent."""
    mock_timmy = MagicMock()
    # create_timmy is patched so no real backend (Ollama/AirLLM) is touched.
    with patch("timmy.cli.create_timmy", return_value=mock_timmy):
        runner.invoke(app, ["status"])
        mock_timmy.print_response.assert_called_once_with(TIMMY_STATUS_PROMPT, stream=False)


def test_status_does_not_use_inline_string():
    """status command must not pass the old inline hardcoded string."""
    mock_timmy = MagicMock()
    with patch("timmy.cli.create_timmy", return_value=mock_timmy):
        runner.invoke(app, ["status"])
        # Regression guard against reverting to the pre-refactor literal.
        call_args = mock_timmy.print_response.call_args
        assert call_args[0][0] != "Brief status report — one sentence."

54
tests/test_watchdog.py Normal file
View File

@@ -0,0 +1,54 @@
from unittest.mock import MagicMock, patch

from self_tdd.watchdog import _run_tests


def _mock_result(returncode: int, stdout: str = "", stderr: str = "") -> MagicMock:
    """Build a stand-in for the CompletedProcess that subprocess.run returns."""
    m = MagicMock()
    m.returncode = returncode
    m.stdout = stdout
    m.stderr = stderr
    return m


def test_run_tests_returns_true_when_suite_passes():
    # Exit code 0 from pytest must map to passed=True.
    with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0, "5 passed")):
        passed, _ = _run_tests()
        assert passed is True


def test_run_tests_returns_false_when_suite_fails():
    # Any nonzero exit code means a failing suite.
    with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(1, "1 failed")):
        passed, _ = _run_tests()
        assert passed is False


def test_run_tests_output_includes_stdout():
    with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0, stdout="5 passed")):
        _, output = _run_tests()
        assert "5 passed" in output


def test_run_tests_output_combines_stdout_and_stderr():
    # Both streams must be surfaced — import errors often land on stderr.
    with patch(
        "self_tdd.watchdog.subprocess.run",
        return_value=_mock_result(1, stdout="FAILED test_foo", stderr="ImportError: no module named bar"),
    ):
        _, output = _run_tests()
        assert "FAILED test_foo" in output
        assert "ImportError" in output


def test_run_tests_invokes_pytest_with_correct_flags():
    with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0)) as mock_run:
        _run_tests()
        # First positional arg to subprocess.run is the command list.
        cmd = mock_run.call_args[0][0]
        assert "pytest" in cmd
        assert "tests/" in cmd
        assert "--tb=short" in cmd


def test_run_tests_uses_60s_timeout():
    with patch("self_tdd.watchdog.subprocess.run", return_value=_mock_result(0)) as mock_run:
        _run_tests()
        assert mock_run.call_args.kwargs["timeout"] == 60