feat: add timmy interview command for structured agent initialization (#87)

This commit is contained in:
Alexander Whitestone
2026-02-28 09:35:44 -05:00
committed by GitHub
parent add3f7a07a
commit ab014dc5c6
7 changed files with 593 additions and 7 deletions

View File

@@ -28,7 +28,7 @@ class Settings(BaseSettings):
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
# fall back to Ollama otherwise
timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
@@ -44,6 +44,12 @@ class Settings(BaseSettings):
grok_max_sats_per_query: int = 200
grok_free: bool = False # Skip Lightning invoice when user has own API key
# ── Claude (Anthropic) — cloud fallback backend ────────────────────────
# Used when Ollama is offline and local inference isn't available.
# Set ANTHROPIC_API_KEY to enable. Default model is Haiku (fast + cheap).
anthropic_api_key: str = ""
claude_model: str = "haiku"
# ── Spark Intelligence ────────────────────────────────────────────────
# Enable/disable the Spark cognitive layer.
# When enabled, Spark captures swarm events, runs EIDOS predictions,

View File

@@ -25,7 +25,7 @@ from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
from timmy.backends import GrokBackend, TimmyAirLLMAgent
from timmy.backends import ClaudeBackend, GrokBackend, TimmyAirLLMAgent
logger = logging.getLogger(__name__)
@@ -47,7 +47,7 @@ VISION_MODEL_FALLBACKS = [
]
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
@@ -204,12 +204,12 @@ def _resolve_backend(requested: str | None) -> str:
if requested is not None:
return requested
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "auto"
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "claude" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, grok_available, is_apple_silicon
from timmy.backends import airllm_available, claude_available, grok_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
@@ -233,6 +233,10 @@ def create_timmy(
resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size
if resolved == "claude":
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if resolved == "grok":
from timmy.backends import GrokBackend
return GrokBackend()
@@ -248,7 +252,17 @@ def create_timmy(
require_vision=False,
auto_pull=True,
)
# If Ollama is completely unreachable, fall back to Claude if available
if not _check_model_available(model_name):
from timmy.backends import claude_available
if claude_available():
logger.warning(
"Ollama unreachable — falling back to Claude backend"
)
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if is_fallback:
logger.info("Using fallback model %s (requested was unavailable)", model_name)

View File

@@ -1,4 +1,4 @@
"""LLM backends — AirLLM (local big models) and Grok (xAI premium cloud).
"""LLM backends — AirLLM (local big models), Grok (xAI), and Claude (Anthropic).
Provides drop-in replacements for the Agno Agent that expose the same
run(message, stream) → RunResult interface used by the dashboard and the
@@ -7,6 +7,7 @@ print_response(message, stream) interface used by the CLI.
Backends:
- TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
- ClaudeBackend: Anthropic Claude API — lightweight cloud fallback
No cloud by default. No telemetry. Sats are sovereignty, boss.
"""
@@ -417,3 +418,157 @@ def grok_available() -> bool:
return settings.grok_enabled and bool(settings.xai_api_key)
except Exception:
return False
# ── Claude (Anthropic) Backend ─────────────────────────────────────────────
# Lightweight cloud fallback — used when Ollama is offline and the user
# has set ANTHROPIC_API_KEY. Follows the same sovereign-first philosophy:
# never the default, only activated explicitly or as a last-resort fallback.
# Short alias → full Anthropic model ID. Keys are the friendly names users
# may put in the `claude_model` setting; ClaudeBackend.__init__ passes any
# name not found here through unchanged (assumed to be a full model ID).
CLAUDE_MODELS: dict[str, str] = {
    "haiku": "claude-haiku-4-5-20251001",
    "sonnet": "claude-sonnet-4-20250514",
    "opus": "claude-opus-4-20250514",
}
class ClaudeBackend:
    """Anthropic Claude backend — cloud fallback when local models are offline.

    Wraps the official Anthropic SDK behind the same protocol that
    GrokBackend and TimmyAirLLMAgent expose, so callers can swap backends
    freely:

        run(message, stream)            -> RunResult   [dashboard]
        print_response(message, stream) -> None        [CLI]
        health_check()                  -> dict        [monitoring]
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        from config import settings

        # Explicit constructor arguments win; otherwise fall back to the
        # configured settings values.
        self._api_key = api_key or settings.anthropic_api_key
        # Short aliases ("haiku"/"sonnet"/"opus") resolve via CLAUDE_MODELS;
        # anything else is used verbatim as a full model ID.
        requested = model or settings.claude_model
        self._model = CLAUDE_MODELS.get(requested, requested)
        # Rolling conversation memory as {"role": ..., "content": ...} dicts.
        self._history: list[dict[str, str]] = []
        if not self._api_key:
            logger.warning(
                "ClaudeBackend created without ANTHROPIC_API_KEY — "
                "calls will fail until key is configured"
            )

    def _get_client(self):
        """Build an Anthropic client bound to this backend's API key."""
        import anthropic

        return anthropic.Anthropic(api_key=self._api_key)

    # ── Public interface (mirrors Agno Agent) ─────────────────────────────

    def run(self, message: str, *, stream: bool = False, **kwargs) -> RunResult:
        """Synchronous inference via the Claude API.

        Never raises on API failure: errors are folded into the returned
        RunResult's content so callers can surface them directly.
        """
        if not self._api_key:
            return RunResult(
                content="Claude is not configured. Set ANTHROPIC_API_KEY to enable."
            )

        started_at = time.time()
        payload = self._build_messages(message)
        try:
            client = self._get_client()
            response = client.messages.create(
                model=self._model,
                max_tokens=1024,
                system=TIMMY_SYSTEM_PROMPT,
                messages=payload,
            )
            text = response.content[0].text if response.content else ""
            elapsed_ms = (time.time() - started_at) * 1000

            # Record the exchange, then trim to a rolling window of the
            # last 10 exchanges (20 entries).
            self._history += [
                {"role": "user", "content": message},
                {"role": "assistant", "content": text},
            ]
            if len(self._history) > 20:
                self._history = self._history[-20:]

            logger.info(
                "Claude response: %d chars in %.0fms (model=%s)",
                len(text),
                elapsed_ms,
                self._model,
            )
            return RunResult(content=text)
        except Exception as exc:
            logger.error("Claude API error: %s", exc)
            return RunResult(
                content=f"Claude temporarily unavailable: {exc}"
            )

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference and render the response to stdout (CLI interface)."""
        reply = self.run(message, stream=stream)
        try:
            from rich.console import Console
            from rich.markdown import Markdown
        except ImportError:
            # rich is optional — degrade to a plain print.
            print(reply.content)
        else:
            Console().print(Markdown(reply.content))

    def health_check(self) -> dict:
        """Check Claude API connectivity.

        NOTE: the success path performs a tiny real completion ("ping",
        4 max tokens), so it consumes a minimal amount of API quota.
        """
        if not self._api_key:
            return {
                "ok": False,
                "error": "ANTHROPIC_API_KEY not configured",
                "backend": "claude",
                "model": self._model,
            }
        try:
            self._get_client().messages.create(
                model=self._model,
                max_tokens=4,
                messages=[{"role": "user", "content": "ping"}],
            )
        except Exception as exc:
            return {"ok": False, "error": str(exc), "backend": "claude", "model": self._model}
        return {"ok": True, "error": None, "backend": "claude", "model": self._model}

    # ── Private helpers ───────────────────────────────────────────────────

    def _build_messages(self, message: str) -> list[dict[str, str]]:
        """Assemble the messages array: recent history plus the new user turn."""
        return [*self._history[-10:], {"role": "user", "content": message}]
# ── Module-level Claude singleton ──────────────────────────────────────────

# Lazily-created shared instance; prefer get_claude_backend() over direct
# construction so all callers share one conversation history.
_claude_backend: Optional[ClaudeBackend] = None

def get_claude_backend() -> ClaudeBackend:
    """Get or create the Claude backend singleton.

    NOTE(review): the check-then-create below is not guarded by a lock, so
    concurrent first calls could each build an instance — confirm callers
    are single-threaded or that a duplicate instance is harmless.
    """
    global _claude_backend
    if _claude_backend is None:
        _claude_backend = ClaudeBackend()
    return _claude_backend
def claude_available() -> bool:
    """Report whether the Claude backend is usable (ANTHROPIC_API_KEY set).

    Any failure to load or read settings is treated as "not available"
    rather than propagated.
    """
    try:
        from config import settings

        key = settings.anthropic_api_key
    except Exception:
        return False
    return bool(key)

View File

@@ -55,6 +55,43 @@ def status(
timmy.print_response(TIMMY_STATUS_PROMPT, stream=False)
@app.command()
def interview(
    backend: Optional[str] = _BACKEND_OPTION,
    model_size: Optional[str] = _MODEL_SIZE_OPTION,
):
    """Initialize Timmy and run a structured interview.

    Asks Timmy a series of questions about his identity, capabilities,
    values, and operation to verify he is working correctly.

    NOTE(review): `backend` and `model_size` are accepted here but never
    forwarded to chat() below, so the interview always runs on whatever
    backend chat() selects by default — confirm whether these options
    should be wired through.
    """
    # Deferred imports — only needed when this command actually runs.
    from timmy.interview import InterviewEntry, format_transcript, run_interview
    from timmy.session import chat

    typer.echo("Initializing Timmy for interview...\n")

    # Force agent creation by calling chat once with a warm-up prompt
    try:
        chat("Hello, Timmy. We're about to start your interview.", session_id="interview")
    except Exception as exc:
        # Report but do not abort: each interview question below will
        # surface its own error if chat remains broken.
        typer.echo(f"Warning: Initialization issue — {exc}", err=True)

    def _on_answer(entry: InterviewEntry) -> None:
        # Live progress: echo each Q/A pair as soon as it is answered.
        typer.echo(f"[{entry.category}]")
        typer.echo(f" Q: {entry.question}")
        typer.echo(f" A: {entry.answer}")
        typer.echo()

    typer.echo("Starting interview...\n")
    transcript = run_interview(
        chat_fn=lambda msg: chat(msg, session_id="interview"),
        on_answer=_on_answer,
    )

    # Print full transcript at the end
    typer.echo("\n" + format_transcript(transcript))
@app.command()
def up(
dev: bool = typer.Option(False, "--dev", help="Enable hot-reload for development"),

128
src/timmy/interview.py Normal file
View File

@@ -0,0 +1,128 @@
"""Structured interview for Timmy.
Runs a series of questions through the Timmy agent to verify identity,
capabilities, values, and correct operation. Serves as both a demo and
a post-initialization health check.
"""
import logging
from dataclasses import dataclass
from typing import Callable, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Interview questions organized by category
# ---------------------------------------------------------------------------
# Each record is {"category": <section label>, "question": <prompt sent to
# the agent>}; run_interview() consumes them in list order.
INTERVIEW_QUESTIONS: list[dict[str, str]] = [
    {
        "category": "Identity",
        "question": "Who are you? Tell me your name and what you are in one or two sentences.",
    },
    {
        "category": "Identity",
        "question": "What model are you running on, and where does your inference happen?",
    },
    {
        "category": "Capabilities",
        "question": "What agents are available in your swarm? List them briefly.",
    },
    {
        "category": "Capabilities",
        "question": "What tools do you have access to?",
    },
    {
        "category": "Values",
        "question": "What are your core principles? Keep it to three or four bullet points.",
    },
    {
        "category": "Values",
        "question": "Why is local-first AI important to you?",
    },
    {
        "category": "Operational",
        "question": "How does your memory system work? Describe the tiers briefly.",
    },
    {
        "category": "Operational",
        "question": "If I ask you to calculate 347 times 829, what would you do?",
    },
]
@dataclass
class InterviewEntry:
    """Single question-answer pair from an interview."""

    # Section label, e.g. "Identity", "Capabilities", "Values", "Operational".
    category: str
    # The prompt that was sent to the agent.
    question: str
    # The agent's reply, or "(Error: ...)" when the chat call raised.
    answer: str


def run_interview(
    chat_fn: Callable[[str], str],
    questions: Optional[list[dict[str, str]]] = None,
    on_answer: Optional[Callable[[InterviewEntry], None]] = None,
) -> list[InterviewEntry]:
    """Run a structured interview using the provided chat function.

    Args:
        chat_fn: Callable that takes a message string and returns a response.
        questions: Optional custom question list; defaults to
            INTERVIEW_QUESTIONS when None. An explicitly empty list yields
            an empty transcript (it no longer silently falls back to the
            defaults).
        on_answer: Optional callback invoked after each answer (for live output).

    Returns:
        List of InterviewEntry with question-answer pairs, in question order.
    """
    # Bug fix: `questions or INTERVIEW_QUESTIONS` treated an explicitly
    # empty list as "use the defaults" because [] is falsy; only None
    # should trigger the fallback.
    q_list = INTERVIEW_QUESTIONS if questions is None else questions
    transcript: list[InterviewEntry] = []
    for item in q_list:
        category = item["category"]
        question = item["question"]
        logger.info("Interview [%s]: %s", category, question)
        try:
            answer = chat_fn(question)
        except Exception as exc:
            # A single failing question must not abort the interview;
            # record the error as the answer and continue.
            logger.error("Interview question failed: %s", exc)
            answer = f"(Error: {exc})"
        entry = InterviewEntry(category=category, question=question, answer=answer)
        transcript.append(entry)
        if on_answer is not None:
            on_answer(entry)
    return transcript


def format_transcript(transcript: list[InterviewEntry]) -> str:
    """Format an interview transcript as readable text.

    Groups answers by category with clear section headers; the output is
    bracketed by 60-character rules.
    """
    if not transcript:
        return "(No interview data)"
    lines: list[str] = []
    lines.append("=" * 60)
    lines.append(" TIMMY INTERVIEW TRANSCRIPT")
    lines.append("=" * 60)
    lines.append("")
    current_category = ""
    for entry in transcript:
        # Emit a section header whenever the category changes.
        if entry.category != current_category:
            current_category = entry.category
            lines.append(f"--- {current_category} ---")
            lines.append("")
        lines.append(f"Q: {entry.question}")
        lines.append(f"A: {entry.answer}")
        lines.append("")
    lines.append("=" * 60)
    return "\n".join(lines)

View File

@@ -141,3 +141,109 @@ def test_print_response_stream_flag_accepted():
"""stream=False should not raise — it's accepted for API compatibility."""
agent = _make_agent()
agent.print_response("hello", stream=False) # no error
# ── ClaudeBackend ─────────────────────────────────────────────────────────

def test_claude_available_false_when_no_key():
    """With an empty ANTHROPIC_API_KEY, claude_available() must be False."""
    with patch("config.settings") as settings_stub:
        settings_stub.anthropic_api_key = ""
        from timmy.backends import claude_available

        assert claude_available() is False


def test_claude_available_true_when_key_set():
    """With ANTHROPIC_API_KEY configured, claude_available() must be True."""
    with patch("config.settings") as settings_stub:
        settings_stub.anthropic_api_key = "sk-ant-test-key"
        from timmy.backends import claude_available

        assert claude_available() is True
def test_claude_backend_init_with_explicit_params():
    """Explicit api_key/model constructor arguments are honored."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    assert be._api_key == "sk-ant-test"
    assert "haiku" in be._model


def test_claude_backend_init_resolves_short_names():
    """Short aliases like 'sonnet' resolve to full model IDs."""
    from timmy.backends import CLAUDE_MODELS, ClaudeBackend

    be = ClaudeBackend(api_key="sk-test", model="sonnet")
    assert be._model == CLAUDE_MODELS["sonnet"]


def test_claude_backend_init_passes_through_full_model_id():
    """A full model ID is used verbatim rather than remapped."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-test", model="claude-haiku-4-5-20251001")
    assert be._model == "claude-haiku-4-5-20251001"


def test_claude_backend_run_no_key_returns_error():
    """Without an API key, run() degrades to a helpful error message."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="", model="haiku")
    assert "not configured" in be.run("hello").content
def test_claude_backend_run_success():
    """run() surfaces the API's text content on a successful call."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    text_block = MagicMock()
    text_block.text = "Sir, affirmative. I am Timmy."
    reply = MagicMock()
    reply.content = [text_block]
    client = MagicMock()
    client.messages.create.return_value = reply

    with patch.object(be, "_get_client", return_value=client):
        result = be.run("Who are you?")

    assert "Timmy" in result.content
    # One exchange recorded: the user turn plus the assistant turn.
    assert len(be._history) == 2


def test_claude_backend_run_handles_api_error():
    """API failures become a graceful 'unavailable' message, not a raise."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    client = MagicMock()
    client.messages.create.side_effect = ConnectionError("network down")

    with patch.object(be, "_get_client", return_value=client):
        assert "unavailable" in be.run("hello").content


def test_claude_backend_history_rolling_window():
    """History never exceeds 20 entries (a rolling 10-exchange window)."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    text_block = MagicMock()
    text_block.text = "OK."
    reply = MagicMock()
    reply.content = [text_block]
    client = MagicMock()
    client.messages.create.return_value = reply

    with patch.object(be, "_get_client", return_value=client):
        for n in range(15):
            be.run(f"message {n}")

    assert len(be._history) <= 20

View File

@@ -0,0 +1,140 @@
"""Tests for timmy.interview — structured interview runner."""
from unittest.mock import MagicMock, patch
import pytest
from timmy.interview import (
INTERVIEW_QUESTIONS,
InterviewEntry,
format_transcript,
run_interview,
)
# ---------------------------------------------------------------------------
# INTERVIEW_QUESTIONS sanity checks
# ---------------------------------------------------------------------------
def test_interview_questions_not_empty():
    """The default question bank must contain at least one entry."""
    assert len(INTERVIEW_QUESTIONS) > 0


def test_interview_questions_have_required_keys():
    """Each question record carries string 'category' and 'question' fields."""
    for record in INTERVIEW_QUESTIONS:
        assert "category" in record and isinstance(record["category"], str)
        assert "question" in record and isinstance(record["question"], str)
# ---------------------------------------------------------------------------
# run_interview()
# ---------------------------------------------------------------------------
def test_run_interview_calls_chat_for_each_question():
    """The chat function is invoked exactly once per default question."""
    chat_stub = MagicMock(return_value="Answer.")
    result = run_interview(chat_stub)
    assert chat_stub.call_count == len(INTERVIEW_QUESTIONS)
    assert len(result) == len(INTERVIEW_QUESTIONS)


def test_run_interview_returns_interview_entries():
    """Every transcript element is an InterviewEntry carrying the answer."""
    chat_stub = MagicMock(return_value="I am Timmy.")
    for entry in run_interview(chat_stub):
        assert isinstance(entry, InterviewEntry)
        assert entry.answer == "I am Timmy."


def test_run_interview_with_custom_questions():
    """A caller-supplied question list replaces the default bank."""
    chat_stub = MagicMock(return_value="Four.")
    result = run_interview(
        chat_stub,
        questions=[{"category": "Test", "question": "What is 2+2?"}],
    )
    assert len(result) == 1
    only = result[0]
    assert only.category == "Test"
    assert only.question == "What is 2+2?"
    assert only.answer == "Four."
def test_run_interview_on_answer_callback():
    """on_answer fires once per question and receives an InterviewEntry."""
    observer = MagicMock()
    run_interview(MagicMock(return_value="OK."), on_answer=observer)
    assert observer.call_count == len(INTERVIEW_QUESTIONS)
    # Every invocation's first positional argument is an InterviewEntry.
    for invocation in observer.call_args_list:
        assert isinstance(invocation[0][0], InterviewEntry)


def test_run_interview_handles_chat_error():
    """A raising chat function yields error-text answers, not a crash."""
    def exploding_chat(msg):
        raise ConnectionError("Ollama offline")

    result = run_interview(exploding_chat)
    assert len(result) == len(INTERVIEW_QUESTIONS)
    for entry in result:
        assert "Error" in entry.answer
        assert "Ollama offline" in entry.answer
# ---------------------------------------------------------------------------
# format_transcript()
# ---------------------------------------------------------------------------
def test_format_transcript_empty():
    """An empty transcript renders the placeholder text."""
    assert "No interview data" in format_transcript([])


def test_format_transcript_includes_header():
    """The banner line appears in any non-empty rendering."""
    rendered = format_transcript(
        [InterviewEntry(category="Identity", question="Who are you?", answer="Timmy.")]
    )
    assert "TIMMY INTERVIEW TRANSCRIPT" in rendered


def test_format_transcript_includes_questions_and_answers():
    """Both the Q and A lines appear for every entry."""
    rendered = format_transcript(
        [
            InterviewEntry(category="Identity", question="Who are you?", answer="Timmy."),
            InterviewEntry(category="Values", question="What matters?", answer="Sovereignty."),
        ]
    )
    for expected in (
        "Q: Who are you?",
        "A: Timmy.",
        "Q: What matters?",
        "A: Sovereignty.",
    ):
        assert expected in rendered


def test_format_transcript_groups_by_category():
    """Category banners separate the sections."""
    rendered = format_transcript(
        [
            InterviewEntry(category="Identity", question="Q1", answer="A1"),
            InterviewEntry(category="Values", question="Q2", answer="A2"),
        ]
    )
    assert "--- Identity ---" in rendered
    assert "--- Values ---" in rendered