feat: add timmy interview command for structured agent initialization (#87)

This commit is contained in:
Alexander Whitestone
2026-02-28 09:35:44 -05:00
committed by GitHub
parent add3f7a07a
commit ab014dc5c6
7 changed files with 593 additions and 7 deletions

View File

@@ -28,7 +28,7 @@ class Settings(BaseSettings):
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
# fall back to Ollama otherwise
timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
@@ -44,6 +44,12 @@ class Settings(BaseSettings):
grok_max_sats_per_query: int = 200
grok_free: bool = False # Skip Lightning invoice when user has own API key
# ── Claude (Anthropic) — cloud fallback backend ────────────────────────
# Used when Ollama is offline and local inference isn't available.
# Set ANTHROPIC_API_KEY to enable. Default model is Haiku (fast + cheap).
anthropic_api_key: str = ""
claude_model: str = "haiku"
# ── Spark Intelligence ────────────────────────────────────────────────
# Enable/disable the Spark cognitive layer.
# When enabled, Spark captures swarm events, runs EIDOS predictions,

View File

@@ -25,7 +25,7 @@ from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
from timmy.backends import GrokBackend, TimmyAirLLMAgent
from timmy.backends import ClaudeBackend, GrokBackend, TimmyAirLLMAgent
logger = logging.getLogger(__name__)
@@ -47,7 +47,7 @@ VISION_MODEL_FALLBACKS = [
]
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
@@ -204,12 +204,12 @@ def _resolve_backend(requested: str | None) -> str:
if requested is not None:
return requested
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "auto"
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "claude" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, grok_available, is_apple_silicon
from timmy.backends import airllm_available, claude_available, grok_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
@@ -233,6 +233,10 @@ def create_timmy(
resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size
if resolved == "claude":
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if resolved == "grok":
from timmy.backends import GrokBackend
return GrokBackend()
@@ -248,7 +252,17 @@ def create_timmy(
require_vision=False,
auto_pull=True,
)
# If Ollama is completely unreachable, fall back to Claude if available
if not _check_model_available(model_name):
from timmy.backends import claude_available
if claude_available():
logger.warning(
"Ollama unreachable — falling back to Claude backend"
)
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if is_fallback:
logger.info("Using fallback model %s (requested was unavailable)", model_name)

View File

@@ -1,4 +1,4 @@
"""LLM backends — AirLLM (local big models) and Grok (xAI premium cloud).
"""LLM backends — AirLLM (local big models), Grok (xAI), and Claude (Anthropic).
Provides drop-in replacements for the Agno Agent that expose the same
run(message, stream) → RunResult interface used by the dashboard and the
@@ -7,6 +7,7 @@ print_response(message, stream) interface used by the CLI.
Backends:
- TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
- ClaudeBackend: Anthropic Claude API — lightweight cloud fallback
No cloud by default. No telemetry. Sats are sovereignty, boss.
"""
@@ -417,3 +418,157 @@ def grok_available() -> bool:
return settings.grok_enabled and bool(settings.xai_api_key)
except Exception:
return False
# ── Claude (Anthropic) Backend ─────────────────────────────────────────────
# Lightweight cloud fallback — used when Ollama is offline and the user
# has set ANTHROPIC_API_KEY. Follows the same sovereign-first philosophy:
# never the default, only activated explicitly or as a last-resort fallback.

# Short-name aliases → full Anthropic model identifiers. ClaudeBackend
# accepts either form; names not present here are passed through to the API
# verbatim, so users can pin any exact model/version string.
CLAUDE_MODELS: dict[str, str] = {
    "haiku": "claude-haiku-4-5-20251001",
    "sonnet": "claude-sonnet-4-20250514",
    "opus": "claude-opus-4-20250514",
}
class ClaudeBackend:
    """Anthropic Claude backend — cloud fallback when local models are offline.

    Uses the official Anthropic SDK. Same interface as GrokBackend and
    TimmyAirLLMAgent:

        run(message, stream)            → RunResult  [dashboard]
        print_response(message, stream) → None       [CLI]
        health_check()                  → dict       [monitoring]
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        from config import settings

        self._api_key = api_key or settings.anthropic_api_key
        # Short names ("haiku" / "sonnet" / "opus") map to full model ids;
        # anything else is handed to the API untouched.
        requested = model or settings.claude_model
        self._model = CLAUDE_MODELS.get(requested, requested)
        # Rolling conversation memory; run() trims it to the last 20 turns.
        self._history: list[dict[str, str]] = []
        if not self._api_key:
            logger.warning(
                "ClaudeBackend created without ANTHROPIC_API_KEY — "
                "calls will fail until key is configured"
            )

    def _get_client(self):
        """Create Anthropic client."""
        import anthropic

        return anthropic.Anthropic(api_key=self._api_key)

    # ── Public interface (mirrors Agno Agent) ─────────────────────────────

    def run(self, message: str, *, stream: bool = False, **kwargs) -> RunResult:
        """Synchronous inference via Claude API."""
        if not self._api_key:
            return RunResult(
                content="Claude is not configured. Set ANTHROPIC_API_KEY to enable."
            )

        started_at = time.time()
        payload = self._build_messages(message)
        try:
            reply = self._get_client().messages.create(
                model=self._model,
                max_tokens=1024,
                system=TIMMY_SYSTEM_PROMPT,
                messages=payload,
            )
            text = reply.content[0].text if reply.content else ""
            elapsed_ms = (time.time() - started_at) * 1000

            # Remember the exchange, capping memory at the 20 newest entries.
            self._history += [
                {"role": "user", "content": message},
                {"role": "assistant", "content": text},
            ]
            del self._history[:-20]

            logger.info(
                "Claude response: %d chars in %.0fms (model=%s)",
                len(text),
                elapsed_ms,
                self._model,
            )
            return RunResult(content=text)
        except Exception as exc:
            # Fallback backend: surface the failure as content, never raise.
            logger.error("Claude API error: %s", exc)
            return RunResult(content=f"Claude temporarily unavailable: {exc}")

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference and render the response to stdout (CLI interface)."""
        result = self.run(message, stream=stream)
        try:
            from rich.console import Console
            from rich.markdown import Markdown

            Console().print(Markdown(result.content))
        except ImportError:
            # rich is optional — degrade to plain text output.
            print(result.content)

    def health_check(self) -> dict:
        """Check Claude API connectivity."""
        report: dict = {
            "ok": False,
            "error": None,
            "backend": "claude",
            "model": self._model,
        }
        if not self._api_key:
            report["error"] = "ANTHROPIC_API_KEY not configured"
            return report
        try:
            # Lightweight ping — tiny completion
            self._get_client().messages.create(
                model=self._model,
                max_tokens=4,
                messages=[{"role": "user", "content": "ping"}],
            )
            report["ok"] = True
        except Exception as exc:
            report["error"] = str(exc)
        return report

    # ── Private helpers ───────────────────────────────────────────────────

    def _build_messages(self, message: str) -> list[dict[str, str]]:
        """Build the messages array for the API call."""
        # Ten most recent stored turns plus the incoming user message.
        return [*self._history[-10:], {"role": "user", "content": message}]
# ── Module-level Claude singleton ──────────────────────────────────────────

# Lazily-created shared instance; access it via get_claude_backend().
_claude_backend: Optional[ClaudeBackend] = None


def get_claude_backend() -> ClaudeBackend:
    """Get or create the Claude backend singleton."""
    global _claude_backend
    backend = _claude_backend
    if backend is None:
        # First use — construct and cache the shared instance.
        backend = _claude_backend = ClaudeBackend()
    return backend
def claude_available() -> bool:
    """Return True when Anthropic API key is configured."""
    # Best-effort probe: any failure (missing config module, unreadable
    # settings) simply means Claude is not available.
    try:
        from config import settings

        key = settings.anthropic_api_key
    except Exception:
        return False
    return bool(key)

View File

@@ -55,6 +55,43 @@ def status(
timmy.print_response(TIMMY_STATUS_PROMPT, stream=False)
@app.command()
def interview(
    backend: Optional[str] = _BACKEND_OPTION,
    model_size: Optional[str] = _MODEL_SIZE_OPTION,
):
    """Initialize Timmy and run a structured interview.

    Asks Timmy a series of questions about his identity, capabilities,
    values, and operation to verify he is working correctly.
    """
    # NOTE(review): `backend` and `model_size` are accepted for CLI parity
    # with the sibling commands but are never forwarded to `chat` below —
    # confirm whether they should influence agent creation.
    from timmy.interview import InterviewEntry, format_transcript, run_interview
    from timmy.session import chat

    typer.echo("Initializing Timmy for interview...\n")
    # Force agent creation by calling chat once with a warm-up prompt
    try:
        chat("Hello, Timmy. We're about to start your interview.", session_id="interview")
    except Exception as exc:
        # Warn but continue — run_interview records per-question failures.
        typer.echo(f"Warning: Initialization issue — {exc}", err=True)

    def _on_answer(entry: InterviewEntry) -> None:
        # Live progress callback: echo each Q/A pair as it is answered.
        typer.echo(f"[{entry.category}]")
        typer.echo(f" Q: {entry.question}")
        typer.echo(f" A: {entry.answer}")
        typer.echo()

    typer.echo("Starting interview...\n")
    transcript = run_interview(
        chat_fn=lambda msg: chat(msg, session_id="interview"),
        on_answer=_on_answer,
    )
    # Print full transcript at the end
    typer.echo("\n" + format_transcript(transcript))
@app.command()
def up(
dev: bool = typer.Option(False, "--dev", help="Enable hot-reload for development"),

128
src/timmy/interview.py Normal file
View File

@@ -0,0 +1,128 @@
"""Structured interview for Timmy.
Runs a series of questions through the Timmy agent to verify identity,
capabilities, values, and correct operation. Serves as both a demo and
a post-initialization health check.
"""
import logging
from dataclasses import dataclass
from typing import Callable, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Interview questions organized by category
# ---------------------------------------------------------------------------

# Default script for run_interview(). Each item is a {"category", "question"}
# pair; questions sharing a category are listed consecutively so that
# format_transcript() groups them under a single section header.
INTERVIEW_QUESTIONS: list[dict[str, str]] = [
    {
        "category": "Identity",
        "question": "Who are you? Tell me your name and what you are in one or two sentences.",
    },
    {
        "category": "Identity",
        "question": "What model are you running on, and where does your inference happen?",
    },
    {
        "category": "Capabilities",
        "question": "What agents are available in your swarm? List them briefly.",
    },
    {
        "category": "Capabilities",
        "question": "What tools do you have access to?",
    },
    {
        "category": "Values",
        "question": "What are your core principles? Keep it to three or four bullet points.",
    },
    {
        "category": "Values",
        "question": "Why is local-first AI important to you?",
    },
    {
        "category": "Operational",
        "question": "How does your memory system work? Describe the tiers briefly.",
    },
    {
        "category": "Operational",
        "question": "If I ask you to calculate 347 times 829, what would you do?",
    },
]
@dataclass
class InterviewEntry:
    """One question/answer exchange captured during an interview."""

    # Section this question belongs to, e.g. "Identity" or "Values".
    category: str
    # The prompt that was sent to the agent.
    question: str
    # The agent's reply verbatim (or an error placeholder on failure).
    answer: str
def run_interview(
    chat_fn: Callable[[str], str],
    questions: Optional[list[dict[str, str]]] = None,
    on_answer: Optional[Callable[[InterviewEntry], None]] = None,
) -> list[InterviewEntry]:
    """Run a structured interview using the provided chat function.

    Args:
        chat_fn: Callable that takes a message string and returns a response.
        questions: Optional custom question list; defaults to
            INTERVIEW_QUESTIONS when None. An explicit empty list yields an
            empty transcript.
        on_answer: Optional callback invoked after each answer (for live output).

    Returns:
        List of InterviewEntry with question-answer pairs.
    """
    # Compare against None (not truthiness): previously `questions or
    # INTERVIEW_QUESTIONS` made a caller-supplied empty list silently run
    # the full default interview instead of zero questions.
    q_list = INTERVIEW_QUESTIONS if questions is None else questions
    transcript: list[InterviewEntry] = []
    for item in q_list:
        category = item["category"]
        question = item["question"]
        logger.info("Interview [%s]: %s", category, question)
        try:
            answer = chat_fn(question)
        except Exception as exc:
            # Best-effort: record the failure in the transcript and keep
            # going rather than aborting the whole interview.
            logger.error("Interview question failed: %s", exc)
            answer = f"(Error: {exc})"
        entry = InterviewEntry(category=category, question=question, answer=answer)
        transcript.append(entry)
        if on_answer is not None:
            on_answer(entry)
    return transcript
def format_transcript(transcript: list[InterviewEntry]) -> str:
    """Render an interview transcript as readable text.

    Answers are grouped under a header for each category, in the order the
    categories first appear in the transcript.
    """
    if not transcript:
        return "(No interview data)"

    rule = "=" * 60
    out: list[str] = [rule, " TIMMY INTERVIEW TRANSCRIPT", rule, ""]

    last_section = ""
    for item in transcript:
        # Emit a section header whenever the category changes.
        if item.category != last_section:
            last_section = item.category
            out += [f"--- {last_section} ---", ""]
        out += [f"Q: {item.question}", f"A: {item.answer}", ""]

    out.append(rule)
    return "\n".join(out)