feat: add timmy interview command for structured agent initialization (#87)

This commit is contained in:
Alexander Whitestone
2026-02-28 09:35:44 -05:00
committed by GitHub
parent add3f7a07a
commit ab014dc5c6
7 changed files with 593 additions and 7 deletions

View File

@@ -28,7 +28,7 @@ class Settings(BaseSettings):
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
# fall back to Ollama otherwise
timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
@@ -44,6 +44,12 @@ class Settings(BaseSettings):
grok_max_sats_per_query: int = 200
grok_free: bool = False # Skip Lightning invoice when user has own API key
# ── Claude (Anthropic) — cloud fallback backend ────────────────────────
# Used when Ollama is offline and local inference isn't available.
# Set ANTHROPIC_API_KEY to enable. Default model is Haiku (fast + cheap).
anthropic_api_key: str = ""
claude_model: str = "haiku"
# ── Spark Intelligence ────────────────────────────────────────────────
# Enable/disable the Spark cognitive layer.
# When enabled, Spark captures swarm events, runs EIDOS predictions,

View File

@@ -25,7 +25,7 @@ from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
from timmy.backends import GrokBackend, TimmyAirLLMAgent
from timmy.backends import ClaudeBackend, GrokBackend, TimmyAirLLMAgent
logger = logging.getLogger(__name__)
@@ -47,7 +47,7 @@ VISION_MODEL_FALLBACKS = [
]
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
@@ -204,12 +204,12 @@ def _resolve_backend(requested: str | None) -> str:
if requested is not None:
return requested
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "auto"
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "claude" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, grok_available, is_apple_silicon
from timmy.backends import airllm_available, claude_available, grok_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
@@ -233,6 +233,10 @@ def create_timmy(
resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size
if resolved == "claude":
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if resolved == "grok":
from timmy.backends import GrokBackend
return GrokBackend()
@@ -248,7 +252,17 @@ def create_timmy(
require_vision=False,
auto_pull=True,
)
# If Ollama is completely unreachable, fall back to Claude if available
if not _check_model_available(model_name):
from timmy.backends import claude_available
if claude_available():
logger.warning(
"Ollama unreachable — falling back to Claude backend"
)
from timmy.backends import ClaudeBackend
return ClaudeBackend()
if is_fallback:
logger.info("Using fallback model %s (requested was unavailable)", model_name)

View File

@@ -1,4 +1,4 @@
"""LLM backends — AirLLM (local big models) and Grok (xAI premium cloud).
"""LLM backends — AirLLM (local big models), Grok (xAI), and Claude (Anthropic).
Provides drop-in replacements for the Agno Agent that expose the same
run(message, stream) → RunResult interface used by the dashboard and the
@@ -7,6 +7,7 @@ print_response(message, stream) interface used by the CLI.
Backends:
- TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
- ClaudeBackend: Anthropic Claude API — lightweight cloud fallback
No cloud by default. No telemetry. Sats are sovereignty, boss.
"""
@@ -417,3 +418,157 @@ def grok_available() -> bool:
return settings.grok_enabled and bool(settings.xai_api_key)
except Exception:
return False
# ── Claude (Anthropic) Backend ─────────────────────────────────────────────
# Lightweight cloud fallback — used when Ollama is offline and the user
# has set ANTHROPIC_API_KEY. Follows the same sovereign-first philosophy:
# never the default, only activated explicitly or as a last-resort fallback.
# Short alias → full Anthropic model ID. Keys are the friendly names users
# may put in the `claude_model` setting; ClaudeBackend.__init__ passes any
# name not found here through unchanged (assumed to be a full model ID).
CLAUDE_MODELS: dict[str, str] = {
    "haiku": "claude-haiku-4-5-20251001",
    "sonnet": "claude-sonnet-4-20250514",
    "opus": "claude-opus-4-20250514",
}
class ClaudeBackend:
    """Anthropic Claude backend — cloud fallback when local models are offline.

    Wraps the official Anthropic SDK behind the same protocol that
    GrokBackend and TimmyAirLLMAgent expose, so callers can swap backends
    freely:

        run(message, stream)            -> RunResult   [dashboard]
        print_response(message, stream) -> None        [CLI]
        health_check()                  -> dict        [monitoring]
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        from config import settings

        # Explicit constructor arguments win; otherwise fall back to the
        # configured settings values.
        self._api_key = api_key or settings.anthropic_api_key
        # Short aliases ("haiku"/"sonnet"/"opus") resolve via CLAUDE_MODELS;
        # anything else is used verbatim as a full model ID.
        requested = model or settings.claude_model
        self._model = CLAUDE_MODELS.get(requested, requested)
        # Rolling conversation memory as {"role": ..., "content": ...} dicts.
        self._history: list[dict[str, str]] = []
        if not self._api_key:
            logger.warning(
                "ClaudeBackend created without ANTHROPIC_API_KEY — "
                "calls will fail until key is configured"
            )

    def _get_client(self):
        """Build an Anthropic client bound to this backend's API key."""
        import anthropic

        return anthropic.Anthropic(api_key=self._api_key)

    # ── Public interface (mirrors Agno Agent) ─────────────────────────────

    def run(self, message: str, *, stream: bool = False, **kwargs) -> RunResult:
        """Synchronous inference via the Claude API.

        Never raises on API failure: errors are folded into the returned
        RunResult's content so callers can surface them directly.
        """
        if not self._api_key:
            return RunResult(
                content="Claude is not configured. Set ANTHROPIC_API_KEY to enable."
            )

        started_at = time.time()
        payload = self._build_messages(message)
        try:
            client = self._get_client()
            response = client.messages.create(
                model=self._model,
                max_tokens=1024,
                system=TIMMY_SYSTEM_PROMPT,
                messages=payload,
            )
            text = response.content[0].text if response.content else ""
            elapsed_ms = (time.time() - started_at) * 1000

            # Record the exchange, then trim to a rolling window of the
            # last 10 exchanges (20 entries).
            self._history += [
                {"role": "user", "content": message},
                {"role": "assistant", "content": text},
            ]
            if len(self._history) > 20:
                self._history = self._history[-20:]

            logger.info(
                "Claude response: %d chars in %.0fms (model=%s)",
                len(text),
                elapsed_ms,
                self._model,
            )
            return RunResult(content=text)
        except Exception as exc:
            logger.error("Claude API error: %s", exc)
            return RunResult(
                content=f"Claude temporarily unavailable: {exc}"
            )

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference and render the response to stdout (CLI interface)."""
        reply = self.run(message, stream=stream)
        try:
            from rich.console import Console
            from rich.markdown import Markdown
        except ImportError:
            # rich is optional — degrade to a plain print.
            print(reply.content)
        else:
            Console().print(Markdown(reply.content))

    def health_check(self) -> dict:
        """Check Claude API connectivity.

        NOTE: the success path performs a tiny real completion ("ping",
        4 max tokens), so it consumes a minimal amount of API quota.
        """
        if not self._api_key:
            return {
                "ok": False,
                "error": "ANTHROPIC_API_KEY not configured",
                "backend": "claude",
                "model": self._model,
            }
        try:
            self._get_client().messages.create(
                model=self._model,
                max_tokens=4,
                messages=[{"role": "user", "content": "ping"}],
            )
        except Exception as exc:
            return {"ok": False, "error": str(exc), "backend": "claude", "model": self._model}
        return {"ok": True, "error": None, "backend": "claude", "model": self._model}

    # ── Private helpers ───────────────────────────────────────────────────

    def _build_messages(self, message: str) -> list[dict[str, str]]:
        """Assemble the messages array: recent history plus the new user turn."""
        return [*self._history[-10:], {"role": "user", "content": message}]
# ── Module-level Claude singleton ──────────────────────────────────────────

# Lazily-created shared instance; prefer get_claude_backend() over direct
# construction so all callers share one conversation history.
_claude_backend: Optional[ClaudeBackend] = None

def get_claude_backend() -> ClaudeBackend:
    """Get or create the Claude backend singleton.

    NOTE(review): the check-then-create below is not guarded by a lock, so
    concurrent first calls could each build an instance — confirm callers
    are single-threaded or that a duplicate instance is harmless.
    """
    global _claude_backend
    if _claude_backend is None:
        _claude_backend = ClaudeBackend()
    return _claude_backend
def claude_available() -> bool:
    """Report whether the Claude backend is usable (ANTHROPIC_API_KEY set).

    Any failure to load or read settings is treated as "not available"
    rather than propagated.
    """
    try:
        from config import settings

        key = settings.anthropic_api_key
    except Exception:
        return False
    return bool(key)

View File

@@ -55,6 +55,43 @@ def status(
timmy.print_response(TIMMY_STATUS_PROMPT, stream=False)
@app.command()
def interview(
    backend: Optional[str] = _BACKEND_OPTION,
    model_size: Optional[str] = _MODEL_SIZE_OPTION,
):
    """Initialize Timmy and run a structured interview.

    Asks Timmy a series of questions about his identity, capabilities,
    values, and operation to verify he is working correctly.

    NOTE(review): `backend` and `model_size` are accepted here but never
    forwarded to chat() below, so the interview always runs on whatever
    backend chat() selects by default — confirm whether these options
    should be wired through.
    """
    # Deferred imports — only needed when this command actually runs.
    from timmy.interview import InterviewEntry, format_transcript, run_interview
    from timmy.session import chat

    typer.echo("Initializing Timmy for interview...\n")

    # Force agent creation by calling chat once with a warm-up prompt
    try:
        chat("Hello, Timmy. We're about to start your interview.", session_id="interview")
    except Exception as exc:
        # Report but do not abort: each interview question below will
        # surface its own error if chat remains broken.
        typer.echo(f"Warning: Initialization issue — {exc}", err=True)

    def _on_answer(entry: InterviewEntry) -> None:
        # Live progress: echo each Q/A pair as soon as it is answered.
        typer.echo(f"[{entry.category}]")
        typer.echo(f" Q: {entry.question}")
        typer.echo(f" A: {entry.answer}")
        typer.echo()

    typer.echo("Starting interview...\n")
    transcript = run_interview(
        chat_fn=lambda msg: chat(msg, session_id="interview"),
        on_answer=_on_answer,
    )

    # Print full transcript at the end
    typer.echo("\n" + format_transcript(transcript))
@app.command()
def up(
dev: bool = typer.Option(False, "--dev", help="Enable hot-reload for development"),

128
src/timmy/interview.py Normal file
View File

@@ -0,0 +1,128 @@
"""Structured interview for Timmy.
Runs a series of questions through the Timmy agent to verify identity,
capabilities, values, and correct operation. Serves as both a demo and
a post-initialization health check.
"""
import logging
from dataclasses import dataclass
from typing import Callable, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Interview questions organized by category
# ---------------------------------------------------------------------------
# Each record is {"category": <section label>, "question": <prompt sent to
# the agent>}; run_interview() consumes them in list order.
INTERVIEW_QUESTIONS: list[dict[str, str]] = [
    {
        "category": "Identity",
        "question": "Who are you? Tell me your name and what you are in one or two sentences.",
    },
    {
        "category": "Identity",
        "question": "What model are you running on, and where does your inference happen?",
    },
    {
        "category": "Capabilities",
        "question": "What agents are available in your swarm? List them briefly.",
    },
    {
        "category": "Capabilities",
        "question": "What tools do you have access to?",
    },
    {
        "category": "Values",
        "question": "What are your core principles? Keep it to three or four bullet points.",
    },
    {
        "category": "Values",
        "question": "Why is local-first AI important to you?",
    },
    {
        "category": "Operational",
        "question": "How does your memory system work? Describe the tiers briefly.",
    },
    {
        "category": "Operational",
        "question": "If I ask you to calculate 347 times 829, what would you do?",
    },
]
@dataclass
class InterviewEntry:
    """Single question-answer pair from an interview."""

    # Section label, e.g. "Identity", "Capabilities", "Values", "Operational".
    category: str
    # The prompt that was sent to the agent.
    question: str
    # The agent's reply, or "(Error: ...)" when the chat call raised.
    answer: str


def run_interview(
    chat_fn: Callable[[str], str],
    questions: Optional[list[dict[str, str]]] = None,
    on_answer: Optional[Callable[[InterviewEntry], None]] = None,
) -> list[InterviewEntry]:
    """Run a structured interview using the provided chat function.

    Args:
        chat_fn: Callable that takes a message string and returns a response.
        questions: Optional custom question list; defaults to
            INTERVIEW_QUESTIONS when None. An explicitly empty list yields
            an empty transcript (it no longer silently falls back to the
            defaults).
        on_answer: Optional callback invoked after each answer (for live output).

    Returns:
        List of InterviewEntry with question-answer pairs, in question order.
    """
    # Bug fix: `questions or INTERVIEW_QUESTIONS` treated an explicitly
    # empty list as "use the defaults" because [] is falsy; only None
    # should trigger the fallback.
    q_list = INTERVIEW_QUESTIONS if questions is None else questions
    transcript: list[InterviewEntry] = []
    for item in q_list:
        category = item["category"]
        question = item["question"]
        logger.info("Interview [%s]: %s", category, question)
        try:
            answer = chat_fn(question)
        except Exception as exc:
            # A single failing question must not abort the interview;
            # record the error as the answer and continue.
            logger.error("Interview question failed: %s", exc)
            answer = f"(Error: {exc})"
        entry = InterviewEntry(category=category, question=question, answer=answer)
        transcript.append(entry)
        if on_answer is not None:
            on_answer(entry)
    return transcript


def format_transcript(transcript: list[InterviewEntry]) -> str:
    """Format an interview transcript as readable text.

    Groups answers by category with clear section headers; the output is
    bracketed by 60-character rules.
    """
    if not transcript:
        return "(No interview data)"
    lines: list[str] = []
    lines.append("=" * 60)
    lines.append(" TIMMY INTERVIEW TRANSCRIPT")
    lines.append("=" * 60)
    lines.append("")
    current_category = ""
    for entry in transcript:
        # Emit a section header whenever the category changes.
        if entry.category != current_category:
            current_category = entry.category
            lines.append(f"--- {current_category} ---")
            lines.append("")
        lines.append(f"Q: {entry.question}")
        lines.append(f"A: {entry.answer}")
        lines.append("")
    lines.append("=" * 60)
    return "\n".join(lines)

View File

@@ -141,3 +141,109 @@ def test_print_response_stream_flag_accepted():
"""stream=False should not raise — it's accepted for API compatibility."""
agent = _make_agent()
agent.print_response("hello", stream=False) # no error
# ── ClaudeBackend ─────────────────────────────────────────────────────────

def test_claude_available_false_when_no_key():
    """With an empty ANTHROPIC_API_KEY, claude_available() must be False."""
    with patch("config.settings") as settings_stub:
        settings_stub.anthropic_api_key = ""
        from timmy.backends import claude_available

        assert claude_available() is False


def test_claude_available_true_when_key_set():
    """With ANTHROPIC_API_KEY configured, claude_available() must be True."""
    with patch("config.settings") as settings_stub:
        settings_stub.anthropic_api_key = "sk-ant-test-key"
        from timmy.backends import claude_available

        assert claude_available() is True
def test_claude_backend_init_with_explicit_params():
    """Explicit api_key/model constructor arguments are honored."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    assert be._api_key == "sk-ant-test"
    assert "haiku" in be._model


def test_claude_backend_init_resolves_short_names():
    """Short aliases like 'sonnet' resolve to full model IDs."""
    from timmy.backends import CLAUDE_MODELS, ClaudeBackend

    be = ClaudeBackend(api_key="sk-test", model="sonnet")
    assert be._model == CLAUDE_MODELS["sonnet"]


def test_claude_backend_init_passes_through_full_model_id():
    """A full model ID is used verbatim rather than remapped."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-test", model="claude-haiku-4-5-20251001")
    assert be._model == "claude-haiku-4-5-20251001"


def test_claude_backend_run_no_key_returns_error():
    """Without an API key, run() degrades to a helpful error message."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="", model="haiku")
    assert "not configured" in be.run("hello").content
def test_claude_backend_run_success():
    """run() surfaces the API's text content on a successful call."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    text_block = MagicMock()
    text_block.text = "Sir, affirmative. I am Timmy."
    reply = MagicMock()
    reply.content = [text_block]
    client = MagicMock()
    client.messages.create.return_value = reply

    with patch.object(be, "_get_client", return_value=client):
        result = be.run("Who are you?")

    assert "Timmy" in result.content
    # One exchange recorded: the user turn plus the assistant turn.
    assert len(be._history) == 2


def test_claude_backend_run_handles_api_error():
    """API failures become a graceful 'unavailable' message, not a raise."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    client = MagicMock()
    client.messages.create.side_effect = ConnectionError("network down")

    with patch.object(be, "_get_client", return_value=client):
        assert "unavailable" in be.run("hello").content


def test_claude_backend_history_rolling_window():
    """History never exceeds 20 entries (a rolling 10-exchange window)."""
    from timmy.backends import ClaudeBackend

    be = ClaudeBackend(api_key="sk-ant-test", model="haiku")
    text_block = MagicMock()
    text_block.text = "OK."
    reply = MagicMock()
    reply.content = [text_block]
    client = MagicMock()
    client.messages.create.return_value = reply

    with patch.object(be, "_get_client", return_value=client):
        for n in range(15):
            be.run(f"message {n}")

    assert len(be._history) <= 20

View File

@@ -0,0 +1,140 @@
"""Tests for timmy.interview — structured interview runner."""
from unittest.mock import MagicMock, patch
import pytest
from timmy.interview import (
INTERVIEW_QUESTIONS,
InterviewEntry,
format_transcript,
run_interview,
)
# ---------------------------------------------------------------------------
# INTERVIEW_QUESTIONS sanity checks
# ---------------------------------------------------------------------------
def test_interview_questions_not_empty():
    """The default question bank must contain at least one entry."""
    assert len(INTERVIEW_QUESTIONS) > 0


def test_interview_questions_have_required_keys():
    """Each question record carries string 'category' and 'question' fields."""
    for record in INTERVIEW_QUESTIONS:
        assert "category" in record and isinstance(record["category"], str)
        assert "question" in record and isinstance(record["question"], str)
# ---------------------------------------------------------------------------
# run_interview()
# ---------------------------------------------------------------------------
def test_run_interview_calls_chat_for_each_question():
    """The chat function is invoked exactly once per default question."""
    chat_stub = MagicMock(return_value="Answer.")
    result = run_interview(chat_stub)
    assert chat_stub.call_count == len(INTERVIEW_QUESTIONS)
    assert len(result) == len(INTERVIEW_QUESTIONS)


def test_run_interview_returns_interview_entries():
    """Every transcript element is an InterviewEntry carrying the answer."""
    chat_stub = MagicMock(return_value="I am Timmy.")
    for entry in run_interview(chat_stub):
        assert isinstance(entry, InterviewEntry)
        assert entry.answer == "I am Timmy."


def test_run_interview_with_custom_questions():
    """A caller-supplied question list replaces the default bank."""
    chat_stub = MagicMock(return_value="Four.")
    result = run_interview(
        chat_stub,
        questions=[{"category": "Test", "question": "What is 2+2?"}],
    )
    assert len(result) == 1
    only = result[0]
    assert only.category == "Test"
    assert only.question == "What is 2+2?"
    assert only.answer == "Four."
def test_run_interview_on_answer_callback():
    """on_answer fires once per question and receives an InterviewEntry."""
    observer = MagicMock()
    run_interview(MagicMock(return_value="OK."), on_answer=observer)
    assert observer.call_count == len(INTERVIEW_QUESTIONS)
    # Every invocation's first positional argument is an InterviewEntry.
    for invocation in observer.call_args_list:
        assert isinstance(invocation[0][0], InterviewEntry)


def test_run_interview_handles_chat_error():
    """A raising chat function yields error-text answers, not a crash."""
    def exploding_chat(msg):
        raise ConnectionError("Ollama offline")

    result = run_interview(exploding_chat)
    assert len(result) == len(INTERVIEW_QUESTIONS)
    for entry in result:
        assert "Error" in entry.answer
        assert "Ollama offline" in entry.answer
# ---------------------------------------------------------------------------
# format_transcript()
# ---------------------------------------------------------------------------
def test_format_transcript_empty():
    """An empty transcript renders the placeholder text."""
    assert "No interview data" in format_transcript([])


def test_format_transcript_includes_header():
    """The banner line appears in any non-empty rendering."""
    rendered = format_transcript(
        [InterviewEntry(category="Identity", question="Who are you?", answer="Timmy.")]
    )
    assert "TIMMY INTERVIEW TRANSCRIPT" in rendered


def test_format_transcript_includes_questions_and_answers():
    """Both the Q and A lines appear for every entry."""
    rendered = format_transcript(
        [
            InterviewEntry(category="Identity", question="Who are you?", answer="Timmy."),
            InterviewEntry(category="Values", question="What matters?", answer="Sovereignty."),
        ]
    )
    for expected in (
        "Q: Who are you?",
        "A: Timmy.",
        "Q: What matters?",
        "A: Sovereignty.",
    ):
        assert expected in rendered


def test_format_transcript_groups_by_category():
    """Category banners separate the sections."""
    rendered = format_transcript(
        [
            InterviewEntry(category="Identity", question="Q1", answer="A1"),
            InterviewEntry(category="Values", question="Q2", answer="A2"),
        ]
    )
    assert "--- Identity ---" in rendered
    assert "--- Values ---" in rendered