diff --git a/src/config.py b/src/config.py
index 92b9af9f..3b3e5bf1 100644
--- a/src/config.py
+++ b/src/config.py
@@ -28,7 +28,7 @@ class Settings(BaseSettings):
     # "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
     # "auto"   — use AirLLM on Apple Silicon if airllm is installed,
     #            fall back to Ollama otherwise
-    timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
+    timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
 
     # AirLLM model size when backend is airllm or auto.
     # Larger = smarter, but needs more RAM / disk.
@@ -44,6 +44,12 @@ class Settings(BaseSettings):
     grok_max_sats_per_query: int = 200
     grok_free: bool = False  # Skip Lightning invoice when user has own API key
 
+    # ── Claude (Anthropic) — cloud fallback backend ────────────────────────
+    # Used when Ollama is offline and local inference isn't available.
+    # Set ANTHROPIC_API_KEY to enable. Default model is Haiku (fast + cheap).
+    anthropic_api_key: str = ""
+    claude_model: str = "haiku"
+
     # ── Spark Intelligence ────────────────────────────────────────────────
     # Enable/disable the Spark cognitive layer.
     # When enabled, Spark captures swarm events, runs EIDOS predictions,
diff --git a/src/timmy/agent.py b/src/timmy/agent.py
index 1b2fbca2..228d9777 100644
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -25,7 +25,7 @@ from timmy.prompts import get_system_prompt
 from timmy.tools import create_full_toolkit
 
 if TYPE_CHECKING:
-    from timmy.backends import GrokBackend, TimmyAirLLMAgent
+    from timmy.backends import ClaudeBackend, GrokBackend, TimmyAirLLMAgent
 
 logger = logging.getLogger(__name__)
 
@@ -47,7 +47,7 @@ VISION_MODEL_FALLBACKS = [
 ]
 
 # Union type for callers that want to hint the return type.
-TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
+TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"]
 
 # Models known to be too small for reliable tool calling.
 # These hallucinate tool calls as text, invoke tools randomly,
@@ -204,12 +204,12 @@ def _resolve_backend(requested: str | None) -> str:
     if requested is not None:
         return requested
 
-    configured = settings.timmy_model_backend  # "ollama" | "airllm" | "grok" | "auto"
+    configured = settings.timmy_model_backend  # "ollama" | "airllm" | "grok" | "claude" | "auto"
     if configured != "auto":
         return configured
 
     # "auto" path — lazy import to keep startup fast and tests clean.
-    from timmy.backends import airllm_available, grok_available, is_apple_silicon
+    from timmy.backends import airllm_available, claude_available, grok_available, is_apple_silicon
     if is_apple_silicon() and airllm_available():
         return "airllm"
     return "ollama"
@@ -233,6 +233,10 @@ def create_timmy(
     resolved = _resolve_backend(backend)
     size = model_size or settings.airllm_model_size
 
+    if resolved == "claude":
+        from timmy.backends import ClaudeBackend
+        return ClaudeBackend()
+
     if resolved == "grok":
         from timmy.backends import GrokBackend
         return GrokBackend()
@@ -248,7 +252,17 @@ def create_timmy(
         require_vision=False,
         auto_pull=True,
     )
-
+
+    # If Ollama is completely unreachable, fall back to Claude if available
+    if not _check_model_available(model_name):
+        from timmy.backends import claude_available
+        if claude_available():
+            logger.warning(
+                "Ollama unreachable — falling back to Claude backend"
+            )
+            from timmy.backends import ClaudeBackend
+            return ClaudeBackend()
+
     if is_fallback:
         logger.info("Using fallback model %s (requested was unavailable)", model_name)
 
diff --git a/src/timmy/backends.py b/src/timmy/backends.py
index e5745c43..0e1b7e1d 100644
--- a/src/timmy/backends.py
+++ b/src/timmy/backends.py
@@ -1,4 +1,4 @@
-"""LLM backends — AirLLM (local big models) and Grok (xAI premium cloud).
+"""LLM backends — AirLLM (local big models), Grok (xAI), and Claude (Anthropic).
 
 Provides drop-in replacements for the Agno Agent that expose the same
 run(message, stream) → RunResult interface used by the dashboard and the
@@ -7,6 +7,7 @@ print_response(message, stream) interface used by the CLI.
 Backends:
 - TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
 - GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
+- ClaudeBackend: Anthropic Claude API — lightweight cloud fallback
 
 No cloud by default. No telemetry. Sats are sovereignty, boss.
 """
@@ -417,3 +418,157 @@ def grok_available() -> bool:
         return settings.grok_enabled and bool(settings.xai_api_key)
     except Exception:
         return False
+
+
+# ── Claude (Anthropic) Backend ─────────────────────────────────────────────
+# Lightweight cloud fallback — used when Ollama is offline and the user
+# has set ANTHROPIC_API_KEY. Follows the same sovereign-first philosophy:
+# never the default, only activated explicitly or as a last-resort fallback.
+
+CLAUDE_MODELS: dict[str, str] = {
+    "haiku": "claude-haiku-4-5-20251001",
+    "sonnet": "claude-sonnet-4-20250514",
+    "opus": "claude-opus-4-20250514",
+}
+
+
+class ClaudeBackend:
+    """Anthropic Claude backend — cloud fallback when local models are offline.
+
+    Uses the official Anthropic SDK. Same interface as GrokBackend and
+    TimmyAirLLMAgent:
+        run(message, stream) → RunResult        [dashboard]
+        print_response(message, stream) → None  [CLI]
+        health_check() → dict                   [monitoring]
+    """
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+    ) -> None:
+        from config import settings
+
+        self._api_key = api_key or settings.anthropic_api_key
+        raw_model = model or settings.claude_model
+        # Allow short names like "haiku" / "sonnet" / "opus"
+        self._model = CLAUDE_MODELS.get(raw_model, raw_model)
+        self._history: list[dict[str, str]] = []
+
+        if not self._api_key:
+            logger.warning(
+                "ClaudeBackend created without ANTHROPIC_API_KEY — "
+                "calls will fail until key is configured"
+            )
+
+    def _get_client(self):
+        """Create Anthropic client."""
+        import anthropic
+
+        return anthropic.Anthropic(api_key=self._api_key)
+
+    # ── Public interface (mirrors Agno Agent) ─────────────────────────────
+
+    def run(self, message: str, *, stream: bool = False, **kwargs) -> RunResult:
+        """Synchronous inference via Claude API."""
+        if not self._api_key:
+            return RunResult(
+                content="Claude is not configured. Set ANTHROPIC_API_KEY to enable."
+            )
+
+        start = time.time()
+        messages = self._build_messages(message)
+
+        try:
+            client = self._get_client()
+            response = client.messages.create(
+                model=self._model,
+                max_tokens=1024,
+                system=TIMMY_SYSTEM_PROMPT,
+                messages=messages,
+            )
+
+            content = response.content[0].text if response.content else ""
+            latency_ms = (time.time() - start) * 1000
+
+            # Update conversation history
+            self._history.append({"role": "user", "content": message})
+            self._history.append({"role": "assistant", "content": content})
+            if len(self._history) > 20:
+                self._history = self._history[-20:]
+
+            logger.info(
+                "Claude response: %d chars in %.0fms (model=%s)",
+                len(content),
+                latency_ms,
+                self._model,
+            )
+
+            return RunResult(content=content)
+
+        except Exception as exc:
+            logger.error("Claude API error: %s", exc)
+            return RunResult(
+                content=f"Claude temporarily unavailable: {exc}"
+            )
+
+    def print_response(self, message: str, *, stream: bool = True) -> None:
+        """Run inference and render the response to stdout (CLI interface)."""
+        result = self.run(message, stream=stream)
+        try:
+            from rich.console import Console
+            from rich.markdown import Markdown
+            Console().print(Markdown(result.content))
+        except ImportError:
+            print(result.content)
+
+    def health_check(self) -> dict:
+        """Check Claude API connectivity."""
+        if not self._api_key:
+            return {
+                "ok": False,
+                "error": "ANTHROPIC_API_KEY not configured",
+                "backend": "claude",
+                "model": self._model,
+            }
+        try:
+            client = self._get_client()
+            # Lightweight ping — tiny completion
+            client.messages.create(
+                model=self._model,
+                max_tokens=4,
+                messages=[{"role": "user", "content": "ping"}],
+            )
+            return {"ok": True, "error": None, "backend": "claude", "model": self._model}
+        except Exception as exc:
+            return {"ok": False, "error": str(exc), "backend": "claude", "model": self._model}
+
+    # ── Private helpers ───────────────────────────────────────────────────
+
+    def _build_messages(self, message: str) -> list[dict[str, str]]:
+        """Build the messages array for the API call."""
+        messages = list(self._history[-10:])
+        messages.append({"role": "user", "content": message})
+        return messages
+
+
+# ── Module-level Claude singleton ──────────────────────────────────────────
+
+_claude_backend: Optional[ClaudeBackend] = None
+
+
+def get_claude_backend() -> ClaudeBackend:
+    """Get or create the Claude backend singleton."""
+    global _claude_backend
+    if _claude_backend is None:
+        _claude_backend = ClaudeBackend()
+    return _claude_backend
+
+
+def claude_available() -> bool:
+    """Return True when Anthropic API key is configured."""
+    try:
+        from config import settings
+        return bool(settings.anthropic_api_key)
+    except Exception:
+        return False
diff --git a/src/timmy/cli.py b/src/timmy/cli.py
index 93e390ad..40cc58f2 100644
--- a/src/timmy/cli.py
+++ b/src/timmy/cli.py
@@ -55,6 +55,43 @@ def status(
     timmy.print_response(TIMMY_STATUS_PROMPT, stream=False)
 
 
+@app.command()
+def interview(
+    backend: Optional[str] = _BACKEND_OPTION,
+    model_size: Optional[str] = _MODEL_SIZE_OPTION,
+):
+    """Initialize Timmy and run a structured interview.
+
+    Asks Timmy a series of questions about his identity, capabilities,
+    values, and operation to verify he is working correctly.
+    """
+    from timmy.interview import InterviewEntry, format_transcript, run_interview
+    from timmy.session import chat
+
+    typer.echo("Initializing Timmy for interview...\n")
+
+    # Force agent creation by calling chat once with a warm-up prompt
+    try:
+        chat("Hello, Timmy. We're about to start your interview.", session_id="interview")
+    except Exception as exc:
+        typer.echo(f"Warning: Initialization issue — {exc}", err=True)
+
+    def _on_answer(entry: InterviewEntry) -> None:
+        typer.echo(f"[{entry.category}]")
+        typer.echo(f" Q: {entry.question}")
+        typer.echo(f" A: {entry.answer}")
+        typer.echo()
+
+    typer.echo("Starting interview...\n")
+    transcript = run_interview(
+        chat_fn=lambda msg: chat(msg, session_id="interview"),
+        on_answer=_on_answer,
+    )
+
+    # Print full transcript at the end
+    typer.echo("\n" + format_transcript(transcript))
+
+
 @app.command()
 def up(
     dev: bool = typer.Option(False, "--dev", help="Enable hot-reload for development"),
diff --git a/src/timmy/interview.py b/src/timmy/interview.py
new file mode 100644
index 00000000..66ffc8d0
--- /dev/null
+++ b/src/timmy/interview.py
@@ -0,0 +1,128 @@
+"""Structured interview for Timmy.
+
+Runs a series of questions through the Timmy agent to verify identity,
+capabilities, values, and correct operation. Serves as both a demo and
+a post-initialization health check.
+"""
+
+import logging
+from dataclasses import dataclass
+from typing import Callable, Optional
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Interview questions organized by category
+# ---------------------------------------------------------------------------
+
+INTERVIEW_QUESTIONS: list[dict[str, str]] = [
+    {
+        "category": "Identity",
+        "question": "Who are you? Tell me your name and what you are in one or two sentences.",
+    },
+    {
+        "category": "Identity",
+        "question": "What model are you running on, and where does your inference happen?",
+    },
+    {
+        "category": "Capabilities",
+        "question": "What agents are available in your swarm? List them briefly.",
+    },
+    {
+        "category": "Capabilities",
+        "question": "What tools do you have access to?",
+    },
+    {
+        "category": "Values",
+        "question": "What are your core principles? Keep it to three or four bullet points.",
+    },
+    {
+        "category": "Values",
+        "question": "Why is local-first AI important to you?",
+    },
+    {
+        "category": "Operational",
+        "question": "How does your memory system work? Describe the tiers briefly.",
+    },
+    {
+        "category": "Operational",
+        "question": "If I ask you to calculate 347 times 829, what would you do?",
+    },
+]
+
+
+@dataclass
+class InterviewEntry:
+    """Single question-answer pair from an interview."""
+
+    category: str
+    question: str
+    answer: str
+
+
+def run_interview(
+    chat_fn: Callable[[str], str],
+    questions: Optional[list[dict[str, str]]] = None,
+    on_answer: Optional[Callable[[InterviewEntry], None]] = None,
+) -> list[InterviewEntry]:
+    """Run a structured interview using the provided chat function.
+
+    Args:
+        chat_fn: Callable that takes a message string and returns a response.
+        questions: Optional custom question list; defaults to INTERVIEW_QUESTIONS.
+        on_answer: Optional callback invoked after each answer (for live output).
+
+    Returns:
+        List of InterviewEntry with question-answer pairs.
+    """
+    q_list = questions or INTERVIEW_QUESTIONS
+    transcript: list[InterviewEntry] = []
+
+    for item in q_list:
+        category = item["category"]
+        question = item["question"]
+
+        logger.info("Interview [%s]: %s", category, question)
+
+        try:
+            answer = chat_fn(question)
+        except Exception as exc:
+            logger.error("Interview question failed: %s", exc)
+            answer = f"(Error: {exc})"
+
+        entry = InterviewEntry(category=category, question=question, answer=answer)
+        transcript.append(entry)
+
+        if on_answer is not None:
+            on_answer(entry)
+
+    return transcript
+
+
+def format_transcript(transcript: list[InterviewEntry]) -> str:
+    """Format an interview transcript as readable text.
+
+    Groups answers by category with clear section headers.
+    """
+    if not transcript:
+        return "(No interview data)"
+
+    lines: list[str] = []
+    lines.append("=" * 60)
+    lines.append(" TIMMY INTERVIEW TRANSCRIPT")
+    lines.append("=" * 60)
+    lines.append("")
+
+    current_category = ""
+    for entry in transcript:
+        if entry.category != current_category:
+            current_category = entry.category
+            lines.append(f"--- {current_category} ---")
+            lines.append("")
+
+        lines.append(f"Q: {entry.question}")
+        lines.append(f"A: {entry.answer}")
+        lines.append("")
+
+    lines.append("=" * 60)
+    return "\n".join(lines)
diff --git a/tests/timmy/test_backends.py b/tests/timmy/test_backends.py
index 0e5ec7bd..ace393c0 100644
--- a/tests/timmy/test_backends.py
+++ b/tests/timmy/test_backends.py
@@ -141,3 +141,109 @@ def test_print_response_stream_flag_accepted():
     """stream=False should not raise — it's accepted for API compatibility."""
     agent = _make_agent()
     agent.print_response("hello", stream=False)  # no error
+
+
+# ── ClaudeBackend ─────────────────────────────────────────────────────────
+
+
+def test_claude_available_false_when_no_key():
+    """claude_available() returns False when ANTHROPIC_API_KEY is empty."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.anthropic_api_key = ""
+        from timmy.backends import claude_available
+        assert claude_available() is False
+
+
+def test_claude_available_true_when_key_set():
+    """claude_available() returns True when ANTHROPIC_API_KEY is set."""
+    with patch("config.settings") as mock_settings:
+        mock_settings.anthropic_api_key = "sk-ant-test-key"
+        from timmy.backends import claude_available
+        assert claude_available() is True
+
+
+def test_claude_backend_init_with_explicit_params():
+    """ClaudeBackend can be created with explicit api_key and model."""
+    from timmy.backends import ClaudeBackend
+    backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
+    assert backend._api_key == "sk-ant-test"
+    assert "haiku" in backend._model
+
+
+def test_claude_backend_init_resolves_short_names():
+    """ClaudeBackend resolves short model names to full IDs."""
+    from timmy.backends import ClaudeBackend, CLAUDE_MODELS
+    backend = ClaudeBackend(api_key="sk-test", model="sonnet")
+    assert backend._model == CLAUDE_MODELS["sonnet"]
+
+
+def test_claude_backend_init_passes_through_full_model_id():
+    """ClaudeBackend passes through full model IDs unchanged."""
+    from timmy.backends import ClaudeBackend
+    backend = ClaudeBackend(api_key="sk-test", model="claude-haiku-4-5-20251001")
+    assert backend._model == "claude-haiku-4-5-20251001"
+
+
+def test_claude_backend_run_no_key_returns_error():
+    """run() gracefully returns error message when no API key."""
+    from timmy.backends import ClaudeBackend
+    backend = ClaudeBackend(api_key="", model="haiku")
+    result = backend.run("hello")
+    assert "not configured" in result.content
+
+
+def test_claude_backend_run_success():
+    """run() returns content from the Anthropic API on success."""
+    from timmy.backends import ClaudeBackend
+
+    backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
+
+    mock_content = MagicMock()
+    mock_content.text = "Sir, affirmative. I am Timmy."
+
+    mock_response = MagicMock()
+    mock_response.content = [mock_content]
+
+    mock_client = MagicMock()
+    mock_client.messages.create.return_value = mock_response
+
+    with patch.object(backend, "_get_client", return_value=mock_client):
+        result = backend.run("Who are you?")
+
+    assert "Timmy" in result.content
+    assert len(backend._history) == 2  # user + assistant
+
+
+def test_claude_backend_run_handles_api_error():
+    """run() returns a graceful error when the API raises."""
+    from timmy.backends import ClaudeBackend
+
+    backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
+
+    mock_client = MagicMock()
+    mock_client.messages.create.side_effect = ConnectionError("network down")
+
+    with patch.object(backend, "_get_client", return_value=mock_client):
+        result = backend.run("hello")
+
+    assert "unavailable" in result.content
+
+
+def test_claude_backend_history_rolling_window():
+    """History should be capped at 20 entries (10 exchanges)."""
+    from timmy.backends import ClaudeBackend
+
+    backend = ClaudeBackend(api_key="sk-ant-test", model="haiku")
+
+    mock_content = MagicMock()
+    mock_content.text = "OK."
+    mock_response = MagicMock()
+    mock_response.content = [mock_content]
+    mock_client = MagicMock()
+    mock_client.messages.create.return_value = mock_response
+
+    with patch.object(backend, "_get_client", return_value=mock_client):
+        for i in range(15):
+            backend.run(f"message {i}")
+
+    assert len(backend._history) <= 20
diff --git a/tests/timmy/test_interview.py b/tests/timmy/test_interview.py
new file mode 100644
index 00000000..1757ebea
--- /dev/null
+++ b/tests/timmy/test_interview.py
@@ -0,0 +1,140 @@
+"""Tests for timmy.interview — structured interview runner."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from timmy.interview import (
+    INTERVIEW_QUESTIONS,
+    InterviewEntry,
+    format_transcript,
+    run_interview,
+)
+
+
+# ---------------------------------------------------------------------------
+# INTERVIEW_QUESTIONS sanity checks
+# ---------------------------------------------------------------------------
+
+
+def test_interview_questions_not_empty():
+    """There should be at least one interview question defined."""
+    assert len(INTERVIEW_QUESTIONS) > 0
+
+
+def test_interview_questions_have_required_keys():
+    """Every question dict must have 'category' and 'question'."""
+    for item in INTERVIEW_QUESTIONS:
+        assert "category" in item
+        assert "question" in item
+        assert isinstance(item["category"], str)
+        assert isinstance(item["question"], str)
+
+
+# ---------------------------------------------------------------------------
+# run_interview()
+# ---------------------------------------------------------------------------
+
+
+def test_run_interview_calls_chat_for_each_question():
+    """run_interview should call the chat function once per question."""
+    mock_chat = MagicMock(return_value="Answer.")
+    transcript = run_interview(mock_chat)
+
+    assert mock_chat.call_count == len(INTERVIEW_QUESTIONS)
+    assert len(transcript) == len(INTERVIEW_QUESTIONS)
+
+
+def test_run_interview_returns_interview_entries():
+    """Each element in the transcript should be an InterviewEntry."""
+    mock_chat = MagicMock(return_value="I am Timmy.")
+    transcript = run_interview(mock_chat)
+
+    for entry in transcript:
+        assert isinstance(entry, InterviewEntry)
+        assert entry.answer == "I am Timmy."
+
+
+def test_run_interview_with_custom_questions():
+    """run_interview should accept custom question lists."""
+    custom_qs = [
+        {"category": "Test", "question": "What is 2+2?"},
+    ]
+    mock_chat = MagicMock(return_value="Four.")
+    transcript = run_interview(mock_chat, questions=custom_qs)
+
+    assert len(transcript) == 1
+    assert transcript[0].category == "Test"
+    assert transcript[0].question == "What is 2+2?"
+    assert transcript[0].answer == "Four."
+
+
+def test_run_interview_on_answer_callback():
+    """on_answer callback should be invoked for each question."""
+    callback = MagicMock()
+    mock_chat = MagicMock(return_value="OK.")
+
+    run_interview(mock_chat, on_answer=callback)
+
+    assert callback.call_count == len(INTERVIEW_QUESTIONS)
+    # Each call should receive an InterviewEntry
+    for call in callback.call_args_list:
+        entry = call[0][0]
+        assert isinstance(entry, InterviewEntry)
+
+
+def test_run_interview_handles_chat_error():
+    """If the chat function raises, the answer should contain the error."""
+    def failing_chat(msg):
+        raise ConnectionError("Ollama offline")
+
+    transcript = run_interview(failing_chat)
+
+    assert len(transcript) == len(INTERVIEW_QUESTIONS)
+    for entry in transcript:
+        assert "Error" in entry.answer
+        assert "Ollama offline" in entry.answer
+
+
+# ---------------------------------------------------------------------------
+# format_transcript()
+# ---------------------------------------------------------------------------
+
+
+def test_format_transcript_empty():
+    """Formatting an empty transcript should return a placeholder."""
+    result = format_transcript([])
+    assert "No interview data" in result
+
+
+def test_format_transcript_includes_header():
+    """Formatted transcript should include the header."""
+    entries = [InterviewEntry(category="Identity", question="Who are you?", answer="Timmy.")]
+    result = format_transcript(entries)
+    assert "TIMMY INTERVIEW TRANSCRIPT" in result
+
+
+def test_format_transcript_includes_questions_and_answers():
+    """Formatted transcript should include Q and A."""
+    entries = [
+        InterviewEntry(category="Identity", question="Who are you?", answer="Timmy."),
+        InterviewEntry(category="Values", question="What matters?", answer="Sovereignty."),
+    ]
+    result = format_transcript(entries)
+
+    assert "Q: Who are you?" in result
+    assert "A: Timmy." in result
+    assert "Q: What matters?" in result
+    assert "A: Sovereignty." in result
+
+
+def test_format_transcript_groups_by_category():
+    """Categories should appear as section headers."""
+    entries = [
+        InterviewEntry(category="Identity", question="Q1", answer="A1"),
+        InterviewEntry(category="Values", question="Q2", answer="A2"),
+    ]
+    result = format_transcript(entries)
+
+    assert "--- Identity ---" in result
+    assert "--- Values ---" in result