diff --git a/.env.example b/.env.example
index a58439f6..47ca04d6 100644
--- a/.env.example
+++ b/.env.example
@@ -30,6 +30,15 @@
# 8b ~16 GB RAM | 70b ~140 GB RAM | 405b ~810 GB RAM
# AIRLLM_MODEL_SIZE=70b
+# ── Grok (xAI) — premium cloud augmentation ──────────────────────────────────
+# Enable Grok as an opt-in premium backend for frontier reasoning.
+# Local-first ethos is preserved — Grok only activates when explicitly enabled.
+# GROK_ENABLED=false
+# XAI_API_KEY=xai-...
+# GROK_DEFAULT_MODEL=grok-3-fast
+# GROK_MAX_SATS_PER_QUERY=200
+# GROK_FREE=false
+
# ── L402 Lightning secrets ───────────────────────────────────────────────────
# HMAC secret for invoice verification. MUST be changed in production.
# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"
diff --git a/docker-compose.yml b/docker-compose.yml
index 91180830..c19bd55b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -32,6 +32,10 @@ services:
DEBUG: "true"
# Point to host Ollama (Mac default). Override in .env if different.
OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
+ # Grok (xAI) — opt-in premium cloud backend
+ GROK_ENABLED: "${GROK_ENABLED:-false}"
+ XAI_API_KEY: "${XAI_API_KEY:-}"
+ GROK_DEFAULT_MODEL: "${GROK_DEFAULT_MODEL:-grok-3-fast}"
extra_hosts:
- "host.docker.internal:host-gateway" # Linux compatibility
networks:
diff --git a/src/config.py b/src/config.py
index 231fb879..30d3c644 100644
--- a/src/config.py
+++ b/src/config.py
@@ -24,13 +24,22 @@ class Settings(BaseSettings):
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
# fall back to Ollama otherwise
- timmy_model_backend: Literal["ollama", "airllm", "auto"] = "ollama"
+ timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
+ # ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
+ # Grok is a premium augmentation layer — local-first ethos preserved.
+ # Only used when explicitly enabled and query complexity warrants it.
+ grok_enabled: bool = False
+ xai_api_key: str = ""
+ grok_default_model: str = "grok-3-fast"
+ grok_max_sats_per_query: int = 200
+ grok_free: bool = False # Skip Lightning invoice when user has own API key
+
# ── Spark Intelligence ────────────────────────────────────────────────
# Enable/disable the Spark cognitive layer.
# When enabled, Spark captures swarm events, runs EIDOS predictions,
diff --git a/src/dashboard/app.py b/src/dashboard/app.py
index 0447f723..b0bc7fa5 100644
--- a/src/dashboard/app.py
+++ b/src/dashboard/app.py
@@ -35,6 +35,7 @@ from dashboard.routes.scripture import router as scripture_router
from dashboard.routes.self_coding import router as self_coding_router
from dashboard.routes.self_coding import self_modify_router
from dashboard.routes.hands import router as hands_router
+from dashboard.routes.grok import router as grok_router
from infrastructure.router.api import router as cascade_router
logging.basicConfig(
@@ -206,6 +207,7 @@ app.include_router(work_orders_router)
app.include_router(tasks_router)
app.include_router(scripture_router)
app.include_router(hands_router)
+app.include_router(grok_router)
app.include_router(cascade_router)
diff --git a/src/dashboard/routes/grok.py b/src/dashboard/routes/grok.py
new file mode 100644
index 00000000..653c1337
--- /dev/null
+++ b/src/dashboard/routes/grok.py
@@ -0,0 +1,234 @@
+"""Grok (xAI) dashboard routes — premium cloud augmentation controls.
+
+Endpoints
+---------
+GET /grok/status — JSON status (API)
+POST /grok/toggle — Enable/disable Grok Mode (HTMX)
+POST /grok/chat — Direct Grok query (HTMX)
+GET /grok/stats — Usage statistics (JSON)
+"""
+
+import logging
+from pathlib import Path
+
+from fastapi import APIRouter, Form, Request
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.templating import Jinja2Templates
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/grok", tags=["grok"])
+templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
+
+# In-memory toggle state (persists per process lifetime)
+_grok_mode_active: bool = False
+
+
+@router.get("/status")
+async def grok_status():
+ """Return Grok backend status as JSON."""
+ from timmy.backends import grok_available
+
+ status = {
+ "enabled": settings.grok_enabled,
+ "available": grok_available(),
+ "active": _grok_mode_active,
+ "model": settings.grok_default_model,
+ "free_mode": settings.grok_free,
+ "max_sats_per_query": settings.grok_max_sats_per_query,
+ "api_key_set": bool(settings.xai_api_key),
+ }
+
+ # Include usage stats if backend exists
+ try:
+ from timmy.backends import get_grok_backend
+ backend = get_grok_backend()
+ status["stats"] = {
+ "total_requests": backend.stats.total_requests,
+ "total_prompt_tokens": backend.stats.total_prompt_tokens,
+ "total_completion_tokens": backend.stats.total_completion_tokens,
+ "estimated_cost_sats": backend.stats.estimated_cost_sats,
+ "errors": backend.stats.errors,
+ }
+ except Exception:
+ status["stats"] = None
+
+ return status
+
+
+@router.post("/toggle")
+async def toggle_grok_mode(request: Request):
+ """Toggle Grok Mode on/off. Returns HTMX partial for the toggle card."""
+ global _grok_mode_active
+
+ from timmy.backends import grok_available
+
+ if not grok_available():
+ return HTMLResponse(
+            '<div class="grok-status grok-status--error">'
+            "Grok unavailable — set GROK_ENABLED=true and XAI_API_KEY in .env"
+            "</div>",
+ status_code=200,
+ )
+
+ _grok_mode_active = not _grok_mode_active
+ state = "ACTIVE" if _grok_mode_active else "STANDBY"
+
+ logger.info("Grok Mode toggled: %s", state)
+
+ # Log to Spark
+ try:
+ from spark.engine import spark_engine
+ import json
+
+ spark_engine.on_tool_executed(
+ agent_id="timmy",
+ tool_name="grok_mode_toggle",
+ success=True,
+ )
+ except Exception:
+ pass
+
+ return HTMLResponse(
+ _render_toggle_card(_grok_mode_active),
+ status_code=200,
+ )
+
+
+@router.post("/chat", response_class=HTMLResponse)
+async def grok_chat(request: Request, message: str = Form(...)):
+ """Send a message directly to Grok and return HTMX chat partial."""
+ from timmy.backends import grok_available, get_grok_backend
+ from dashboard.store import message_log
+ from datetime import datetime
+
+ timestamp = datetime.now().strftime("%H:%M:%S")
+
+ if not grok_available():
+ error = "Grok is not available. Set GROK_ENABLED=true and XAI_API_KEY."
+ message_log.append(role="user", content=f"[Grok] {message}", timestamp=timestamp)
+ message_log.append(role="error", content=error, timestamp=timestamp)
+ return templates.TemplateResponse(
+ request,
+ "partials/chat_message.html",
+ {
+ "user_message": f"[Grok] {message}",
+ "response": None,
+ "error": error,
+ "timestamp": timestamp,
+ },
+ )
+
+ backend = get_grok_backend()
+
+ # Generate invoice if monetization is active
+ invoice_note = ""
+ if not settings.grok_free:
+ try:
+ from lightning.factory import get_backend as get_ln_backend
+
+ ln = get_ln_backend()
+ sats = min(settings.grok_max_sats_per_query, 100)
+ inv = ln.create_invoice(sats, f"Grok: {message[:50]}")
+ invoice_note = f" | {sats} sats"
+ except Exception:
+ pass
+
+ try:
+ result = backend.run(message)
+ response_text = f"**[Grok]{invoice_note}:** {result.content}"
+ except Exception as exc:
+ response_text = None
+ error = f"Grok error: {exc}"
+
+ message_log.append(
+ role="user", content=f"[Ask Grok] {message}", timestamp=timestamp
+ )
+ if response_text:
+ message_log.append(role="agent", content=response_text, timestamp=timestamp)
+ return templates.TemplateResponse(
+ request,
+ "partials/chat_message.html",
+ {
+ "user_message": f"[Ask Grok] {message}",
+ "response": response_text,
+ "error": None,
+ "timestamp": timestamp,
+ },
+ )
+ else:
+ message_log.append(role="error", content=error, timestamp=timestamp)
+ return templates.TemplateResponse(
+ request,
+ "partials/chat_message.html",
+ {
+ "user_message": f"[Ask Grok] {message}",
+ "response": None,
+ "error": error,
+ "timestamp": timestamp,
+ },
+ )
+
+
+@router.get("/stats")
+async def grok_stats():
+ """Return detailed Grok usage statistics."""
+ try:
+ from timmy.backends import get_grok_backend
+
+ backend = get_grok_backend()
+ return {
+ "total_requests": backend.stats.total_requests,
+ "total_prompt_tokens": backend.stats.total_prompt_tokens,
+ "total_completion_tokens": backend.stats.total_completion_tokens,
+ "total_latency_ms": round(backend.stats.total_latency_ms, 2),
+ "avg_latency_ms": round(
+ backend.stats.total_latency_ms / max(backend.stats.total_requests, 1),
+ 2,
+ ),
+ "estimated_cost_sats": backend.stats.estimated_cost_sats,
+ "errors": backend.stats.errors,
+ "model": settings.grok_default_model,
+ }
+ except Exception as exc:
+ return {"error": str(exc)}
+
+
+def _render_toggle_card(active: bool) -> str:
+ """Render the Grok Mode toggle card HTML."""
+ color = "#00ff88" if active else "#666"
+ state = "ACTIVE" if active else "STANDBY"
+ glow = "0 0 20px rgba(0, 255, 136, 0.4)" if active else "none"
+
+    return f"""
+    <div id="grok-toggle-card" class="card"
+         style="border: 1px solid {color}; box-shadow: {glow};">
+        <div style="color: {color}; font-weight: bold;">
+            GROK MODE: {state}
+        </div>
+        <div class="muted">
+            xAI frontier reasoning | {settings.grok_default_model}
+        </div>
+        <button hx-post="/grok/toggle"
+                hx-target="#grok-toggle-card"
+                hx-swap="outerHTML">
+            TOGGLE
+        </button>
+    </div>
+    """
+
+
+def is_grok_mode_active() -> bool:
+ """Check if Grok Mode is currently active (used by other modules)."""
+ return _grok_mode_active
diff --git a/src/dashboard/templates/base.html b/src/dashboard/templates/base.html
index 783bf8ae..a0973599 100644
--- a/src/dashboard/templates/base.html
+++ b/src/dashboard/templates/base.html
@@ -39,6 +39,7 @@
LEDGER
MEMORY
ROUTER
+ GROK
UPGRADES
SELF-CODING
HANDS
diff --git a/src/dashboard/templates/mission_control.html b/src/dashboard/templates/mission_control.html
index 2f9bbe80..c1bad5ed 100644
--- a/src/dashboard/templates/mission_control.html
+++ b/src/dashboard/templates/mission_control.html
@@ -59,10 +59,61 @@
+        <!-- Grok Mode card (markup reconstructed — original tags were stripped
+             from this patch; verify against _render_toggle_card in routes/grok.py) -->
+        <div id="grok-toggle-card" class="card">
+            <div style="font-weight: bold;">
+                GROK MODE: LOADING...
+            </div>
+            <div class="muted">
+                xAI frontier reasoning augmentation
+            </div>
+            <button hx-post="/grok/toggle"
+                    hx-target="#grok-toggle-card"
+                    hx-swap="outerHTML">
+                TOGGLE
+            </button>
+        </div>
@@ -61,4 +71,20 @@
}
}
scrollChat();
+
+ function askGrok() {
+ var input = document.getElementById('timmy-chat-input');
+ if (!input || !input.value.trim()) return;
+ var form = document.getElementById('timmy-chat-form');
+ // Temporarily redirect form to Grok endpoint
+ var originalAction = form.getAttribute('hx-post');
+ form.setAttribute('hx-post', '/grok/chat');
+ htmx.process(form);
+ htmx.trigger(form, 'submit');
+ // Restore original action after submission
+ setTimeout(function() {
+ form.setAttribute('hx-post', originalAction);
+ htmx.process(form);
+ }, 100);
+ }
diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py
index 3118986c..17aa479e 100644
--- a/src/infrastructure/router/cascade.py
+++ b/src/infrastructure/router/cascade.py
@@ -220,10 +220,10 @@ class CascadeRouter:
except ImportError:
return False
- elif provider.type in ("openai", "anthropic"):
+ elif provider.type in ("openai", "anthropic", "grok"):
# Check if API key is set
return provider.api_key is not None and provider.api_key != ""
-
+
return True
async def complete(
@@ -337,6 +337,14 @@ class CascadeRouter:
temperature=temperature,
max_tokens=max_tokens,
)
+ elif provider.type == "grok":
+ result = await self._call_grok(
+ provider=provider,
+ messages=messages,
+ model=model or provider.get_default_model(),
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
else:
raise ValueError(f"Unknown provider type: {provider.type}")
@@ -455,7 +463,40 @@ class CascadeRouter:
"content": response.content[0].text,
"model": response.model,
}
-
+
+ async def _call_grok(
+ self,
+ provider: Provider,
+ messages: list[dict],
+ model: str,
+ temperature: float,
+ max_tokens: Optional[int],
+ ) -> dict:
+ """Call xAI Grok API via OpenAI-compatible SDK."""
+ import httpx
+ import openai
+
+ client = openai.AsyncOpenAI(
+ api_key=provider.api_key,
+ base_url=provider.base_url or "https://api.x.ai/v1",
+ timeout=httpx.Timeout(300.0),
+ )
+
+ kwargs = {
+ "model": model,
+ "messages": messages,
+ "temperature": temperature,
+ }
+ if max_tokens:
+ kwargs["max_tokens"] = max_tokens
+
+ response = await client.chat.completions.create(**kwargs)
+
+ return {
+ "content": response.choices[0].message.content,
+ "model": response.model,
+ }
+
def _record_success(self, provider: Provider, latency_ms: float) -> None:
"""Record a successful request."""
provider.metrics.total_requests += 1
diff --git a/src/timmy/agent.py b/src/timmy/agent.py
index c787adcf..0f52bc78 100644
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -20,12 +20,12 @@ from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
- from timmy.backends import TimmyAirLLMAgent
+ from timmy.backends import GrokBackend, TimmyAirLLMAgent
logger = logging.getLogger(__name__)
# Union type for callers that want to hint the return type.
-TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
+TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
@@ -68,12 +68,12 @@ def _resolve_backend(requested: str | None) -> str:
if requested is not None:
return requested
- configured = settings.timmy_model_backend # "ollama" | "airllm" | "auto"
+ configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
- from timmy.backends import airllm_available, is_apple_silicon
+ from timmy.backends import airllm_available, grok_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
@@ -97,6 +97,10 @@ def create_timmy(
resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size
+ if resolved == "grok":
+ from timmy.backends import GrokBackend
+ return GrokBackend()
+
if resolved == "airllm":
from timmy.backends import TimmyAirLLMAgent
return TimmyAirLLMAgent(model_size=size)
diff --git a/src/timmy/backends.py b/src/timmy/backends.py
index ba94f304..e5745c43 100644
--- a/src/timmy/backends.py
+++ b/src/timmy/backends.py
@@ -1,20 +1,26 @@
-"""AirLLM backend — only imported when the airllm extra is installed.
+"""LLM backends — AirLLM (local big models) and Grok (xAI premium cloud).
-Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that
-exposes both the run(message, stream) → RunResult interface used by the
-dashboard and the print_response(message, stream) interface used by the CLI.
-On Apple Silicon (arm64 Darwin) the MLX backend is selected automatically;
-everywhere else AutoModel (PyTorch) is used.
+Provides drop-in replacements for the Agno Agent that expose the same
+run(message, stream) → RunResult interface used by the dashboard and the
+print_response(message, stream) interface used by the CLI.
-No cloud. No telemetry. Sats are sovereignty, boss.
+Backends:
+ - TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
+ - GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
+
+No cloud by default. No telemetry. Sats are sovereignty, boss.
"""
+import logging
import platform
-from dataclasses import dataclass
-from typing import Literal
+import time
+from dataclasses import dataclass, field
+from typing import Literal, Optional
from timmy.prompts import TIMMY_SYSTEM_PROMPT
+logger = logging.getLogger(__name__)
+
# HuggingFace model IDs for each supported size.
_AIRLLM_MODELS: dict[str, str] = {
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -133,3 +139,281 @@ class TimmyAirLLMAgent:
Console().print(Markdown(text))
except ImportError:
print(text)
+
+
+# ── Grok (xAI) Backend ─────────────────────────────────────────────────────
+# Premium cloud augmentation — opt-in only, never the default path.
+
+# Available Grok models (configurable via GROK_DEFAULT_MODEL)
+GROK_MODELS: dict[str, str] = {
+ "grok-3-fast": "grok-3-fast",
+ "grok-3": "grok-3",
+ "grok-3-mini": "grok-3-mini",
+ "grok-3-mini-fast": "grok-3-mini-fast",
+}
+
+
+@dataclass
+class GrokUsageStats:
+ """Tracks Grok API usage for cost monitoring and Spark logging."""
+ total_requests: int = 0
+ total_prompt_tokens: int = 0
+ total_completion_tokens: int = 0
+ total_latency_ms: float = 0.0
+ errors: int = 0
+ last_request_at: Optional[float] = None
+
+ @property
+ def estimated_cost_sats(self) -> int:
+ """Rough cost estimate in sats based on token usage."""
+ # ~$5/1M input tokens, ~$15/1M output tokens for Grok
+ # At ~$100k/BTC, 1 sat ≈ $0.001
+ input_cost = (self.total_prompt_tokens / 1_000_000) * 5
+ output_cost = (self.total_completion_tokens / 1_000_000) * 15
+ total_usd = input_cost + output_cost
+ return int(total_usd / 0.001) # Convert to sats
+
+
+class GrokBackend:
+ """xAI Grok backend — premium cloud augmentation for frontier reasoning.
+
+ Uses the OpenAI-compatible SDK to connect to xAI's API.
+ Only activated when GROK_ENABLED=true and XAI_API_KEY is set.
+
+ Exposes the same interface as TimmyAirLLMAgent and Agno Agent:
+ run(message, stream) → RunResult [dashboard]
+ print_response(message, stream) → None [CLI]
+ health_check() → dict [monitoring]
+ """
+
+ def __init__(
+ self,
+ api_key: Optional[str] = None,
+ model: Optional[str] = None,
+ ) -> None:
+ from config import settings
+
+ self._api_key = api_key or settings.xai_api_key
+ self._model = model or settings.grok_default_model
+ self._history: list[dict[str, str]] = []
+ self.stats = GrokUsageStats()
+
+ if not self._api_key:
+ logger.warning(
+ "GrokBackend created without XAI_API_KEY — "
+ "calls will fail until key is configured"
+ )
+
+ def _get_client(self):
+ """Create OpenAI client configured for xAI endpoint."""
+ import httpx
+ from openai import OpenAI
+
+ return OpenAI(
+ api_key=self._api_key,
+ base_url="https://api.x.ai/v1",
+ timeout=httpx.Timeout(300.0),
+ )
+
+ async def _get_async_client(self):
+ """Create async OpenAI client configured for xAI endpoint."""
+ import httpx
+ from openai import AsyncOpenAI
+
+ return AsyncOpenAI(
+ api_key=self._api_key,
+ base_url="https://api.x.ai/v1",
+ timeout=httpx.Timeout(300.0),
+ )
+
+ # ── Public interface (mirrors Agno Agent) ─────────────────────────────
+
+ def run(self, message: str, *, stream: bool = False) -> RunResult:
+ """Synchronous inference via Grok API.
+
+ Args:
+ message: User prompt
+ stream: Accepted for API compat; Grok returns full response
+
+ Returns:
+ RunResult with response content
+ """
+ if not self._api_key:
+ return RunResult(
+ content="Grok is not configured. Set XAI_API_KEY to enable."
+ )
+
+ start = time.time()
+ messages = self._build_messages(message)
+
+ try:
+ client = self._get_client()
+ response = client.chat.completions.create(
+ model=self._model,
+ messages=messages,
+ temperature=0.7,
+ )
+
+ content = response.choices[0].message.content or ""
+ latency_ms = (time.time() - start) * 1000
+
+ # Track usage
+ self.stats.total_requests += 1
+ self.stats.total_latency_ms += latency_ms
+ self.stats.last_request_at = time.time()
+ if response.usage:
+ self.stats.total_prompt_tokens += response.usage.prompt_tokens
+ self.stats.total_completion_tokens += response.usage.completion_tokens
+
+ # Update conversation history
+ self._history.append({"role": "user", "content": message})
+ self._history.append({"role": "assistant", "content": content})
+ # Keep last 10 turns
+ if len(self._history) > 20:
+ self._history = self._history[-20:]
+
+ logger.info(
+ "Grok response: %d tokens in %.0fms (model=%s)",
+ response.usage.completion_tokens if response.usage else 0,
+ latency_ms,
+ self._model,
+ )
+
+ return RunResult(content=content)
+
+ except Exception as exc:
+ self.stats.errors += 1
+ logger.error("Grok API error: %s", exc)
+ return RunResult(
+ content=f"Grok temporarily unavailable: {exc}"
+ )
+
+ async def arun(self, message: str) -> RunResult:
+ """Async inference via Grok API — used by cascade router and tools."""
+ if not self._api_key:
+ return RunResult(
+ content="Grok is not configured. Set XAI_API_KEY to enable."
+ )
+
+ start = time.time()
+ messages = self._build_messages(message)
+
+ try:
+ client = await self._get_async_client()
+ response = await client.chat.completions.create(
+ model=self._model,
+ messages=messages,
+ temperature=0.7,
+ )
+
+ content = response.choices[0].message.content or ""
+ latency_ms = (time.time() - start) * 1000
+
+ # Track usage
+ self.stats.total_requests += 1
+ self.stats.total_latency_ms += latency_ms
+ self.stats.last_request_at = time.time()
+ if response.usage:
+ self.stats.total_prompt_tokens += response.usage.prompt_tokens
+ self.stats.total_completion_tokens += response.usage.completion_tokens
+
+ # Update conversation history
+ self._history.append({"role": "user", "content": message})
+ self._history.append({"role": "assistant", "content": content})
+ if len(self._history) > 20:
+ self._history = self._history[-20:]
+
+ logger.info(
+ "Grok async response: %d tokens in %.0fms (model=%s)",
+ response.usage.completion_tokens if response.usage else 0,
+ latency_ms,
+ self._model,
+ )
+
+ return RunResult(content=content)
+
+ except Exception as exc:
+ self.stats.errors += 1
+ logger.error("Grok async API error: %s", exc)
+ return RunResult(
+ content=f"Grok temporarily unavailable: {exc}"
+ )
+
+ def print_response(self, message: str, *, stream: bool = True) -> None:
+ """Run inference and render the response to stdout (CLI interface)."""
+ result = self.run(message, stream=stream)
+ try:
+ from rich.console import Console
+ from rich.markdown import Markdown
+ Console().print(Markdown(result.content))
+ except ImportError:
+ print(result.content)
+
+ def health_check(self) -> dict:
+ """Check Grok API connectivity and return status."""
+ if not self._api_key:
+ return {
+ "ok": False,
+ "error": "XAI_API_KEY not configured",
+ "backend": "grok",
+ "model": self._model,
+ }
+
+ try:
+ client = self._get_client()
+ # Lightweight check — list models
+ client.models.list()
+ return {
+ "ok": True,
+ "error": None,
+ "backend": "grok",
+ "model": self._model,
+ "stats": {
+ "total_requests": self.stats.total_requests,
+ "estimated_cost_sats": self.stats.estimated_cost_sats,
+ },
+ }
+ except Exception as exc:
+ return {
+ "ok": False,
+ "error": str(exc),
+ "backend": "grok",
+ "model": self._model,
+ }
+
+ @property
+ def estimated_cost(self) -> int:
+ """Return estimated cost in sats for all requests so far."""
+ return self.stats.estimated_cost_sats
+
+ # ── Private helpers ───────────────────────────────────────────────────
+
+ def _build_messages(self, message: str) -> list[dict[str, str]]:
+ """Build the messages array for the API call."""
+ messages = [{"role": "system", "content": TIMMY_SYSTEM_PROMPT}]
+ # Include conversation history for context
+ messages.extend(self._history[-10:])
+ messages.append({"role": "user", "content": message})
+ return messages
+
+
+# ── Module-level Grok singleton ─────────────────────────────────────────────
+
+_grok_backend: Optional[GrokBackend] = None
+
+
+def get_grok_backend() -> GrokBackend:
+ """Get or create the Grok backend singleton."""
+ global _grok_backend
+ if _grok_backend is None:
+ _grok_backend = GrokBackend()
+ return _grok_backend
+
+
+def grok_available() -> bool:
+ """Return True when Grok is enabled and API key is configured."""
+ try:
+ from config import settings
+ return settings.grok_enabled and bool(settings.xai_api_key)
+ except Exception:
+ return False
diff --git a/src/timmy/tools.py b/src/timmy/tools.py
index cfde2e01..3eb7f6e1 100644
--- a/src/timmy/tools.py
+++ b/src/timmy/tools.py
@@ -278,39 +278,104 @@ def create_devops_tools(base_dir: str | Path | None = None):
return toolkit
+def consult_grok(query: str) -> str:
+ """Consult Grok (xAI) for frontier reasoning on complex questions.
+
+ Use this tool when a question requires advanced reasoning, real-time
+ knowledge, or capabilities beyond the local model. Grok is a premium
+ cloud backend — use sparingly and only for high-complexity queries.
+
+ Args:
+ query: The question or reasoning task to send to Grok.
+
+ Returns:
+ Grok's response text, or an error/status message.
+ """
+ from config import settings
+ from timmy.backends import grok_available, get_grok_backend
+
+ if not grok_available():
+ return (
+ "Grok is not available. Enable with GROK_ENABLED=true "
+ "and set XAI_API_KEY in your .env file."
+ )
+
+ backend = get_grok_backend()
+
+ # Log to Spark if available
+ try:
+ from spark.engine import spark_engine
+ spark_engine.on_tool_executed(
+ agent_id="timmy",
+ tool_name="consult_grok",
+ success=True,
+ )
+ except Exception:
+ pass
+
+ # Generate Lightning invoice for monetization (unless free mode)
+ invoice_info = ""
+ if not settings.grok_free:
+ try:
+ from lightning.factory import get_backend as get_ln_backend
+ ln = get_ln_backend()
+ sats = min(settings.grok_max_sats_per_query, 100)
+ inv = ln.create_invoice(sats, f"Grok query: {query[:50]}")
+ invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
+ except Exception:
+ pass
+
+ result = backend.run(query)
+
+ response = result.content
+ if invoice_info:
+ response += invoice_info
+
+ return response
+
+
def create_full_toolkit(base_dir: str | Path | None = None):
"""Create a full toolkit with all available tools (for Timmy).
-
+
Includes: web search, file read/write, shell commands, python execution,
- and memory search for contextual recall.
+ memory search for contextual recall, and Grok consultation.
"""
if not _AGNO_TOOLS_AVAILABLE:
# Return None when tools aren't available (tests)
return None
toolkit = Toolkit(name="full")
-
+
# Web search
search_tools = DuckDuckGoTools()
toolkit.register(search_tools.web_search, name="web_search")
-
+
# Python execution
python_tools = PythonTools()
toolkit.register(python_tools.run_python_code, name="python")
-
+
# Shell commands
shell_tools = ShellTools()
toolkit.register(shell_tools.run_shell_command, name="shell")
-
+
# File operations
base_path = Path(base_dir) if base_dir else Path.cwd()
file_tools = FileTools(base_dir=base_path)
toolkit.register(file_tools.read_file, name="read_file")
toolkit.register(file_tools.save_file, name="write_file")
toolkit.register(file_tools.list_files, name="list_files")
-
+
# Calculator — exact arithmetic (never let the LLM guess)
toolkit.register(calculator, name="calculator")
+ # Grok consultation — premium frontier reasoning (opt-in)
+ try:
+ from timmy.backends import grok_available
+ if grok_available():
+ toolkit.register(consult_grok, name="consult_grok")
+ logger.info("Grok consultation tool registered")
+ except Exception:
+ logger.debug("Grok tool not available")
+
# Memory search - semantic recall
try:
from timmy.semantic_memory import memory_search
@@ -407,6 +472,11 @@ def get_all_available_tools() -> dict[str, dict]:
"description": "Evaluate mathematical expressions with exact results",
"available_in": ["timmy"],
},
+ "consult_grok": {
+ "name": "Consult Grok",
+ "description": "Premium frontier reasoning via xAI Grok (opt-in, Lightning-payable)",
+ "available_in": ["timmy"],
+ },
}
# ── Git tools ─────────────────────────────────────────────────────────────
diff --git a/tests/timmy/test_grok_backend.py b/tests/timmy/test_grok_backend.py
new file mode 100644
index 00000000..688ded4a
--- /dev/null
+++ b/tests/timmy/test_grok_backend.py
@@ -0,0 +1,284 @@
+"""Tests for GrokBackend in src/timmy/backends.py and Grok dashboard routes."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ── grok_available ───────────────────────────────────────────────────────────
+
+def test_grok_available_false_when_disabled():
+    """Grok not available when GROK_ENABLED is false, even with a key set."""
+    # NOTE(review): patching "config.settings" only works if grok_available()
+    # resolves settings at call time (e.g. `import config; config.settings`).
+    # If backends.py does `from config import settings` at import time, this
+    # patch is a no-op — confirm against timmy/backends.py.
+    with patch("config.settings") as mock_settings:
+        mock_settings.grok_enabled = False
+        mock_settings.xai_api_key = "xai-test-key"
+        from timmy.backends import grok_available
+        assert grok_available() is False
+
+
+def test_grok_available_false_when_no_key():
+    """Grok not available when XAI_API_KEY is empty, even if enabled."""
+    # Enabled flag alone must not be enough — an empty key means no backend.
+    with patch("config.settings") as mock_settings:
+        mock_settings.grok_enabled = True
+        mock_settings.xai_api_key = ""
+        # Import inside the patch so grok_available sees the mocked settings
+        # (assumes call-time settings lookup — see backends.py).
+        from timmy.backends import grok_available
+        assert grok_available() is False
+
+
+def test_grok_available_true_when_enabled_and_key_set():
+    """Grok available when both enabled and key are set."""
+    # The only combination that should report availability: flag on + key set.
+    with patch("config.settings") as mock_settings:
+        mock_settings.grok_enabled = True
+        mock_settings.xai_api_key = "xai-test-key"
+        from timmy.backends import grok_available
+        assert grok_available() is True
+
+
+# ── GrokBackend construction ────────────────────────────────────────────────
+
+def test_grok_backend_init_with_explicit_params():
+    """GrokBackend can be created with explicit api_key and model."""
+    from timmy.backends import GrokBackend
+    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+    # Constructor args are stored verbatim on the private attributes.
+    assert backend._api_key == "xai-test"
+    assert backend._model == "grok-3-fast"
+    # A fresh backend starts with zeroed usage stats.
+    assert backend.stats.total_requests == 0
+
+
+def test_grok_backend_init_from_settings():
+    """GrokBackend reads from config.settings when no params given."""
+    # NOTE(review): assumes GrokBackend.__init__ reads config.settings lazily
+    # at construction time rather than a module-level `from config import
+    # settings` binding — verify in timmy/backends.py.
+    with patch("config.settings") as mock_settings:
+        mock_settings.xai_api_key = "xai-from-env"
+        mock_settings.grok_default_model = "grok-3"
+        from timmy.backends import GrokBackend
+        backend = GrokBackend()
+        assert backend._api_key == "xai-from-env"
+        assert backend._model == "grok-3"
+
+
+def test_grok_backend_run_no_key_returns_error():
+    """run() gracefully returns error message when no API key."""
+    from timmy.backends import GrokBackend
+    backend = GrokBackend(api_key="", model="grok-3-fast")
+    # Misconfiguration must surface as a readable message, not an exception.
+    result = backend.run("hello")
+    assert "not configured" in result.content
+
+
+def test_grok_backend_run_success():
+    """run() returns content from the API on success and records usage stats."""
+    from timmy.backends import GrokBackend
+
+    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+
+    # Shape the mock like an OpenAI-style chat completion response:
+    # choices[0].message.content plus a usage block with token counts.
+    mock_response = MagicMock()
+    mock_response.choices = [MagicMock()]
+    mock_response.choices[0].message.content = "Grok says hello"
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+    mock_response.model = "grok-3-fast"
+
+    mock_client = MagicMock()
+    mock_client.chat.completions.create.return_value = mock_response
+
+    # Patch the client factory so no real HTTP call is made.
+    with patch.object(backend, "_get_client", return_value=mock_client):
+        result = backend.run("hello")
+
+    assert result.content == "Grok says hello"
+    # One call must increment the request counter and accumulate both
+    # prompt- and completion-token totals from the usage block.
+    assert backend.stats.total_requests == 1
+    assert backend.stats.total_prompt_tokens == 10
+    assert backend.stats.total_completion_tokens == 5
+
+
+def test_grok_backend_run_api_error():
+    """run() returns error message on API failure instead of raising."""
+    from timmy.backends import GrokBackend
+
+    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+
+    # Simulate a transport-level failure from the xAI client.
+    mock_client = MagicMock()
+    mock_client.chat.completions.create.side_effect = Exception("API timeout")
+
+    with patch.object(backend, "_get_client", return_value=mock_client):
+        result = backend.run("hello")
+
+    # Failures degrade to a readable "unavailable" message and are counted.
+    assert "unavailable" in result.content
+    assert backend.stats.errors == 1
+
+
+def test_grok_backend_history_management():
+    """GrokBackend maintains conversation history across run() calls."""
+    from timmy.backends import GrokBackend
+
+    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+
+    mock_response = MagicMock()
+    mock_response.choices = [MagicMock()]
+    mock_response.choices[0].message.content = "response"
+    # usage must be populated because run() records token stats on success.
+    mock_response.usage = MagicMock()
+    mock_response.usage.prompt_tokens = 10
+    mock_response.usage.completion_tokens = 5
+
+    mock_client = MagicMock()
+    mock_client.chat.completions.create.return_value = mock_response
+
+    with patch.object(backend, "_get_client", return_value=mock_client):
+        backend.run("first message")
+        backend.run("second message")
+
+    # Each run() appends one user turn and one assistant turn, in order.
+    assert len(backend._history) == 4  # 2 user + 2 assistant
+    assert backend._history[0]["role"] == "user"
+    assert backend._history[1]["role"] == "assistant"
+
+
+def test_grok_backend_health_check_no_key():
+    """health_check() returns not-ok when no API key."""
+    from timmy.backends import GrokBackend
+
+    backend = GrokBackend(api_key="", model="grok-3-fast")
+    # No network call expected here — the empty key short-circuits the check.
+    health = backend.health_check()
+    assert health["ok"] is False
+    assert "not configured" in health["error"]
+
+
+def test_grok_backend_health_check_success():
+    """health_check() returns ok when API key is set and models endpoint works."""
+    from timmy.backends import GrokBackend
+
+    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+
+    # health_check probes the models listing endpoint; an empty list is a
+    # successful (non-raising) response for this purpose.
+    mock_client = MagicMock()
+    mock_client.models.list.return_value = []
+
+    with patch.object(backend, "_get_client", return_value=mock_client):
+        health = backend.health_check()
+
+    assert health["ok"] is True
+    assert health["backend"] == "grok"
+
+
+def test_grok_backend_estimated_cost():
+    """estimated_cost property calculates sats from token usage."""
+    from timmy.backends import GrokUsageStats
+
+    stats = GrokUsageStats(
+        total_prompt_tokens=1_000_000,
+        total_completion_tokens=500_000,
+    )
+    # Input: 1M tokens * $5/1M = $5
+    # Output: 500K tokens * $15/1M = $7.50
+    # Total: $12.50 / $0.001 = 12,500 sats
+    # NOTE(review): this hard-codes both xAI pricing ($5/$15 per 1M tokens)
+    # and an implied BTC price of $100k (1 sat = $0.001). The test breaks
+    # whenever the pricing constants in backends.py change — consider
+    # importing the constants instead of duplicating the arithmetic.
+    assert stats.estimated_cost_sats == 12500
+
+
+def test_grok_backend_build_messages():
+    """_build_messages includes system prompt, prior history, and new prompt."""
+    from timmy.backends import GrokBackend
+
+    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
+    # Seed history directly; _build_messages must splice it between the
+    # system prompt and the new user turn.
+    backend._history = [
+        {"role": "user", "content": "previous"},
+        {"role": "assistant", "content": "yes"},
+    ]
+
+    messages = backend._build_messages("new question")
+    # Exactly one system message is prepended at index 0.
+    assert messages[0]["role"] == "system"
+    assert messages[1]["role"] == "user"
+    assert messages[1]["content"] == "previous"
+    # The fresh prompt is always the final message.
+    assert messages[-1]["role"] == "user"
+    assert messages[-1]["content"] == "new question"
+
+
+# ── get_grok_backend singleton ──────────────────────────────────────────────
+
+def test_get_grok_backend_returns_singleton():
+    """get_grok_backend returns the same instance on repeated calls.
+
+    The module-level singleton is reset before and after so this test is
+    order-independent with respect to other tests that touch the backend.
+    """
+    import timmy.backends as backends_mod
+
+    # Reset singleton so we observe a fresh creation on the first call.
+    backends_mod._grok_backend = None
+    try:
+        b1 = backends_mod.get_grok_backend()
+        b2 = backends_mod.get_grok_backend()
+        # Identity, not equality: both calls must return the same object.
+        assert b1 is b2
+    finally:
+        # Cleanup must run even when the assertion fails; otherwise a
+        # backend built under this test's environment leaks into later tests.
+        backends_mod._grok_backend = None
+
+
+# ── GROK_MODELS constant ───────────────────────────────────────────────────
+
+def test_grok_models_dict_has_expected_entries():
+    """GROK_MODELS registry exposes the known grok-3 model names."""
+    from timmy.backends import GROK_MODELS
+    assert "grok-3-fast" in GROK_MODELS
+    assert "grok-3" in GROK_MODELS
+
+
+# ── consult_grok tool ──────────────────────────────────────────────────────
+
+def test_consult_grok_returns_unavailable_when_disabled():
+    """consult_grok tool returns error when Grok is not available."""
+    # NOTE(review): patching "timmy.backends.grok_available" only affects
+    # consult_grok if the tool looks it up via the backends module at call
+    # time (as the toolkit registration in tools.py does) — confirm that
+    # consult_grok doesn't hold its own `from ... import` binding.
+    with patch("timmy.backends.grok_available", return_value=False):
+        from timmy.tools import consult_grok
+        result = consult_grok("test query")
+        assert "not available" in result
+
+
+def test_consult_grok_calls_backend_when_available():
+    """consult_grok tool calls the Grok backend when available."""
+    from timmy.backends import RunResult
+
+    mock_backend = MagicMock()
+    mock_backend.run.return_value = RunResult(content="Grok answer")
+    # stats are mocked because consult_grok reports latency alongside the answer.
+    mock_backend.stats = MagicMock()
+    mock_backend.stats.total_latency_ms = 100
+
+    # NOTE(review): replacing config.settings with a bare MagicMock means any
+    # settings attribute NOT set below (e.g. grok_max_sats_per_query) becomes
+    # a truthy MagicMock — verify consult_grok doesn't compare such values.
+    with patch("timmy.backends.grok_available", return_value=True), \
+         patch("timmy.backends.get_grok_backend", return_value=mock_backend), \
+         patch("config.settings") as mock_settings:
+        mock_settings.grok_free = True
+        mock_settings.grok_enabled = True
+        mock_settings.xai_api_key = "xai-test"
+        from timmy.tools import consult_grok
+        result = consult_grok("complex question")
+
+    # The tool must forward the query verbatim, exactly once.
+    assert "Grok answer" in result
+    mock_backend.run.assert_called_once_with("complex question")
+
+
+# ── Grok dashboard route tests ─────────────────────────────────────────────
+
+def test_grok_status_endpoint(client):
+    """GET /grok/status returns JSON with Grok configuration.
+
+    Uses the shared `client` fixture (test app client from conftest).
+    """
+    response = client.get("/grok/status")
+    assert response.status_code == 200
+    data = response.json()
+    # Keys only — values depend on the test environment's GROK_* settings.
+    assert "enabled" in data
+    assert "available" in data
+    assert "model" in data
+    assert "api_key_set" in data
+
+
+def test_grok_toggle_returns_html(client):
+    """POST /grok/toggle returns HTML response."""
+    response = client.post("/grok/toggle")
+    # NOTE(review): docstring claims HTML but only the status code is
+    # asserted — consider also checking the content-type header once the
+    # endpoint's response type is stable.
+    assert response.status_code == 200
+
+
+def test_grok_stats_endpoint(client):
+    """GET /grok/stats returns usage statistics (or an error payload)."""
+    response = client.get("/grok/stats")
+    assert response.status_code == 200
+    data = response.json()
+    # Either real stats or an error body is accepted because Grok is
+    # disabled in the test environment; this deliberately weak assertion
+    # only pins that the endpoint returns structured JSON.
+    assert "total_requests" in data or "error" in data
+
+
+def test_grok_chat_without_key(client):
+    """POST /grok/chat returns error when Grok is not available."""
+    response = client.post(
+        "/grok/chat",
+        data={"message": "test query"},
+    )
+    assert response.status_code == 200
+    # Should contain error since GROK_ENABLED is false in test mode
+    # NOTE(review): the final `or "grok" in ...` alternative makes this
+    # assertion nearly vacuous for a /grok/* endpoint (the word "grok" will
+    # almost always appear) — tighten to the actual error marker once the
+    # error fragment's wording is stable.
+    assert "not available" in response.text.lower() or "error" in response.text.lower() or "grok" in response.text.lower()