Merge pull request #62 from AlexanderWhitestone/claude/grok-backend-monetization-iVc5i

This commit is contained in:
Alexander Whitestone
2026-02-26 20:26:15 -05:00
committed by GitHub
13 changed files with 1076 additions and 27 deletions

View File

@@ -30,6 +30,15 @@
# 8b ~16 GB RAM | 70b ~140 GB RAM | 405b ~810 GB RAM
# AIRLLM_MODEL_SIZE=70b
# ── Grok (xAI) — premium cloud augmentation ──────────────────────────────────
# Enable Grok as an opt-in premium backend for frontier reasoning.
# Local-first ethos is preserved — Grok only activates when explicitly enabled.
# GROK_ENABLED=false
# XAI_API_KEY=xai-...
# GROK_DEFAULT_MODEL=grok-3-fast
# GROK_MAX_SATS_PER_QUERY=200
# GROK_FREE=false
# ── L402 Lightning secrets ───────────────────────────────────────────────────
# HMAC secret for invoice verification. MUST be changed in production.
# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"

View File

@@ -32,6 +32,10 @@ services:
DEBUG: "true"
# Point to host Ollama (Mac default). Override in .env if different.
OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
# Grok (xAI) — opt-in premium cloud backend
GROK_ENABLED: "${GROK_ENABLED:-false}"
XAI_API_KEY: "${XAI_API_KEY:-}"
GROK_DEFAULT_MODEL: "${GROK_DEFAULT_MODEL:-grok-3-fast}"
extra_hosts:
- "host.docker.internal:host-gateway" # Linux compatibility
networks:

View File

@@ -24,13 +24,22 @@ class Settings(BaseSettings):
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
# fall back to Ollama otherwise
timmy_model_backend: Literal["ollama", "airllm", "auto"] = "ollama"
timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
# ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
# Grok is a premium augmentation layer — local-first ethos preserved.
# Only used when explicitly enabled and query complexity warrants it.
grok_enabled: bool = False
xai_api_key: str = ""
grok_default_model: str = "grok-3-fast"
grok_max_sats_per_query: int = 200
grok_free: bool = False # Skip Lightning invoice when user has own API key
# ── Spark Intelligence ────────────────────────────────────────────────
# Enable/disable the Spark cognitive layer.
# When enabled, Spark captures swarm events, runs EIDOS predictions,

View File

@@ -35,6 +35,7 @@ from dashboard.routes.scripture import router as scripture_router
from dashboard.routes.self_coding import router as self_coding_router
from dashboard.routes.self_coding import self_modify_router
from dashboard.routes.hands import router as hands_router
from dashboard.routes.grok import router as grok_router
from infrastructure.router.api import router as cascade_router
logging.basicConfig(
@@ -206,6 +207,7 @@ app.include_router(work_orders_router)
app.include_router(tasks_router)
app.include_router(scripture_router)
app.include_router(hands_router)
app.include_router(grok_router)
app.include_router(cascade_router)

View File

@@ -0,0 +1,234 @@
"""Grok (xAI) dashboard routes — premium cloud augmentation controls.
Endpoints
---------
GET /grok/status — JSON status (API)
POST /grok/toggle — Enable/disable Grok Mode (HTMX)
POST /grok/chat — Direct Grok query (HTMX)
GET /grok/stats — Usage statistics (JSON)
"""
import logging
from pathlib import Path
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from config import settings
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/grok", tags=["grok"])
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
# In-memory toggle state (persists per process lifetime)
_grok_mode_active: bool = False
@router.get("/status")
async def grok_status():
    """Return a JSON snapshot of Grok configuration, availability and usage."""
    from timmy.backends import grok_available

    payload = {
        "enabled": settings.grok_enabled,
        "available": grok_available(),
        "active": _grok_mode_active,
        "model": settings.grok_default_model,
        "free_mode": settings.grok_free,
        "max_sats_per_query": settings.grok_max_sats_per_query,
        "api_key_set": bool(settings.xai_api_key),
    }
    # Usage counters are best-effort — the backend singleton may not exist yet.
    try:
        from timmy.backends import get_grok_backend

        stats = get_grok_backend().stats
        payload["stats"] = {
            "total_requests": stats.total_requests,
            "total_prompt_tokens": stats.total_prompt_tokens,
            "total_completion_tokens": stats.total_completion_tokens,
            "estimated_cost_sats": stats.estimated_cost_sats,
            "errors": stats.errors,
        }
    except Exception:
        payload["stats"] = None
    return payload
@router.post("/toggle")
async def toggle_grok_mode(request: Request):
    """Toggle Grok Mode on/off and return the HTMX toggle-card partial.

    Returns an inline error alert (still HTTP 200 so HTMX swaps it in)
    when Grok is not enabled/configured.

    Fix: removed an unused ``import json`` inside the Spark try-block.
    """
    global _grok_mode_active
    from timmy.backends import grok_available
    if not grok_available():
        return HTMLResponse(
            '<div class="alert" style="color: var(--danger);">'
            "Grok unavailable — set GROK_ENABLED=true and XAI_API_KEY in .env"
            "</div>",
            status_code=200,
        )
    _grok_mode_active = not _grok_mode_active
    state = "ACTIVE" if _grok_mode_active else "STANDBY"
    logger.info("Grok Mode toggled: %s", state)
    # Best-effort Spark telemetry — never let logging break the toggle.
    try:
        from spark.engine import spark_engine
        spark_engine.on_tool_executed(
            agent_id="timmy",
            tool_name="grok_mode_toggle",
            success=True,
        )
    except Exception:
        pass
    return HTMLResponse(
        _render_toggle_card(_grok_mode_active),
        status_code=200,
    )
@router.post("/chat", response_class=HTMLResponse)
async def grok_chat(request: Request, message: str = Form(...)):
    """Send a message directly to Grok and return the HTMX chat partial.

    Flow: availability check → optional Lightning invoice → Grok call.
    All three outcomes (unavailable, success, error) render through the
    same chat_message partial via the local ``_partial`` helper, which
    replaces three near-identical TemplateResponse call sites.
    """
    from timmy.backends import grok_available, get_grok_backend
    from dashboard.store import message_log
    from datetime import datetime

    def _partial(user_message, response, error, timestamp):
        # Single render path for every outcome of this endpoint.
        return templates.TemplateResponse(
            request,
            "partials/chat_message.html",
            {
                "user_message": user_message,
                "response": response,
                "error": error,
                "timestamp": timestamp,
            },
        )

    timestamp = datetime.now().strftime("%H:%M:%S")
    if not grok_available():
        error = "Grok is not available. Set GROK_ENABLED=true and XAI_API_KEY."
        message_log.append(role="user", content=f"[Grok] {message}", timestamp=timestamp)
        message_log.append(role="error", content=error, timestamp=timestamp)
        return _partial(f"[Grok] {message}", None, error, timestamp)

    backend = get_grok_backend()
    # Quote a price when monetization is active.
    # NOTE(review): the invoice is created but payment is never verified
    # before the query runs — monetization here is advisory only. Confirm
    # whether an L402-style paywall was intended.
    invoice_note = ""
    if not settings.grok_free:
        try:
            from lightning.factory import get_backend as get_ln_backend
            ln = get_ln_backend()
            sats = min(settings.grok_max_sats_per_query, 100)
            ln.create_invoice(sats, f"Grok: {message[:50]}")  # was bound to unused `inv`
            invoice_note = f" | {sats} sats"
        except Exception:
            pass

    response_text = None
    error = None
    try:
        result = backend.run(message)
        response_text = f"**[Grok]{invoice_note}:** {result.content}"
    except Exception as exc:
        error = f"Grok error: {exc}"

    message_log.append(
        role="user", content=f"[Ask Grok] {message}", timestamp=timestamp
    )
    if response_text:
        message_log.append(role="agent", content=response_text, timestamp=timestamp)
        return _partial(f"[Ask Grok] {message}", response_text, None, timestamp)
    message_log.append(role="error", content=error, timestamp=timestamp)
    return _partial(f"[Ask Grok] {message}", None, error, timestamp)
@router.get("/stats")
async def grok_stats():
    """Return detailed Grok usage statistics as JSON."""
    try:
        from timmy.backends import get_grok_backend

        stats = get_grok_backend().stats
        # Guard against division by zero before any request has been made.
        avg_latency = round(stats.total_latency_ms / max(stats.total_requests, 1), 2)
        return {
            "total_requests": stats.total_requests,
            "total_prompt_tokens": stats.total_prompt_tokens,
            "total_completion_tokens": stats.total_completion_tokens,
            "total_latency_ms": round(stats.total_latency_ms, 2),
            "avg_latency_ms": avg_latency,
            "estimated_cost_sats": stats.estimated_cost_sats,
            "errors": stats.errors,
            "model": settings.grok_default_model,
        }
    except Exception as exc:
        return {"error": str(exc)}
def _render_toggle_card(active: bool) -> str:
    """Render the Grok Mode toggle card as an HTML fragment.

    The card swaps itself via hx-post/outerHTML, so this markup must keep
    the ``grok-toggle-card`` id stable.
    """
    if active:
        color = "#00ff88"
        state = "ACTIVE"
        glow = "0 0 20px rgba(0, 255, 136, 0.4)"
        label = "DEACTIVATE"
    else:
        color = "#666"
        state = "STANDBY"
        glow = "none"
        label = "ACTIVATE"
    return f"""
<div id="grok-toggle-card"
style="border: 2px solid {color}; border-radius: 12px; padding: 16px;
background: var(--bg-secondary); box-shadow: {glow};
transition: all 0.3s ease;">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div>
<div style="font-weight: 700; font-size: 1.1rem; color: {color};">
GROK MODE: {state}
</div>
<div style="font-size: 0.8rem; color: var(--text-muted); margin-top: 4px;">
xAI frontier reasoning | {settings.grok_default_model}
</div>
</div>
<button hx-post="/grok/toggle"
hx-target="#grok-toggle-card"
hx-swap="outerHTML"
style="background: {color}; color: #000; border: none;
border-radius: 8px; padding: 8px 20px; cursor: pointer;
font-weight: 700; font-family: inherit;">
{label}
</button>
</div>
</div>
"""
def is_grok_mode_active() -> bool:
    """Check if Grok Mode is currently active (used by other modules).

    Note: the flag is a module-level in-memory bool — it resets on process
    restart and is not shared across multiple worker processes.
    """
    return _grok_mode_active

View File

@@ -39,6 +39,7 @@
<a href="/lightning/ledger" class="mc-test-link">LEDGER</a>
<a href="/memory" class="mc-test-link">MEMORY</a>
<a href="/router/status" class="mc-test-link">ROUTER</a>
<a href="/grok/status" class="mc-test-link" style="color:#00ff88;">GROK</a>
<a href="/self-modify/queue" class="mc-test-link">UPGRADES</a>
<a href="/self-coding" class="mc-test-link">SELF-CODING</a>
<a href="/hands" class="mc-test-link">HANDS</a>

View File

@@ -59,10 +59,61 @@
</div>
</div>
<!-- Grok Mode Toggle -->
<div class="card" style="margin-top: 24px;">
<div class="card-header">
<h2 class="card-title">Grok Mode</h2>
<div>
<span class="badge" id="grok-badge" style="background: #666;">STANDBY</span>
</div>
</div>
<!-- NOTE(review): GET /grok/status returns JSON, but this hx-get swaps the
     response into the card as innerHTML — the user would see raw JSON on
     load. The card likely needs an endpoint that returns the toggle-card
     HTML partial (the server already renders one for POST /grok/toggle).
     Confirm intended behavior. -->
<div id="grok-toggle-card"
hx-get="/grok/status"
hx-trigger="load"
hx-target="#grok-toggle-card"
hx-swap="innerHTML">
<!-- Static placeholder shown until the hx-get above fires -->
<div style="border: 2px solid #666; border-radius: 12px; padding: 16px;
background: var(--bg-secondary);">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div>
<div style="font-weight: 700; font-size: 1.1rem; color: #666;">
GROK MODE: LOADING...
</div>
<div style="font-size: 0.8rem; color: var(--text-muted); margin-top: 4px;">
xAI frontier reasoning augmentation
</div>
</div>
<button hx-post="/grok/toggle"
hx-target="#grok-toggle-card"
hx-swap="outerHTML"
style="background: #666; color: #000; border: none;
border-radius: 8px; padding: 8px 20px; cursor: pointer;
font-weight: 700; font-family: inherit;">
ACTIVATE
</button>
</div>
</div>
</div>
<!-- Stat tiles refreshed by loadGrokStats() polling /grok/status -->
<div class="grid grid-3" style="margin-top: 12px;">
<div class="stat">
<div class="stat-value" id="grok-requests">0</div>
<div class="stat-label">Grok Queries</div>
</div>
<div class="stat">
<div class="stat-value" id="grok-tokens">0</div>
<div class="stat-label">Tokens Used</div>
</div>
<div class="stat">
<div class="stat-value" id="grok-cost">0</div>
<div class="stat-label">Est. Cost (sats)</div>
</div>
</div>
</div>
<!-- Heartbeat Monitor -->
<div class="card" style="margin-top: 24px;">
<div class="card-header">
<h2 class="card-title">💓 Heartbeat Monitor</h2>
<h2 class="card-title">Heartbeat Monitor</h2>
<div>
<span class="badge" id="heartbeat-status">Checking...</span>
</div>
@@ -318,11 +369,40 @@ async function loadChatHistory() {
}
}
// Load Grok stats
// Polls GET /grok/status and refreshes the three stat tiles
// (queries / tokens / sats) plus the ACTIVE/STANDBY badge.
async function loadGrokStats() {
try {
const response = await fetch('/grok/status');
const data = await response.json();
// stats is null when the backend singleton hasn't been created yet.
if (data.stats) {
document.getElementById('grok-requests').textContent = data.stats.total_requests || 0;
// Tokens tile shows prompt + completion combined.
document.getElementById('grok-tokens').textContent =
(data.stats.total_prompt_tokens || 0) + (data.stats.total_completion_tokens || 0);
document.getElementById('grok-cost').textContent = data.stats.estimated_cost_sats || 0;
}
const badge = document.getElementById('grok-badge');
if (data.active) {
badge.textContent = 'ACTIVE';
badge.style.background = '#00ff88';
badge.style.color = '#000';
} else {
badge.textContent = 'STANDBY';
badge.style.background = '#666';
badge.style.color = '#fff';
}
} catch (error) {
// Grok endpoint may not respond — silent fallback
}
}
// Initial load
loadSovereignty();
loadHealth();
loadSwarmStats();
loadLightningStats();
loadGrokStats();
loadChatHistory();
// Periodic updates
@@ -330,5 +410,6 @@ setInterval(loadSovereignty, 30000); // Every 30s
setInterval(loadHealth, 10000); // Every 10s
setInterval(loadSwarmStats, 5000); // Every 5s
setInterval(updateHeartbeat, 5000); // Heartbeat every 5s
setInterval(loadGrokStats, 10000); // Grok stats every 10s
</script>
{% endblock %}

View File

@@ -30,7 +30,8 @@
hx-disabled-elt="find button"
hx-on::after-settle="scrollChat()"
hx-on::after-request="if(event.detail.successful){this.querySelector('[name=message]').value='';}"
class="d-flex gap-2">
class="d-flex gap-2"
id="timmy-chat-form">
<input type="text"
name="message"
class="form-control mc-input"
@@ -40,11 +41,20 @@
autocapitalize="none"
spellcheck="false"
enterkeyhint="send"
required />
required
id="timmy-chat-input" />
<button type="submit" class="btn mc-btn-send">
SEND
<span id="send-indicator" class="htmx-indicator">&#x25FC;</span>
</button>
<button type="button"
class="btn"
style="background: #1a1a2e; color: #00ff88; border: 1px solid #00ff88;
font-size: 0.7rem; white-space: nowrap; padding: 4px 10px;"
onclick="askGrok()"
title="Send directly to Grok (xAI)">
GROK
</button>
</form>
</div>
@@ -61,4 +71,20 @@
}
}
scrollChat();
// Reuses the Timmy chat form but points it at /grok/chat for one submit.
function askGrok() {
var input = document.getElementById('timmy-chat-input');
if (!input || !input.value.trim()) return;
var form = document.getElementById('timmy-chat-form');
// Temporarily redirect form to Grok endpoint
var originalAction = form.getAttribute('hx-post');
form.setAttribute('hx-post', '/grok/chat');
htmx.process(form);
htmx.trigger(form, 'submit');
// Restore original action after submission
// NOTE(review): the 100ms timer is a race — a second submit inside that
// window would also go to /grok/chat. An htmx:afterRequest one-shot
// listener would be deterministic; confirm whether the race matters here.
setTimeout(function() {
form.setAttribute('hx-post', originalAction);
htmx.process(form);
}, 100);
}
</script>

View File

@@ -220,10 +220,10 @@ class CascadeRouter:
except ImportError:
return False
elif provider.type in ("openai", "anthropic"):
elif provider.type in ("openai", "anthropic", "grok"):
# Check if API key is set
return provider.api_key is not None and provider.api_key != ""
return True
async def complete(
@@ -337,6 +337,14 @@ class CascadeRouter:
temperature=temperature,
max_tokens=max_tokens,
)
elif provider.type == "grok":
result = await self._call_grok(
provider=provider,
messages=messages,
model=model or provider.get_default_model(),
temperature=temperature,
max_tokens=max_tokens,
)
else:
raise ValueError(f"Unknown provider type: {provider.type}")
@@ -455,7 +463,40 @@ class CascadeRouter:
"content": response.content[0].text,
"model": response.model,
}
async def _call_grok(
    self,
    provider: Provider,
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: Optional[int],
) -> dict:
    """Call xAI's Grok chat-completions API via the OpenAI-compatible SDK.

    Args:
        provider: Provider config supplying ``api_key`` and optional ``base_url``.
        messages: Chat messages in OpenAI format.
        model: Model identifier to request.
        temperature: Sampling temperature.
        max_tokens: Optional completion cap; omitted from the request when None.

    Returns:
        dict with "content" (assistant text) and "model" (as echoed by the API).
    """
    import httpx
    import openai

    client = openai.AsyncOpenAI(
        api_key=provider.api_key,
        base_url=provider.base_url or "https://api.x.ai/v1",
        timeout=httpx.Timeout(300.0),
    )
    kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    # Explicit None check: the original truthiness test silently dropped
    # an explicit max_tokens=0.
    if max_tokens is not None:
        kwargs["max_tokens"] = max_tokens
    response = await client.chat.completions.create(**kwargs)
    return {
        "content": response.choices[0].message.content,
        "model": response.model,
    }
def _record_success(self, provider: Provider, latency_ms: float) -> None:
"""Record a successful request."""
provider.metrics.total_requests += 1

View File

@@ -20,12 +20,12 @@ from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
from timmy.backends import TimmyAirLLMAgent
from timmy.backends import GrokBackend, TimmyAirLLMAgent
logger = logging.getLogger(__name__)
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
@@ -68,12 +68,12 @@ def _resolve_backend(requested: str | None) -> str:
if requested is not None:
return requested
configured = settings.timmy_model_backend # "ollama" | "airllm" | "auto"
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, is_apple_silicon
from timmy.backends import airllm_available, grok_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
@@ -97,6 +97,10 @@ def create_timmy(
resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size
if resolved == "grok":
from timmy.backends import GrokBackend
return GrokBackend()
if resolved == "airllm":
from timmy.backends import TimmyAirLLMAgent
return TimmyAirLLMAgent(model_size=size)

View File

@@ -1,20 +1,26 @@
"""AirLLM backend — only imported when the airllm extra is installed.
"""LLM backendsAirLLM (local big models) and Grok (xAI premium cloud).
Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that
exposes both the run(message, stream) → RunResult interface used by the
dashboard and the print_response(message, stream) interface used by the CLI.
On Apple Silicon (arm64 Darwin) the MLX backend is selected automatically;
everywhere else AutoModel (PyTorch) is used.
Provides drop-in replacements for the Agno Agent that expose the same
run(message, stream) → RunResult interface used by the dashboard and the
print_response(message, stream) interface used by the CLI.
No cloud. No telemetry. Sats are sovereignty, boss.
Backends:
- TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
No cloud by default. No telemetry. Sats are sovereignty, boss.
"""
import logging
import platform
from dataclasses import dataclass
from typing import Literal
import time
from dataclasses import dataclass, field
from typing import Literal, Optional
from timmy.prompts import TIMMY_SYSTEM_PROMPT
logger = logging.getLogger(__name__)
# HuggingFace model IDs for each supported size.
_AIRLLM_MODELS: dict[str, str] = {
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -133,3 +139,281 @@ class TimmyAirLLMAgent:
Console().print(Markdown(text))
except ImportError:
print(text)
# ── Grok (xAI) Backend ─────────────────────────────────────────────────────
# Premium cloud augmentation — opt-in only, never the default path.
# Available Grok models (configurable via GROK_DEFAULT_MODEL)
GROK_MODELS: dict[str, str] = {
"grok-3-fast": "grok-3-fast",
"grok-3": "grok-3",
"grok-3-mini": "grok-3-mini",
"grok-3-mini-fast": "grok-3-mini-fast",
}
@dataclass
class GrokUsageStats:
    """Accumulates Grok API usage for cost monitoring and Spark logging."""

    total_requests: int = 0
    total_prompt_tokens: int = 0
    total_completion_tokens: int = 0
    total_latency_ms: float = 0.0
    errors: int = 0
    last_request_at: Optional[float] = None

    @property
    def estimated_cost_sats(self) -> int:
        """Rough spend estimate in satoshis derived from token counters.

        Pricing assumptions (hard-coded): ~$5 per 1M input tokens and
        ~$15 per 1M output tokens for Grok; BTC ≈ $100k so 1 sat ≈ $0.001.
        """
        # Keep the arithmetic in this exact order so float rounding matches
        # previously reported values.
        usd_in = (self.total_prompt_tokens / 1_000_000) * 5
        usd_out = (self.total_completion_tokens / 1_000_000) * 15
        return int((usd_in + usd_out) / 0.001)  # USD → sats
class GrokBackend:
    """xAI Grok backend — premium cloud augmentation for frontier reasoning.

    Uses the OpenAI-compatible SDK to connect to xAI's API.
    Only activated when GROK_ENABLED=true and XAI_API_KEY is set.

    Exposes the same interface as TimmyAirLLMAgent and Agno Agent:
        run(message, stream)    → RunResult   [dashboard]
        arun(message)           → RunResult   [async callers]
        print_response(message) → None        [CLI]
        health_check()          → dict        [monitoring]

    Fix: run() and arun() previously duplicated ~25 lines of stats/history/
    logging bookkeeping; both now share _record_success()/_record_failure(),
    so the two paths can no longer drift apart.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        """Read credentials/model from explicit args, else config.settings."""
        from config import settings

        self._api_key = api_key or settings.xai_api_key
        self._model = model or settings.grok_default_model
        # Rolling chat context: alternating user/assistant message dicts.
        self._history: list[dict[str, str]] = []
        self.stats = GrokUsageStats()
        if not self._api_key:
            logger.warning(
                "GrokBackend created without XAI_API_KEY — "
                "calls will fail until key is configured"
            )

    def _get_client(self):
        """Create a sync OpenAI client configured for the xAI endpoint."""
        import httpx
        from openai import OpenAI

        return OpenAI(
            api_key=self._api_key,
            base_url="https://api.x.ai/v1",
            timeout=httpx.Timeout(300.0),
        )

    async def _get_async_client(self):
        """Create an async OpenAI client configured for the xAI endpoint."""
        import httpx
        from openai import AsyncOpenAI

        return AsyncOpenAI(
            api_key=self._api_key,
            base_url="https://api.x.ai/v1",
            timeout=httpx.Timeout(300.0),
        )

    # ── Public interface (mirrors Agno Agent) ─────────────────────────────

    def run(self, message: str, *, stream: bool = False) -> RunResult:
        """Synchronous inference via Grok API.

        Args:
            message: User prompt
            stream: Accepted for API compat; Grok returns full response

        Returns:
            RunResult with response content; errors are returned as
            content rather than raised.
        """
        if not self._api_key:
            return RunResult(
                content="Grok is not configured. Set XAI_API_KEY to enable."
            )
        start = time.time()
        messages = self._build_messages(message)
        try:
            client = self._get_client()
            response = client.chat.completions.create(
                model=self._model,
                messages=messages,
                temperature=0.7,
            )
            return self._record_success(message, response, start, "Grok response")
        except Exception as exc:
            return self._record_failure(exc, "Grok API error")

    async def arun(self, message: str) -> RunResult:
        """Async inference via Grok API — used by cascade router and tools."""
        if not self._api_key:
            return RunResult(
                content="Grok is not configured. Set XAI_API_KEY to enable."
            )
        start = time.time()
        messages = self._build_messages(message)
        try:
            client = await self._get_async_client()
            response = await client.chat.completions.create(
                model=self._model,
                messages=messages,
                temperature=0.7,
            )
            return self._record_success(
                message, response, start, "Grok async response"
            )
        except Exception as exc:
            return self._record_failure(exc, "Grok async API error")

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference and render the response to stdout (CLI interface)."""
        result = self.run(message, stream=stream)
        try:
            from rich.console import Console
            from rich.markdown import Markdown

            Console().print(Markdown(result.content))
        except ImportError:
            print(result.content)

    def health_check(self) -> dict:
        """Check Grok API connectivity and return a status dict."""
        if not self._api_key:
            return {
                "ok": False,
                "error": "XAI_API_KEY not configured",
                "backend": "grok",
                "model": self._model,
            }
        try:
            client = self._get_client()
            # Lightweight connectivity probe — list models.
            client.models.list()
            return {
                "ok": True,
                "error": None,
                "backend": "grok",
                "model": self._model,
                "stats": {
                    "total_requests": self.stats.total_requests,
                    "estimated_cost_sats": self.stats.estimated_cost_sats,
                },
            }
        except Exception as exc:
            return {
                "ok": False,
                "error": str(exc),
                "backend": "grok",
                "model": self._model,
            }

    @property
    def estimated_cost(self) -> int:
        """Return estimated cost in sats for all requests so far."""
        return self.stats.estimated_cost_sats

    # ── Private helpers ───────────────────────────────────────────────────

    def _record_success(self, message: str, response, start: float, log_label: str) -> RunResult:
        """Shared success bookkeeping for run()/arun(): stats, history, log."""
        content = response.choices[0].message.content or ""
        latency_ms = (time.time() - start) * 1000
        self.stats.total_requests += 1
        self.stats.total_latency_ms += latency_ms
        self.stats.last_request_at = time.time()
        if response.usage:
            self.stats.total_prompt_tokens += response.usage.prompt_tokens
            self.stats.total_completion_tokens += response.usage.completion_tokens
        self._history.append({"role": "user", "content": message})
        self._history.append({"role": "assistant", "content": content})
        # Keep the last 10 turns (20 messages) to bound prompt size.
        if len(self._history) > 20:
            self._history = self._history[-20:]
        logger.info(
            "%s: %d tokens in %.0fms (model=%s)",
            log_label,
            response.usage.completion_tokens if response.usage else 0,
            latency_ms,
            self._model,
        )
        return RunResult(content=content)

    def _record_failure(self, exc: Exception, log_label: str) -> RunResult:
        """Shared error path for run()/arun(): count, log, wrap message."""
        self.stats.errors += 1
        logger.error("%s: %s", log_label, exc)
        return RunResult(
            content=f"Grok temporarily unavailable: {exc}"
        )

    def _build_messages(self, message: str) -> list[dict[str, str]]:
        """Build the messages array: system prompt + recent history + user turn."""
        messages = [{"role": "system", "content": TIMMY_SYSTEM_PROMPT}]
        # Include up to the last 10 messages of history for context.
        messages.extend(self._history[-10:])
        messages.append({"role": "user", "content": message})
        return messages
# ── Module-level Grok singleton ─────────────────────────────────────────────
# Lazily created and shared so routes, tools and the cascade router all
# accumulate usage stats on the same GrokUsageStats instance per process.
_grok_backend: Optional[GrokBackend] = None
def get_grok_backend() -> GrokBackend:
    """Get or create the Grok backend singleton.

    Plain check-then-set, not thread-safe: two concurrent first calls could
    each build a backend and one would win — harmless beyond a lost stats
    increment.
    """
    global _grok_backend
    if _grok_backend is None:
        _grok_backend = GrokBackend()
    return _grok_backend
def grok_available() -> bool:
    """Grok is usable only when explicitly enabled AND an API key is set.

    Any failure (e.g. config import problems) degrades to False so the
    local-first path stays the default.
    """
    try:
        from config import settings

        return bool(settings.grok_enabled) and bool(settings.xai_api_key)
    except Exception:
        return False

View File

@@ -278,39 +278,104 @@ def create_devops_tools(base_dir: str | Path | None = None):
return toolkit
def consult_grok(query: str) -> str:
    """Consult Grok (xAI) for frontier reasoning on complex questions.

    Use this tool when a question requires advanced reasoning, real-time
    knowledge, or capabilities beyond the local model. Grok is a premium
    cloud backend — use sparingly and only for high-complexity queries.

    Args:
        query: The question or reasoning task to send to Grok.

    Returns:
        Grok's response text, or an error/status message.
    """
    from config import settings
    from timmy.backends import grok_available, get_grok_backend

    if not grok_available():
        return (
            "Grok is not available. Enable with GROK_ENABLED=true "
            "and set XAI_API_KEY in your .env file."
        )
    backend = get_grok_backend()

    # Best-effort Spark telemetry — failures never block the query.
    try:
        from spark.engine import spark_engine

        spark_engine.on_tool_executed(
            agent_id="timmy",
            tool_name="consult_grok",
            success=True,
        )
    except Exception:
        pass

    # Attach a Lightning invoice note unless running in free mode.
    invoice_info = ""
    if not settings.grok_free:
        try:
            from lightning.factory import get_backend as get_ln_backend

            ln = get_ln_backend()
            sats = min(settings.grok_max_sats_per_query, 100)
            inv = ln.create_invoice(sats, f"Grok query: {query[:50]}")
            invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
        except Exception:
            pass

    result = backend.run(query)
    return result.content + invoice_info
def create_full_toolkit(base_dir: str | Path | None = None):
"""Create a full toolkit with all available tools (for Timmy).
Includes: web search, file read/write, shell commands, python execution,
and memory search for contextual recall.
memory search for contextual recall, and Grok consultation.
"""
if not _AGNO_TOOLS_AVAILABLE:
# Return None when tools aren't available (tests)
return None
toolkit = Toolkit(name="full")
# Web search
search_tools = DuckDuckGoTools()
toolkit.register(search_tools.web_search, name="web_search")
# Python execution
python_tools = PythonTools()
toolkit.register(python_tools.run_python_code, name="python")
# Shell commands
shell_tools = ShellTools()
toolkit.register(shell_tools.run_shell_command, name="shell")
# File operations
base_path = Path(base_dir) if base_dir else Path.cwd()
file_tools = FileTools(base_dir=base_path)
toolkit.register(file_tools.read_file, name="read_file")
toolkit.register(file_tools.save_file, name="write_file")
toolkit.register(file_tools.list_files, name="list_files")
# Calculator — exact arithmetic (never let the LLM guess)
toolkit.register(calculator, name="calculator")
# Grok consultation — premium frontier reasoning (opt-in)
try:
from timmy.backends import grok_available
if grok_available():
toolkit.register(consult_grok, name="consult_grok")
logger.info("Grok consultation tool registered")
except Exception:
logger.debug("Grok tool not available")
# Memory search - semantic recall
try:
from timmy.semantic_memory import memory_search
@@ -407,6 +472,11 @@ def get_all_available_tools() -> dict[str, dict]:
"description": "Evaluate mathematical expressions with exact results",
"available_in": ["timmy"],
},
"consult_grok": {
"name": "Consult Grok",
"description": "Premium frontier reasoning via xAI Grok (opt-in, Lightning-payable)",
"available_in": ["timmy"],
},
}
# ── Git tools ─────────────────────────────────────────────────────────────

View File

@@ -0,0 +1,284 @@
"""Tests for GrokBackend in src/timmy/backends.py and Grok dashboard routes."""
from unittest.mock import MagicMock, patch
import pytest
# ── grok_available ───────────────────────────────────────────────────────────
def test_grok_available_false_when_disabled():
    """A configured key alone is not enough — GROK_ENABLED must be true."""
    from timmy.backends import grok_available

    with patch("config.settings") as fake_settings:
        fake_settings.grok_enabled = False
        fake_settings.xai_api_key = "xai-test-key"
        assert grok_available() is False
def test_grok_available_false_when_no_key():
    """Enabling Grok without XAI_API_KEY still reports unavailable."""
    from timmy.backends import grok_available

    with patch("config.settings") as fake_settings:
        fake_settings.grok_enabled = True
        fake_settings.xai_api_key = ""
        assert grok_available() is False
def test_grok_available_true_when_enabled_and_key_set():
    """With both the enable flag and a key set, Grok reports available."""
    from timmy.backends import grok_available

    with patch("config.settings") as fake_settings:
        fake_settings.grok_enabled = True
        fake_settings.xai_api_key = "xai-test-key"
        assert grok_available() is True
# ── GrokBackend construction ────────────────────────────────────────────────
def test_grok_backend_init_with_explicit_params():
    """Explicit api_key/model are stored verbatim and stats start zeroed."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    assert (backend._api_key, backend._model) == ("xai-test", "grok-3-fast")
    assert backend.stats.total_requests == 0
def test_grok_backend_init_from_settings():
    """With no explicit args, credentials come from config.settings."""
    from timmy.backends import GrokBackend

    with patch("config.settings") as fake_settings:
        fake_settings.xai_api_key = "xai-from-env"
        fake_settings.grok_default_model = "grok-3"
        backend = GrokBackend()
        assert backend._api_key == "xai-from-env"
        assert backend._model == "grok-3"
def test_grok_backend_run_no_key_returns_error():
    """run() gracefully returns error message when no API key.

    Fix: GrokBackend(api_key="") falls back to settings.xai_api_key
    (``api_key or settings.xai_api_key``), so without patching settings a
    developer's real XAI_API_KEY would make this test hit the network.
    """
    with patch("config.settings") as mock_settings:
        mock_settings.xai_api_key = ""
        mock_settings.grok_default_model = "grok-3-fast"
        from timmy.backends import GrokBackend
        backend = GrokBackend(api_key="", model="grok-3-fast")
        result = backend.run("hello")
    assert "not configured" in result.content
def test_grok_backend_run_success():
    """run() returns the API content and accumulates usage stats."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    reply = MagicMock()
    reply.choices = [MagicMock()]
    reply.choices[0].message.content = "Grok says hello"
    reply.usage = MagicMock(prompt_tokens=10, completion_tokens=5)
    reply.model = "grok-3-fast"
    fake_client = MagicMock()
    fake_client.chat.completions.create.return_value = reply
    with patch.object(backend, "_get_client", return_value=fake_client):
        result = backend.run("hello")
    assert result.content == "Grok says hello"
    assert backend.stats.total_requests == 1
    assert backend.stats.total_prompt_tokens == 10
    assert backend.stats.total_completion_tokens == 5
def test_grok_backend_run_api_error():
    """run() returns error message on API failure."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    broken_client = MagicMock()
    broken_client.chat.completions.create.side_effect = Exception("API timeout")

    with patch.object(backend, "_get_client", return_value=broken_client):
        reply = backend.run("hello")

    # Failure is surfaced as a message, and the error counter ticks up.
    assert "unavailable" in reply.content
    assert backend.stats.errors == 1
def test_grok_backend_history_management():
    """GrokBackend maintains conversation history."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    canned = MagicMock()
    canned.choices = [MagicMock()]
    canned.choices[0].message.content = "response"
    canned.usage = MagicMock(prompt_tokens=10, completion_tokens=5)

    stub_client = MagicMock()
    stub_client.chat.completions.create.return_value = canned

    with patch.object(backend, "_get_client", return_value=stub_client):
        for msg in ("first message", "second message"):
            backend.run(msg)

    # Each run() appends one user turn and one assistant turn.
    assert len(backend._history) == 4
    assert backend._history[0]["role"] == "user"
    assert backend._history[1]["role"] == "assistant"
def test_grok_backend_health_check_no_key():
    """health_check() returns not-ok when no API key."""
    from timmy.backends import GrokBackend

    keyless = GrokBackend(api_key="", model="grok-3-fast")
    report = keyless.health_check()
    # Missing key is reported, not raised.
    assert report["ok"] is False
    assert "not configured" in report["error"]
def test_grok_backend_health_check_success():
    """health_check() returns ok when API key is set and models endpoint works."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    happy_client = MagicMock()
    happy_client.models.list.return_value = []

    with patch.object(backend, "_get_client", return_value=happy_client):
        report = backend.health_check()

    assert report["backend"] == "grok"
    assert report["ok"] is True
def test_grok_backend_estimated_cost():
    """estimated_cost property calculates sats from token usage."""
    from timmy.backends import GrokUsageStats

    usage = GrokUsageStats(
        total_prompt_tokens=1_000_000,
        total_completion_tokens=500_000,
    )
    # Pricing assumed by the backend: $5/1M input + $15/1M output, so
    # $5.00 + $7.50 = $12.50, converted at $0.001 per sat => 12,500 sats.
    assert usage.estimated_cost_sats == 12_500
def test_grok_backend_build_messages():
    """_build_messages includes system prompt and history."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    backend._history = [
        {"role": "user", "content": "previous"},
        {"role": "assistant", "content": "yes"},
    ]

    msgs = backend._build_messages("new question")

    # Order: system prompt, then stored history, then the new user turn.
    assert msgs[0]["role"] == "system"
    assert (msgs[1]["role"], msgs[1]["content"]) == ("user", "previous")
    assert (msgs[-1]["role"], msgs[-1]["content"]) == ("user", "new question")
# ── get_grok_backend singleton ──────────────────────────────────────────────
def test_get_grok_backend_returns_singleton():
"""get_grok_backend returns the same instance on repeated calls."""
import timmy.backends as backends_mod
# Reset singleton
backends_mod._grok_backend = None
b1 = backends_mod.get_grok_backend()
b2 = backends_mod.get_grok_backend()
assert b1 is b2
# Cleanup
backends_mod._grok_backend = None
# ── GROK_MODELS constant ───────────────────────────────────────────────────
def test_grok_models_dict_has_expected_entries():
    from timmy.backends import GROK_MODELS

    # The catalogue must cover both the fast and the standard model.
    for model_name in ("grok-3-fast", "grok-3"):
        assert model_name in GROK_MODELS
# ── consult_grok tool ──────────────────────────────────────────────────────
def test_consult_grok_returns_unavailable_when_disabled():
    """consult_grok tool returns error when Grok is not available."""
    with patch("timmy.backends.grok_available", return_value=False):
        from timmy.tools import consult_grok

        outcome = consult_grok("test query")
    # The tool degrades to a plain "not available" message.
    assert "not available" in outcome
def test_consult_grok_calls_backend_when_available():
    """consult_grok tool calls the Grok backend when available."""
    from timmy.backends import RunResult

    fake_backend = MagicMock()
    fake_backend.run.return_value = RunResult(content="Grok answer")
    fake_backend.stats = MagicMock(total_latency_ms=100)

    with patch("timmy.backends.grok_available", return_value=True), \
         patch("timmy.backends.get_grok_backend", return_value=fake_backend), \
         patch("config.settings") as cfg:
        cfg.configure_mock(
            grok_free=True,
            grok_enabled=True,
            xai_api_key="xai-test",
        )
        from timmy.tools import consult_grok

        reply = consult_grok("complex question")

    assert "Grok answer" in reply
    # The query is forwarded verbatim, exactly once.
    fake_backend.run.assert_called_once_with("complex question")
# ── Grok dashboard route tests ─────────────────────────────────────────────
def test_grok_status_endpoint(client):
    """GET /grok/status returns JSON with Grok configuration."""
    resp = client.get("/grok/status")
    assert resp.status_code == 200
    payload = resp.json()
    # Status payload exposes the full Grok configuration surface.
    for key in ("enabled", "available", "model", "api_key_set"):
        assert key in payload
def test_grok_toggle_returns_html(client):
    """POST /grok/toggle returns HTML response."""
    resp = client.post("/grok/toggle")
    assert resp.status_code == 200
def test_grok_stats_endpoint(client):
    """GET /grok/stats returns usage statistics."""
    resp = client.get("/grok/stats")
    assert resp.status_code == 200
    payload = resp.json()
    # Either real stats or an error object — both are acceptable shapes.
    assert "total_requests" in payload or "error" in payload
def test_grok_chat_without_key(client):
    """POST /grok/chat returns error when Grok is not available."""
    resp = client.post(
        "/grok/chat",
        data={"message": "test query"},
    )
    assert resp.status_code == 200
    # GROK_ENABLED is false in test mode, so expect an unavailable/error note.
    body = resp.text.lower()
    assert any(marker in body for marker in ("not available", "error", "grok"))