Merge pull request #62 from AlexanderWhitestone/claude/grok-backend-monetization-iVc5i

This commit is contained in:
Alexander Whitestone
2026-02-26 20:26:15 -05:00
committed by GitHub
13 changed files with 1076 additions and 27 deletions

View File

@@ -30,6 +30,15 @@
# 8b ~16 GB RAM | 70b ~140 GB RAM | 405b ~810 GB RAM
# AIRLLM_MODEL_SIZE=70b
# ── Grok (xAI) — premium cloud augmentation ──────────────────────────────────
# Enable Grok as an opt-in premium backend for frontier reasoning.
# Local-first ethos is preserved — Grok only activates when explicitly enabled.
# GROK_ENABLED=false
# XAI_API_KEY=xai-...
# GROK_DEFAULT_MODEL=grok-3-fast
# GROK_MAX_SATS_PER_QUERY=200
# GROK_FREE=false
# ── L402 Lightning secrets ───────────────────────────────────────────────────
# HMAC secret for invoice verification. MUST be changed in production.
# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"

View File

@@ -32,6 +32,10 @@ services:
DEBUG: "true"
# Point to host Ollama (Mac default). Override in .env if different.
OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
# Grok (xAI) — opt-in premium cloud backend
GROK_ENABLED: "${GROK_ENABLED:-false}"
XAI_API_KEY: "${XAI_API_KEY:-}"
GROK_DEFAULT_MODEL: "${GROK_DEFAULT_MODEL:-grok-3-fast}"
extra_hosts:
- "host.docker.internal:host-gateway" # Linux compatibility
networks:

View File

@@ -24,13 +24,22 @@ class Settings(BaseSettings):
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
# fall back to Ollama otherwise
timmy_model_backend: Literal["ollama", "airllm", "auto"] = "ollama"
timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
# ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
# Grok is a premium augmentation layer — local-first ethos preserved.
# Only used when explicitly enabled and query complexity warrants it.
grok_enabled: bool = False
xai_api_key: str = ""
grok_default_model: str = "grok-3-fast"
grok_max_sats_per_query: int = 200
grok_free: bool = False # Skip Lightning invoice when user has own API key
# ── Spark Intelligence ────────────────────────────────────────────────
# Enable/disable the Spark cognitive layer.
# When enabled, Spark captures swarm events, runs EIDOS predictions,

View File

@@ -35,6 +35,7 @@ from dashboard.routes.scripture import router as scripture_router
from dashboard.routes.self_coding import router as self_coding_router
from dashboard.routes.self_coding import self_modify_router
from dashboard.routes.hands import router as hands_router
from dashboard.routes.grok import router as grok_router
from infrastructure.router.api import router as cascade_router
logging.basicConfig(
@@ -206,6 +207,7 @@ app.include_router(work_orders_router)
app.include_router(tasks_router)
app.include_router(scripture_router)
app.include_router(hands_router)
app.include_router(grok_router)
app.include_router(cascade_router)

View File

@@ -0,0 +1,234 @@
"""Grok (xAI) dashboard routes — premium cloud augmentation controls.
Endpoints
---------
GET /grok/status — JSON status (API)
POST /grok/toggle — Enable/disable Grok Mode (HTMX)
POST /grok/chat — Direct Grok query (HTMX)
GET /grok/stats — Usage statistics (JSON)
"""
import logging
from pathlib import Path
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from config import settings
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/grok", tags=["grok"])
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
# In-memory toggle state (persists per process lifetime)
_grok_mode_active: bool = False
@router.get("/status")
async def grok_status():
    """Return a JSON snapshot of Grok configuration, availability and usage."""
    from timmy.backends import grok_available

    payload = {
        "enabled": settings.grok_enabled,
        "available": grok_available(),
        "active": _grok_mode_active,
        "model": settings.grok_default_model,
        "free_mode": settings.grok_free,
        "max_sats_per_query": settings.grok_max_sats_per_query,
        "api_key_set": bool(settings.xai_api_key),
    }
    # Usage counters are best-effort — the backend singleton may not exist yet.
    try:
        from timmy.backends import get_grok_backend

        stats = get_grok_backend().stats
        payload["stats"] = {
            "total_requests": stats.total_requests,
            "total_prompt_tokens": stats.total_prompt_tokens,
            "total_completion_tokens": stats.total_completion_tokens,
            "estimated_cost_sats": stats.estimated_cost_sats,
            "errors": stats.errors,
        }
    except Exception:
        payload["stats"] = None
    return payload
@router.post("/toggle")
async def toggle_grok_mode(request: Request):
    """Toggle Grok Mode on/off and return the HTMX toggle-card partial.

    Returns an inline error alert (still HTTP 200 so HTMX swaps it in)
    when Grok is not enabled/configured.

    Fix: removed an unused ``import json`` inside the Spark try-block.
    """
    global _grok_mode_active
    from timmy.backends import grok_available
    if not grok_available():
        return HTMLResponse(
            '<div class="alert" style="color: var(--danger);">'
            "Grok unavailable — set GROK_ENABLED=true and XAI_API_KEY in .env"
            "</div>",
            status_code=200,
        )
    _grok_mode_active = not _grok_mode_active
    state = "ACTIVE" if _grok_mode_active else "STANDBY"
    logger.info("Grok Mode toggled: %s", state)
    # Best-effort Spark telemetry — never let logging break the toggle.
    try:
        from spark.engine import spark_engine
        spark_engine.on_tool_executed(
            agent_id="timmy",
            tool_name="grok_mode_toggle",
            success=True,
        )
    except Exception:
        pass
    return HTMLResponse(
        _render_toggle_card(_grok_mode_active),
        status_code=200,
    )
@router.post("/chat", response_class=HTMLResponse)
async def grok_chat(request: Request, message: str = Form(...)):
    """Send a message directly to Grok and return the HTMX chat partial.

    Flow: availability check → optional Lightning invoice → Grok call.
    All three outcomes (unavailable, success, error) render through the
    same chat_message partial via the local ``_partial`` helper, which
    replaces three near-identical TemplateResponse call sites.
    """
    from timmy.backends import grok_available, get_grok_backend
    from dashboard.store import message_log
    from datetime import datetime

    def _partial(user_message, response, error, timestamp):
        # Single render path for every outcome of this endpoint.
        return templates.TemplateResponse(
            request,
            "partials/chat_message.html",
            {
                "user_message": user_message,
                "response": response,
                "error": error,
                "timestamp": timestamp,
            },
        )

    timestamp = datetime.now().strftime("%H:%M:%S")
    if not grok_available():
        error = "Grok is not available. Set GROK_ENABLED=true and XAI_API_KEY."
        message_log.append(role="user", content=f"[Grok] {message}", timestamp=timestamp)
        message_log.append(role="error", content=error, timestamp=timestamp)
        return _partial(f"[Grok] {message}", None, error, timestamp)

    backend = get_grok_backend()
    # Quote a price when monetization is active.
    # NOTE(review): the invoice is created but payment is never verified
    # before the query runs — monetization here is advisory only. Confirm
    # whether an L402-style paywall was intended.
    invoice_note = ""
    if not settings.grok_free:
        try:
            from lightning.factory import get_backend as get_ln_backend
            ln = get_ln_backend()
            sats = min(settings.grok_max_sats_per_query, 100)
            ln.create_invoice(sats, f"Grok: {message[:50]}")  # was bound to unused `inv`
            invoice_note = f" | {sats} sats"
        except Exception:
            pass

    response_text = None
    error = None
    try:
        result = backend.run(message)
        response_text = f"**[Grok]{invoice_note}:** {result.content}"
    except Exception as exc:
        error = f"Grok error: {exc}"

    message_log.append(
        role="user", content=f"[Ask Grok] {message}", timestamp=timestamp
    )
    if response_text:
        message_log.append(role="agent", content=response_text, timestamp=timestamp)
        return _partial(f"[Ask Grok] {message}", response_text, None, timestamp)
    message_log.append(role="error", content=error, timestamp=timestamp)
    return _partial(f"[Ask Grok] {message}", None, error, timestamp)
@router.get("/stats")
async def grok_stats():
    """Return detailed Grok usage statistics as JSON."""
    try:
        from timmy.backends import get_grok_backend

        stats = get_grok_backend().stats
        # Guard against division by zero before any request has been made.
        avg_latency = round(stats.total_latency_ms / max(stats.total_requests, 1), 2)
        return {
            "total_requests": stats.total_requests,
            "total_prompt_tokens": stats.total_prompt_tokens,
            "total_completion_tokens": stats.total_completion_tokens,
            "total_latency_ms": round(stats.total_latency_ms, 2),
            "avg_latency_ms": avg_latency,
            "estimated_cost_sats": stats.estimated_cost_sats,
            "errors": stats.errors,
            "model": settings.grok_default_model,
        }
    except Exception as exc:
        return {"error": str(exc)}
def _render_toggle_card(active: bool) -> str:
    """Render the Grok Mode toggle card as an HTML fragment.

    The card swaps itself via hx-post/outerHTML, so this markup must keep
    the ``grok-toggle-card`` id stable.
    """
    if active:
        color = "#00ff88"
        state = "ACTIVE"
        glow = "0 0 20px rgba(0, 255, 136, 0.4)"
        label = "DEACTIVATE"
    else:
        color = "#666"
        state = "STANDBY"
        glow = "none"
        label = "ACTIVATE"
    return f"""
<div id="grok-toggle-card"
style="border: 2px solid {color}; border-radius: 12px; padding: 16px;
background: var(--bg-secondary); box-shadow: {glow};
transition: all 0.3s ease;">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div>
<div style="font-weight: 700; font-size: 1.1rem; color: {color};">
GROK MODE: {state}
</div>
<div style="font-size: 0.8rem; color: var(--text-muted); margin-top: 4px;">
xAI frontier reasoning | {settings.grok_default_model}
</div>
</div>
<button hx-post="/grok/toggle"
hx-target="#grok-toggle-card"
hx-swap="outerHTML"
style="background: {color}; color: #000; border: none;
border-radius: 8px; padding: 8px 20px; cursor: pointer;
font-weight: 700; font-family: inherit;">
{label}
</button>
</div>
</div>
"""
def is_grok_mode_active() -> bool:
    """Check if Grok Mode is currently active (used by other modules).

    Note: the flag is a module-level in-memory bool — it resets on process
    restart and is not shared across multiple worker processes.
    """
    return _grok_mode_active

View File

@@ -39,6 +39,7 @@
<a href="/lightning/ledger" class="mc-test-link">LEDGER</a>
<a href="/memory" class="mc-test-link">MEMORY</a>
<a href="/router/status" class="mc-test-link">ROUTER</a>
<a href="/grok/status" class="mc-test-link" style="color:#00ff88;">GROK</a>
<a href="/self-modify/queue" class="mc-test-link">UPGRADES</a>
<a href="/self-coding" class="mc-test-link">SELF-CODING</a>
<a href="/hands" class="mc-test-link">HANDS</a>

View File

@@ -59,10 +59,61 @@
</div>
</div>
<!-- Grok Mode Toggle -->
<div class="card" style="margin-top: 24px;">
<div class="card-header">
<h2 class="card-title">Grok Mode</h2>
<div>
<span class="badge" id="grok-badge" style="background: #666;">STANDBY</span>
</div>
</div>
<!-- NOTE(review): GET /grok/status returns JSON, but this hx-get swaps the
     response into the card as innerHTML — the user would see raw JSON on
     load. The card likely needs an endpoint that returns the toggle-card
     HTML partial (the server already renders one for POST /grok/toggle).
     Confirm intended behavior. -->
<div id="grok-toggle-card"
hx-get="/grok/status"
hx-trigger="load"
hx-target="#grok-toggle-card"
hx-swap="innerHTML">
<!-- Static placeholder shown until the hx-get above fires -->
<div style="border: 2px solid #666; border-radius: 12px; padding: 16px;
background: var(--bg-secondary);">
<div style="display: flex; justify-content: space-between; align-items: center;">
<div>
<div style="font-weight: 700; font-size: 1.1rem; color: #666;">
GROK MODE: LOADING...
</div>
<div style="font-size: 0.8rem; color: var(--text-muted); margin-top: 4px;">
xAI frontier reasoning augmentation
</div>
</div>
<button hx-post="/grok/toggle"
hx-target="#grok-toggle-card"
hx-swap="outerHTML"
style="background: #666; color: #000; border: none;
border-radius: 8px; padding: 8px 20px; cursor: pointer;
font-weight: 700; font-family: inherit;">
ACTIVATE
</button>
</div>
</div>
</div>
<!-- Stat tiles refreshed by loadGrokStats() polling /grok/status -->
<div class="grid grid-3" style="margin-top: 12px;">
<div class="stat">
<div class="stat-value" id="grok-requests">0</div>
<div class="stat-label">Grok Queries</div>
</div>
<div class="stat">
<div class="stat-value" id="grok-tokens">0</div>
<div class="stat-label">Tokens Used</div>
</div>
<div class="stat">
<div class="stat-value" id="grok-cost">0</div>
<div class="stat-label">Est. Cost (sats)</div>
</div>
</div>
</div>
<!-- Heartbeat Monitor -->
<div class="card" style="margin-top: 24px;">
<div class="card-header">
<h2 class="card-title">💓 Heartbeat Monitor</h2>
<h2 class="card-title">Heartbeat Monitor</h2>
<div>
<span class="badge" id="heartbeat-status">Checking...</span>
</div>
@@ -318,11 +369,40 @@ async function loadChatHistory() {
}
}
// Load Grok stats
// Polls GET /grok/status and refreshes the three stat tiles
// (queries / tokens / sats) plus the ACTIVE/STANDBY badge.
async function loadGrokStats() {
try {
const response = await fetch('/grok/status');
const data = await response.json();
// stats is null when the backend singleton hasn't been created yet.
if (data.stats) {
document.getElementById('grok-requests').textContent = data.stats.total_requests || 0;
// Tokens tile shows prompt + completion combined.
document.getElementById('grok-tokens').textContent =
(data.stats.total_prompt_tokens || 0) + (data.stats.total_completion_tokens || 0);
document.getElementById('grok-cost').textContent = data.stats.estimated_cost_sats || 0;
}
const badge = document.getElementById('grok-badge');
if (data.active) {
badge.textContent = 'ACTIVE';
badge.style.background = '#00ff88';
badge.style.color = '#000';
} else {
badge.textContent = 'STANDBY';
badge.style.background = '#666';
badge.style.color = '#fff';
}
} catch (error) {
// Grok endpoint may not respond — silent fallback
}
}
// Initial load
loadSovereignty();
loadHealth();
loadSwarmStats();
loadLightningStats();
loadGrokStats();
loadChatHistory();
// Periodic updates
@@ -330,5 +410,6 @@ setInterval(loadSovereignty, 30000); // Every 30s
setInterval(loadHealth, 10000); // Every 10s
setInterval(loadSwarmStats, 5000); // Every 5s
setInterval(updateHeartbeat, 5000); // Heartbeat every 5s
setInterval(loadGrokStats, 10000); // Grok stats every 10s
</script>
{% endblock %}

View File

@@ -30,7 +30,8 @@
hx-disabled-elt="find button"
hx-on::after-settle="scrollChat()"
hx-on::after-request="if(event.detail.successful){this.querySelector('[name=message]').value='';}"
class="d-flex gap-2">
class="d-flex gap-2"
id="timmy-chat-form">
<input type="text"
name="message"
class="form-control mc-input"
@@ -40,11 +41,20 @@
autocapitalize="none"
spellcheck="false"
enterkeyhint="send"
required />
required
id="timmy-chat-input" />
<button type="submit" class="btn mc-btn-send">
SEND
<span id="send-indicator" class="htmx-indicator">&#x25FC;</span>
</button>
<button type="button"
class="btn"
style="background: #1a1a2e; color: #00ff88; border: 1px solid #00ff88;
font-size: 0.7rem; white-space: nowrap; padding: 4px 10px;"
onclick="askGrok()"
title="Send directly to Grok (xAI)">
GROK
</button>
</form>
</div>
@@ -61,4 +71,20 @@
}
}
scrollChat();
// Reuses the Timmy chat form but points it at /grok/chat for one submit.
function askGrok() {
var input = document.getElementById('timmy-chat-input');
if (!input || !input.value.trim()) return;
var form = document.getElementById('timmy-chat-form');
// Temporarily redirect form to Grok endpoint
var originalAction = form.getAttribute('hx-post');
form.setAttribute('hx-post', '/grok/chat');
htmx.process(form);
htmx.trigger(form, 'submit');
// Restore original action after submission
// NOTE(review): the 100ms timer is a race — a second submit inside that
// window would also go to /grok/chat. An htmx:afterRequest one-shot
// listener would be deterministic; confirm whether the race matters here.
setTimeout(function() {
form.setAttribute('hx-post', originalAction);
htmx.process(form);
}, 100);
}
</script>

View File

@@ -220,10 +220,10 @@ class CascadeRouter:
except ImportError:
return False
elif provider.type in ("openai", "anthropic"):
elif provider.type in ("openai", "anthropic", "grok"):
# Check if API key is set
return provider.api_key is not None and provider.api_key != ""
return True
async def complete(
@@ -337,6 +337,14 @@ class CascadeRouter:
temperature=temperature,
max_tokens=max_tokens,
)
elif provider.type == "grok":
result = await self._call_grok(
provider=provider,
messages=messages,
model=model or provider.get_default_model(),
temperature=temperature,
max_tokens=max_tokens,
)
else:
raise ValueError(f"Unknown provider type: {provider.type}")
@@ -455,7 +463,40 @@ class CascadeRouter:
"content": response.content[0].text,
"model": response.model,
}
async def _call_grok(
    self,
    provider: Provider,
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: Optional[int],
) -> dict:
    """Call xAI's Grok chat-completions API via the OpenAI-compatible SDK.

    Args:
        provider: Provider config supplying ``api_key`` and optional ``base_url``.
        messages: Chat messages in OpenAI format.
        model: Model identifier to request.
        temperature: Sampling temperature.
        max_tokens: Optional completion cap; omitted from the request when None.

    Returns:
        dict with "content" (assistant text) and "model" (as echoed by the API).
    """
    import httpx
    import openai

    client = openai.AsyncOpenAI(
        api_key=provider.api_key,
        base_url=provider.base_url or "https://api.x.ai/v1",
        timeout=httpx.Timeout(300.0),
    )
    kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    # Explicit None check: the original truthiness test silently dropped
    # an explicit max_tokens=0.
    if max_tokens is not None:
        kwargs["max_tokens"] = max_tokens
    response = await client.chat.completions.create(**kwargs)
    return {
        "content": response.choices[0].message.content,
        "model": response.model,
    }
def _record_success(self, provider: Provider, latency_ms: float) -> None:
"""Record a successful request."""
provider.metrics.total_requests += 1

View File

@@ -20,12 +20,12 @@ from timmy.prompts import get_system_prompt
from timmy.tools import create_full_toolkit
if TYPE_CHECKING:
from timmy.backends import TimmyAirLLMAgent
from timmy.backends import GrokBackend, TimmyAirLLMAgent
logger = logging.getLogger(__name__)
# Union type for callers that want to hint the return type.
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
# Models known to be too small for reliable tool calling.
# These hallucinate tool calls as text, invoke tools randomly,
@@ -68,12 +68,12 @@ def _resolve_backend(requested: str | None) -> str:
if requested is not None:
return requested
configured = settings.timmy_model_backend # "ollama" | "airllm" | "auto"
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "auto"
if configured != "auto":
return configured
# "auto" path — lazy import to keep startup fast and tests clean.
from timmy.backends import airllm_available, is_apple_silicon
from timmy.backends import airllm_available, grok_available, is_apple_silicon
if is_apple_silicon() and airllm_available():
return "airllm"
return "ollama"
@@ -97,6 +97,10 @@ def create_timmy(
resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size
if resolved == "grok":
from timmy.backends import GrokBackend
return GrokBackend()
if resolved == "airllm":
from timmy.backends import TimmyAirLLMAgent
return TimmyAirLLMAgent(model_size=size)

View File

@@ -1,20 +1,26 @@
"""AirLLM backend — only imported when the airllm extra is installed.
"""LLM backendsAirLLM (local big models) and Grok (xAI premium cloud).
Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that
exposes both the run(message, stream) → RunResult interface used by the
dashboard and the print_response(message, stream) interface used by the CLI.
On Apple Silicon (arm64 Darwin) the MLX backend is selected automatically;
everywhere else AutoModel (PyTorch) is used.
Provides drop-in replacements for the Agno Agent that expose the same
run(message, stream) → RunResult interface used by the dashboard and the
print_response(message, stream) interface used by the CLI.
No cloud. No telemetry. Sats are sovereignty, boss.
Backends:
- TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
No cloud by default. No telemetry. Sats are sovereignty, boss.
"""
import logging
import platform
from dataclasses import dataclass
from typing import Literal
import time
from dataclasses import dataclass, field
from typing import Literal, Optional
from timmy.prompts import TIMMY_SYSTEM_PROMPT
logger = logging.getLogger(__name__)
# HuggingFace model IDs for each supported size.
_AIRLLM_MODELS: dict[str, str] = {
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -133,3 +139,281 @@ class TimmyAirLLMAgent:
Console().print(Markdown(text))
except ImportError:
print(text)
# ── Grok (xAI) Backend ─────────────────────────────────────────────────────
# Premium cloud augmentation — opt-in only, never the default path.
# Available Grok models (configurable via GROK_DEFAULT_MODEL)
GROK_MODELS: dict[str, str] = {
"grok-3-fast": "grok-3-fast",
"grok-3": "grok-3",
"grok-3-mini": "grok-3-mini",
"grok-3-mini-fast": "grok-3-mini-fast",
}
@dataclass
class GrokUsageStats:
    """Accumulates Grok API usage for cost monitoring and Spark logging."""

    total_requests: int = 0
    total_prompt_tokens: int = 0
    total_completion_tokens: int = 0
    total_latency_ms: float = 0.0
    errors: int = 0
    last_request_at: Optional[float] = None

    @property
    def estimated_cost_sats(self) -> int:
        """Rough spend estimate in satoshis derived from token counters.

        Pricing assumptions (hard-coded): ~$5 per 1M input tokens and
        ~$15 per 1M output tokens for Grok; BTC ≈ $100k so 1 sat ≈ $0.001.
        """
        # Keep the arithmetic in this exact order so float rounding matches
        # previously reported values.
        usd_in = (self.total_prompt_tokens / 1_000_000) * 5
        usd_out = (self.total_completion_tokens / 1_000_000) * 15
        return int((usd_in + usd_out) / 0.001)  # USD → sats
class GrokBackend:
    """xAI Grok backend — premium cloud augmentation for frontier reasoning.

    Uses the OpenAI-compatible SDK to connect to xAI's API.
    Only activated when GROK_ENABLED=true and XAI_API_KEY is set.

    Exposes the same interface as TimmyAirLLMAgent and Agno Agent:
        run(message, stream)    → RunResult   [dashboard]
        arun(message)           → RunResult   [async callers]
        print_response(message) → None        [CLI]
        health_check()          → dict        [monitoring]

    Fix: run() and arun() previously duplicated ~25 lines of stats/history/
    logging bookkeeping; both now share _record_success()/_record_failure(),
    so the two paths can no longer drift apart.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        """Read credentials/model from explicit args, else config.settings."""
        from config import settings

        self._api_key = api_key or settings.xai_api_key
        self._model = model or settings.grok_default_model
        # Rolling chat context: alternating user/assistant message dicts.
        self._history: list[dict[str, str]] = []
        self.stats = GrokUsageStats()
        if not self._api_key:
            logger.warning(
                "GrokBackend created without XAI_API_KEY — "
                "calls will fail until key is configured"
            )

    def _get_client(self):
        """Create a sync OpenAI client configured for the xAI endpoint."""
        import httpx
        from openai import OpenAI

        return OpenAI(
            api_key=self._api_key,
            base_url="https://api.x.ai/v1",
            timeout=httpx.Timeout(300.0),
        )

    async def _get_async_client(self):
        """Create an async OpenAI client configured for the xAI endpoint."""
        import httpx
        from openai import AsyncOpenAI

        return AsyncOpenAI(
            api_key=self._api_key,
            base_url="https://api.x.ai/v1",
            timeout=httpx.Timeout(300.0),
        )

    # ── Public interface (mirrors Agno Agent) ─────────────────────────────

    def run(self, message: str, *, stream: bool = False) -> RunResult:
        """Synchronous inference via Grok API.

        Args:
            message: User prompt
            stream: Accepted for API compat; Grok returns full response

        Returns:
            RunResult with response content; errors are returned as
            content rather than raised.
        """
        if not self._api_key:
            return RunResult(
                content="Grok is not configured. Set XAI_API_KEY to enable."
            )
        start = time.time()
        messages = self._build_messages(message)
        try:
            client = self._get_client()
            response = client.chat.completions.create(
                model=self._model,
                messages=messages,
                temperature=0.7,
            )
            return self._record_success(message, response, start, "Grok response")
        except Exception as exc:
            return self._record_failure(exc, "Grok API error")

    async def arun(self, message: str) -> RunResult:
        """Async inference via Grok API — used by cascade router and tools."""
        if not self._api_key:
            return RunResult(
                content="Grok is not configured. Set XAI_API_KEY to enable."
            )
        start = time.time()
        messages = self._build_messages(message)
        try:
            client = await self._get_async_client()
            response = await client.chat.completions.create(
                model=self._model,
                messages=messages,
                temperature=0.7,
            )
            return self._record_success(
                message, response, start, "Grok async response"
            )
        except Exception as exc:
            return self._record_failure(exc, "Grok async API error")

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference and render the response to stdout (CLI interface)."""
        result = self.run(message, stream=stream)
        try:
            from rich.console import Console
            from rich.markdown import Markdown

            Console().print(Markdown(result.content))
        except ImportError:
            print(result.content)

    def health_check(self) -> dict:
        """Check Grok API connectivity and return a status dict."""
        if not self._api_key:
            return {
                "ok": False,
                "error": "XAI_API_KEY not configured",
                "backend": "grok",
                "model": self._model,
            }
        try:
            client = self._get_client()
            # Lightweight connectivity probe — list models.
            client.models.list()
            return {
                "ok": True,
                "error": None,
                "backend": "grok",
                "model": self._model,
                "stats": {
                    "total_requests": self.stats.total_requests,
                    "estimated_cost_sats": self.stats.estimated_cost_sats,
                },
            }
        except Exception as exc:
            return {
                "ok": False,
                "error": str(exc),
                "backend": "grok",
                "model": self._model,
            }

    @property
    def estimated_cost(self) -> int:
        """Return estimated cost in sats for all requests so far."""
        return self.stats.estimated_cost_sats

    # ── Private helpers ───────────────────────────────────────────────────

    def _record_success(self, message: str, response, start: float, log_label: str) -> RunResult:
        """Shared success bookkeeping for run()/arun(): stats, history, log."""
        content = response.choices[0].message.content or ""
        latency_ms = (time.time() - start) * 1000
        self.stats.total_requests += 1
        self.stats.total_latency_ms += latency_ms
        self.stats.last_request_at = time.time()
        if response.usage:
            self.stats.total_prompt_tokens += response.usage.prompt_tokens
            self.stats.total_completion_tokens += response.usage.completion_tokens
        self._history.append({"role": "user", "content": message})
        self._history.append({"role": "assistant", "content": content})
        # Keep the last 10 turns (20 messages) to bound prompt size.
        if len(self._history) > 20:
            self._history = self._history[-20:]
        logger.info(
            "%s: %d tokens in %.0fms (model=%s)",
            log_label,
            response.usage.completion_tokens if response.usage else 0,
            latency_ms,
            self._model,
        )
        return RunResult(content=content)

    def _record_failure(self, exc: Exception, log_label: str) -> RunResult:
        """Shared error path for run()/arun(): count, log, wrap message."""
        self.stats.errors += 1
        logger.error("%s: %s", log_label, exc)
        return RunResult(
            content=f"Grok temporarily unavailable: {exc}"
        )

    def _build_messages(self, message: str) -> list[dict[str, str]]:
        """Build the messages array: system prompt + recent history + user turn."""
        messages = [{"role": "system", "content": TIMMY_SYSTEM_PROMPT}]
        # Include up to the last 10 messages of history for context.
        messages.extend(self._history[-10:])
        messages.append({"role": "user", "content": message})
        return messages
# ── Module-level Grok singleton ─────────────────────────────────────────────
# Lazily created and shared so routes, tools and the cascade router all
# accumulate usage stats on the same GrokUsageStats instance per process.
_grok_backend: Optional[GrokBackend] = None
def get_grok_backend() -> GrokBackend:
    """Get or create the Grok backend singleton.

    Plain check-then-set, not thread-safe: two concurrent first calls could
    each build a backend and one would win — harmless beyond a lost stats
    increment.
    """
    global _grok_backend
    if _grok_backend is None:
        _grok_backend = GrokBackend()
    return _grok_backend
def grok_available() -> bool:
    """Grok is usable only when explicitly enabled AND an API key is set.

    Any failure (e.g. config import problems) degrades to False so the
    local-first path stays the default.
    """
    try:
        from config import settings

        return bool(settings.grok_enabled) and bool(settings.xai_api_key)
    except Exception:
        return False

View File

@@ -278,39 +278,104 @@ def create_devops_tools(base_dir: str | Path | None = None):
return toolkit
def consult_grok(query: str) -> str:
    """Consult Grok (xAI) for frontier reasoning on complex questions.

    Use this tool when a question requires advanced reasoning, real-time
    knowledge, or capabilities beyond the local model. Grok is a premium
    cloud backend — use sparingly and only for high-complexity queries.

    Args:
        query: The question or reasoning task to send to Grok.

    Returns:
        Grok's response text, or an error/status message.
    """
    from config import settings
    from timmy.backends import grok_available, get_grok_backend

    if not grok_available():
        return (
            "Grok is not available. Enable with GROK_ENABLED=true "
            "and set XAI_API_KEY in your .env file."
        )
    backend = get_grok_backend()

    # Best-effort Spark telemetry — failures never block the query.
    try:
        from spark.engine import spark_engine

        spark_engine.on_tool_executed(
            agent_id="timmy",
            tool_name="consult_grok",
            success=True,
        )
    except Exception:
        pass

    # Attach a Lightning invoice note unless running in free mode.
    invoice_info = ""
    if not settings.grok_free:
        try:
            from lightning.factory import get_backend as get_ln_backend

            ln = get_ln_backend()
            sats = min(settings.grok_max_sats_per_query, 100)
            inv = ln.create_invoice(sats, f"Grok query: {query[:50]}")
            invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
        except Exception:
            pass

    result = backend.run(query)
    return result.content + invoice_info
def create_full_toolkit(base_dir: str | Path | None = None):
"""Create a full toolkit with all available tools (for Timmy).
Includes: web search, file read/write, shell commands, python execution,
and memory search for contextual recall.
memory search for contextual recall, and Grok consultation.
"""
if not _AGNO_TOOLS_AVAILABLE:
# Return None when tools aren't available (tests)
return None
toolkit = Toolkit(name="full")
# Web search
search_tools = DuckDuckGoTools()
toolkit.register(search_tools.web_search, name="web_search")
# Python execution
python_tools = PythonTools()
toolkit.register(python_tools.run_python_code, name="python")
# Shell commands
shell_tools = ShellTools()
toolkit.register(shell_tools.run_shell_command, name="shell")
# File operations
base_path = Path(base_dir) if base_dir else Path.cwd()
file_tools = FileTools(base_dir=base_path)
toolkit.register(file_tools.read_file, name="read_file")
toolkit.register(file_tools.save_file, name="write_file")
toolkit.register(file_tools.list_files, name="list_files")
# Calculator — exact arithmetic (never let the LLM guess)
toolkit.register(calculator, name="calculator")
# Grok consultation — premium frontier reasoning (opt-in)
try:
from timmy.backends import grok_available
if grok_available():
toolkit.register(consult_grok, name="consult_grok")
logger.info("Grok consultation tool registered")
except Exception:
logger.debug("Grok tool not available")
# Memory search - semantic recall
try:
from timmy.semantic_memory import memory_search
@@ -407,6 +472,11 @@ def get_all_available_tools() -> dict[str, dict]:
"description": "Evaluate mathematical expressions with exact results",
"available_in": ["timmy"],
},
"consult_grok": {
"name": "Consult Grok",
"description": "Premium frontier reasoning via xAI Grok (opt-in, Lightning-payable)",
"available_in": ["timmy"],
},
}
# ── Git tools ─────────────────────────────────────────────────────────────

View File

@@ -0,0 +1,284 @@
"""Tests for GrokBackend in src/timmy/backends.py and Grok dashboard routes."""
from unittest.mock import MagicMock, patch
import pytest
# ── grok_available ───────────────────────────────────────────────────────────
def test_grok_available_false_when_disabled():
    """A configured key alone is not enough — GROK_ENABLED must be true."""
    from timmy.backends import grok_available

    with patch("config.settings") as fake_settings:
        fake_settings.grok_enabled = False
        fake_settings.xai_api_key = "xai-test-key"
        assert grok_available() is False
def test_grok_available_false_when_no_key():
    """Enabling Grok without XAI_API_KEY still reports unavailable."""
    from timmy.backends import grok_available

    with patch("config.settings") as fake_settings:
        fake_settings.grok_enabled = True
        fake_settings.xai_api_key = ""
        assert grok_available() is False
def test_grok_available_true_when_enabled_and_key_set():
    """With both the enable flag and a key set, Grok reports available."""
    from timmy.backends import grok_available

    with patch("config.settings") as fake_settings:
        fake_settings.grok_enabled = True
        fake_settings.xai_api_key = "xai-test-key"
        assert grok_available() is True
# ── GrokBackend construction ────────────────────────────────────────────────
def test_grok_backend_init_with_explicit_params():
    """Explicit api_key/model are stored verbatim and stats start zeroed."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    assert (backend._api_key, backend._model) == ("xai-test", "grok-3-fast")
    assert backend.stats.total_requests == 0
def test_grok_backend_init_from_settings():
    """With no explicit args, credentials come from config.settings."""
    from timmy.backends import GrokBackend

    with patch("config.settings") as fake_settings:
        fake_settings.xai_api_key = "xai-from-env"
        fake_settings.grok_default_model = "grok-3"
        backend = GrokBackend()
        assert backend._api_key == "xai-from-env"
        assert backend._model == "grok-3"
def test_grok_backend_run_no_key_returns_error():
    """run() gracefully returns error message when no API key.

    Fix: GrokBackend(api_key="") falls back to settings.xai_api_key
    (``api_key or settings.xai_api_key``), so without patching settings a
    developer's real XAI_API_KEY would make this test hit the network.
    """
    with patch("config.settings") as mock_settings:
        mock_settings.xai_api_key = ""
        mock_settings.grok_default_model = "grok-3-fast"
        from timmy.backends import GrokBackend
        backend = GrokBackend(api_key="", model="grok-3-fast")
        result = backend.run("hello")
    assert "not configured" in result.content
def test_grok_backend_run_success():
    """run() returns the API content and accumulates usage stats."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    reply = MagicMock()
    reply.choices = [MagicMock()]
    reply.choices[0].message.content = "Grok says hello"
    reply.usage = MagicMock(prompt_tokens=10, completion_tokens=5)
    reply.model = "grok-3-fast"
    fake_client = MagicMock()
    fake_client.chat.completions.create.return_value = reply
    with patch.object(backend, "_get_client", return_value=fake_client):
        result = backend.run("hello")
    assert result.content == "Grok says hello"
    assert backend.stats.total_requests == 1
    assert backend.stats.total_prompt_tokens == 10
    assert backend.stats.total_completion_tokens == 5
def test_grok_backend_run_api_error():
    """run() returns error message on API failure."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    broken_client = MagicMock()
    broken_client.chat.completions.create.side_effect = Exception("API timeout")

    with patch.object(backend, "_get_client", return_value=broken_client):
        reply = backend.run("hello")

    # Failure is surfaced as a message, and the error counter ticks up.
    assert "unavailable" in reply.content
    assert backend.stats.errors == 1
def test_grok_backend_history_management():
    """GrokBackend maintains conversation history."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    canned = MagicMock()
    canned.choices = [MagicMock()]
    canned.choices[0].message.content = "response"
    canned.usage = MagicMock(prompt_tokens=10, completion_tokens=5)

    stub_client = MagicMock()
    stub_client.chat.completions.create.return_value = canned

    with patch.object(backend, "_get_client", return_value=stub_client):
        for msg in ("first message", "second message"):
            backend.run(msg)

    # Each run() appends one user turn and one assistant turn.
    assert len(backend._history) == 4
    assert backend._history[0]["role"] == "user"
    assert backend._history[1]["role"] == "assistant"
def test_grok_backend_health_check_no_key():
    """health_check() returns not-ok when no API key."""
    from timmy.backends import GrokBackend

    keyless = GrokBackend(api_key="", model="grok-3-fast")
    report = keyless.health_check()
    # Missing key is reported, not raised.
    assert report["ok"] is False
    assert "not configured" in report["error"]
def test_grok_backend_health_check_success():
    """health_check() returns ok when API key is set and models endpoint works."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    happy_client = MagicMock()
    happy_client.models.list.return_value = []

    with patch.object(backend, "_get_client", return_value=happy_client):
        report = backend.health_check()

    assert report["backend"] == "grok"
    assert report["ok"] is True
def test_grok_backend_estimated_cost():
    """estimated_cost property calculates sats from token usage."""
    from timmy.backends import GrokUsageStats

    usage = GrokUsageStats(
        total_prompt_tokens=1_000_000,
        total_completion_tokens=500_000,
    )
    # Pricing assumed by the backend: $5/1M input + $15/1M output, so
    # $5.00 + $7.50 = $12.50, converted at $0.001 per sat => 12,500 sats.
    assert usage.estimated_cost_sats == 12_500
def test_grok_backend_build_messages():
    """_build_messages includes system prompt and history."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    backend._history = [
        {"role": "user", "content": "previous"},
        {"role": "assistant", "content": "yes"},
    ]

    msgs = backend._build_messages("new question")

    # Order: system prompt, then stored history, then the new user turn.
    assert msgs[0]["role"] == "system"
    assert (msgs[1]["role"], msgs[1]["content"]) == ("user", "previous")
    assert (msgs[-1]["role"], msgs[-1]["content"]) == ("user", "new question")
# ── get_grok_backend singleton ──────────────────────────────────────────────
def test_get_grok_backend_returns_singleton():
"""get_grok_backend returns the same instance on repeated calls."""
import timmy.backends as backends_mod
# Reset singleton
backends_mod._grok_backend = None
b1 = backends_mod.get_grok_backend()
b2 = backends_mod.get_grok_backend()
assert b1 is b2
# Cleanup
backends_mod._grok_backend = None
# ── GROK_MODELS constant ───────────────────────────────────────────────────
def test_grok_models_dict_has_expected_entries():
    from timmy.backends import GROK_MODELS

    # The catalogue must cover both the fast and the standard model.
    for model_name in ("grok-3-fast", "grok-3"):
        assert model_name in GROK_MODELS
# ── consult_grok tool ──────────────────────────────────────────────────────
def test_consult_grok_returns_unavailable_when_disabled():
    """consult_grok tool returns error when Grok is not available."""
    with patch("timmy.backends.grok_available", return_value=False):
        from timmy.tools import consult_grok

        outcome = consult_grok("test query")
    # The tool degrades to a plain "not available" message.
    assert "not available" in outcome
def test_consult_grok_calls_backend_when_available():
    """consult_grok tool calls the Grok backend when available."""
    from timmy.backends import RunResult

    fake_backend = MagicMock()
    fake_backend.run.return_value = RunResult(content="Grok answer")
    fake_backend.stats = MagicMock(total_latency_ms=100)

    with patch("timmy.backends.grok_available", return_value=True), \
         patch("timmy.backends.get_grok_backend", return_value=fake_backend), \
         patch("config.settings") as cfg:
        cfg.configure_mock(
            grok_free=True,
            grok_enabled=True,
            xai_api_key="xai-test",
        )
        from timmy.tools import consult_grok

        reply = consult_grok("complex question")

    assert "Grok answer" in reply
    # The query is forwarded verbatim, exactly once.
    fake_backend.run.assert_called_once_with("complex question")
# ── Grok dashboard route tests ─────────────────────────────────────────────
def test_grok_status_endpoint(client):
    """GET /grok/status returns JSON with Grok configuration."""
    resp = client.get("/grok/status")
    assert resp.status_code == 200
    payload = resp.json()
    # Status payload exposes the full Grok configuration surface.
    for key in ("enabled", "available", "model", "api_key_set"):
        assert key in payload
def test_grok_toggle_returns_html(client):
    """POST /grok/toggle returns HTML response."""
    resp = client.post("/grok/toggle")
    assert resp.status_code == 200
def test_grok_stats_endpoint(client):
    """GET /grok/stats returns usage statistics."""
    resp = client.get("/grok/stats")
    assert resp.status_code == 200
    payload = resp.json()
    # Either real stats or an error object — both are acceptable shapes.
    assert "total_requests" in payload or "error" in payload
def test_grok_chat_without_key(client):
    """POST /grok/chat returns error when Grok is not available."""
    resp = client.post(
        "/grok/chat",
        data={"message": "test query"},
    )
    assert resp.status_code == 200
    # GROK_ENABLED is false in test mode, so expect an unavailable/error note.
    body = resp.text.lower()
    assert any(marker in body for marker in ("not available", "error", "grok"))