Merge pull request #62 from AlexanderWhitestone/claude/grok-backend-monetization-iVc5i
This commit is contained in:
@@ -30,6 +30,15 @@
|
||||
# 8b ~16 GB RAM | 70b ~140 GB RAM | 405b ~810 GB RAM
|
||||
# AIRLLM_MODEL_SIZE=70b
|
||||
|
||||
# ── Grok (xAI) — premium cloud augmentation ──────────────────────────────────
|
||||
# Enable Grok as an opt-in premium backend for frontier reasoning.
|
||||
# Local-first ethos is preserved — Grok only activates when explicitly enabled.
|
||||
# GROK_ENABLED=false
|
||||
# XAI_API_KEY=xai-...
|
||||
# GROK_DEFAULT_MODEL=grok-3-fast
|
||||
# GROK_MAX_SATS_PER_QUERY=200
|
||||
# GROK_FREE=false
|
||||
|
||||
# ── L402 Lightning secrets ───────────────────────────────────────────────────
|
||||
# HMAC secret for invoice verification. MUST be changed in production.
|
||||
# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"
|
||||
|
||||
@@ -32,6 +32,10 @@ services:
|
||||
DEBUG: "true"
|
||||
# Point to host Ollama (Mac default). Override in .env if different.
|
||||
OLLAMA_URL: "${OLLAMA_URL:-http://host.docker.internal:11434}"
|
||||
# Grok (xAI) — opt-in premium cloud backend
|
||||
GROK_ENABLED: "${GROK_ENABLED:-false}"
|
||||
XAI_API_KEY: "${XAI_API_KEY:-}"
|
||||
GROK_DEFAULT_MODEL: "${GROK_DEFAULT_MODEL:-grok-3-fast}"
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway" # Linux compatibility
|
||||
networks:
|
||||
|
||||
@@ -24,13 +24,22 @@ class Settings(BaseSettings):
|
||||
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]")
|
||||
# "auto" — use AirLLM on Apple Silicon if airllm is installed,
|
||||
# fall back to Ollama otherwise
|
||||
timmy_model_backend: Literal["ollama", "airllm", "auto"] = "ollama"
|
||||
timmy_model_backend: Literal["ollama", "airllm", "grok", "auto"] = "ollama"
|
||||
|
||||
# AirLLM model size when backend is airllm or auto.
|
||||
# Larger = smarter, but needs more RAM / disk.
|
||||
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
|
||||
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
|
||||
|
||||
# ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
|
||||
# Grok is a premium augmentation layer — local-first ethos preserved.
|
||||
# Only used when explicitly enabled and query complexity warrants it.
|
||||
grok_enabled: bool = False
|
||||
xai_api_key: str = ""
|
||||
grok_default_model: str = "grok-3-fast"
|
||||
grok_max_sats_per_query: int = 200
|
||||
grok_free: bool = False # Skip Lightning invoice when user has own API key
|
||||
|
||||
# ── Spark Intelligence ────────────────────────────────────────────────
|
||||
# Enable/disable the Spark cognitive layer.
|
||||
# When enabled, Spark captures swarm events, runs EIDOS predictions,
|
||||
|
||||
@@ -35,6 +35,7 @@ from dashboard.routes.scripture import router as scripture_router
|
||||
from dashboard.routes.self_coding import router as self_coding_router
|
||||
from dashboard.routes.self_coding import self_modify_router
|
||||
from dashboard.routes.hands import router as hands_router
|
||||
from dashboard.routes.grok import router as grok_router
|
||||
from infrastructure.router.api import router as cascade_router
|
||||
|
||||
logging.basicConfig(
|
||||
@@ -206,6 +207,7 @@ app.include_router(work_orders_router)
|
||||
app.include_router(tasks_router)
|
||||
app.include_router(scripture_router)
|
||||
app.include_router(hands_router)
|
||||
app.include_router(grok_router)
|
||||
app.include_router(cascade_router)
|
||||
|
||||
|
||||
|
||||
234
src/dashboard/routes/grok.py
Normal file
234
src/dashboard/routes/grok.py
Normal file
@@ -0,0 +1,234 @@
|
||||
"""Grok (xAI) dashboard routes — premium cloud augmentation controls.
|
||||
|
||||
Endpoints
|
||||
---------
|
||||
GET /grok/status — JSON status (API)
|
||||
POST /grok/toggle — Enable/disable Grok Mode (HTMX)
|
||||
POST /grok/chat — Direct Grok query (HTMX)
|
||||
GET /grok/stats — Usage statistics (JSON)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Form, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/grok", tags=["grok"])
|
||||
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
|
||||
|
||||
# In-memory toggle state (persists per process lifetime)
|
||||
_grok_mode_active: bool = False
|
||||
|
||||
|
||||
@router.get("/status")
|
||||
async def grok_status():
|
||||
"""Return Grok backend status as JSON."""
|
||||
from timmy.backends import grok_available
|
||||
|
||||
status = {
|
||||
"enabled": settings.grok_enabled,
|
||||
"available": grok_available(),
|
||||
"active": _grok_mode_active,
|
||||
"model": settings.grok_default_model,
|
||||
"free_mode": settings.grok_free,
|
||||
"max_sats_per_query": settings.grok_max_sats_per_query,
|
||||
"api_key_set": bool(settings.xai_api_key),
|
||||
}
|
||||
|
||||
# Include usage stats if backend exists
|
||||
try:
|
||||
from timmy.backends import get_grok_backend
|
||||
backend = get_grok_backend()
|
||||
status["stats"] = {
|
||||
"total_requests": backend.stats.total_requests,
|
||||
"total_prompt_tokens": backend.stats.total_prompt_tokens,
|
||||
"total_completion_tokens": backend.stats.total_completion_tokens,
|
||||
"estimated_cost_sats": backend.stats.estimated_cost_sats,
|
||||
"errors": backend.stats.errors,
|
||||
}
|
||||
except Exception:
|
||||
status["stats"] = None
|
||||
|
||||
return status
|
||||
|
||||
|
||||
@router.post("/toggle")
|
||||
async def toggle_grok_mode(request: Request):
|
||||
"""Toggle Grok Mode on/off. Returns HTMX partial for the toggle card."""
|
||||
global _grok_mode_active
|
||||
|
||||
from timmy.backends import grok_available
|
||||
|
||||
if not grok_available():
|
||||
return HTMLResponse(
|
||||
'<div class="alert" style="color: var(--danger);">'
|
||||
"Grok unavailable — set GROK_ENABLED=true and XAI_API_KEY in .env"
|
||||
"</div>",
|
||||
status_code=200,
|
||||
)
|
||||
|
||||
_grok_mode_active = not _grok_mode_active
|
||||
state = "ACTIVE" if _grok_mode_active else "STANDBY"
|
||||
|
||||
logger.info("Grok Mode toggled: %s", state)
|
||||
|
||||
# Log to Spark
|
||||
try:
|
||||
from spark.engine import spark_engine
|
||||
import json
|
||||
|
||||
spark_engine.on_tool_executed(
|
||||
agent_id="timmy",
|
||||
tool_name="grok_mode_toggle",
|
||||
success=True,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return HTMLResponse(
|
||||
_render_toggle_card(_grok_mode_active),
|
||||
status_code=200,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/chat", response_class=HTMLResponse)
|
||||
async def grok_chat(request: Request, message: str = Form(...)):
|
||||
"""Send a message directly to Grok and return HTMX chat partial."""
|
||||
from timmy.backends import grok_available, get_grok_backend
|
||||
from dashboard.store import message_log
|
||||
from datetime import datetime
|
||||
|
||||
timestamp = datetime.now().strftime("%H:%M:%S")
|
||||
|
||||
if not grok_available():
|
||||
error = "Grok is not available. Set GROK_ENABLED=true and XAI_API_KEY."
|
||||
message_log.append(role="user", content=f"[Grok] {message}", timestamp=timestamp)
|
||||
message_log.append(role="error", content=error, timestamp=timestamp)
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"partials/chat_message.html",
|
||||
{
|
||||
"user_message": f"[Grok] {message}",
|
||||
"response": None,
|
||||
"error": error,
|
||||
"timestamp": timestamp,
|
||||
},
|
||||
)
|
||||
|
||||
backend = get_grok_backend()
|
||||
|
||||
# Generate invoice if monetization is active
|
||||
invoice_note = ""
|
||||
if not settings.grok_free:
|
||||
try:
|
||||
from lightning.factory import get_backend as get_ln_backend
|
||||
|
||||
ln = get_ln_backend()
|
||||
sats = min(settings.grok_max_sats_per_query, 100)
|
||||
inv = ln.create_invoice(sats, f"Grok: {message[:50]}")
|
||||
invoice_note = f" | {sats} sats"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
result = backend.run(message)
|
||||
response_text = f"**[Grok]{invoice_note}:** {result.content}"
|
||||
except Exception as exc:
|
||||
response_text = None
|
||||
error = f"Grok error: {exc}"
|
||||
|
||||
message_log.append(
|
||||
role="user", content=f"[Ask Grok] {message}", timestamp=timestamp
|
||||
)
|
||||
if response_text:
|
||||
message_log.append(role="agent", content=response_text, timestamp=timestamp)
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"partials/chat_message.html",
|
||||
{
|
||||
"user_message": f"[Ask Grok] {message}",
|
||||
"response": response_text,
|
||||
"error": None,
|
||||
"timestamp": timestamp,
|
||||
},
|
||||
)
|
||||
else:
|
||||
message_log.append(role="error", content=error, timestamp=timestamp)
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"partials/chat_message.html",
|
||||
{
|
||||
"user_message": f"[Ask Grok] {message}",
|
||||
"response": None,
|
||||
"error": error,
|
||||
"timestamp": timestamp,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@router.get("/stats")
|
||||
async def grok_stats():
|
||||
"""Return detailed Grok usage statistics."""
|
||||
try:
|
||||
from timmy.backends import get_grok_backend
|
||||
|
||||
backend = get_grok_backend()
|
||||
return {
|
||||
"total_requests": backend.stats.total_requests,
|
||||
"total_prompt_tokens": backend.stats.total_prompt_tokens,
|
||||
"total_completion_tokens": backend.stats.total_completion_tokens,
|
||||
"total_latency_ms": round(backend.stats.total_latency_ms, 2),
|
||||
"avg_latency_ms": round(
|
||||
backend.stats.total_latency_ms / max(backend.stats.total_requests, 1),
|
||||
2,
|
||||
),
|
||||
"estimated_cost_sats": backend.stats.estimated_cost_sats,
|
||||
"errors": backend.stats.errors,
|
||||
"model": settings.grok_default_model,
|
||||
}
|
||||
except Exception as exc:
|
||||
return {"error": str(exc)}
|
||||
|
||||
|
||||
def _render_toggle_card(active: bool) -> str:
    """Render the Grok Mode toggle card HTML (swapped in place by HTMX)."""
    accent = "#00ff88" if active else "#666"
    state_label = "ACTIVE" if active else "STANDBY"
    box_shadow = "0 0 20px rgba(0, 255, 136, 0.4)" if active else "none"
    button_label = "DEACTIVATE" if active else "ACTIVATE"

    return f"""
    <div id="grok-toggle-card"
         style="border: 2px solid {accent}; border-radius: 12px; padding: 16px;
                background: var(--bg-secondary); box-shadow: {box_shadow};
                transition: all 0.3s ease;">
      <div style="display: flex; justify-content: space-between; align-items: center;">
        <div>
          <div style="font-weight: 700; font-size: 1.1rem; color: {accent};">
            GROK MODE: {state_label}
          </div>
          <div style="font-size: 0.8rem; color: var(--text-muted); margin-top: 4px;">
            xAI frontier reasoning | {settings.grok_default_model}
          </div>
        </div>
        <button hx-post="/grok/toggle"
                hx-target="#grok-toggle-card"
                hx-swap="outerHTML"
                style="background: {accent}; color: #000; border: none;
                       border-radius: 8px; padding: 8px 20px; cursor: pointer;
                       font-weight: 700; font-family: inherit;">
          {button_label}
        </button>
      </div>
    </div>
    """
|
||||
|
||||
|
||||
def is_grok_mode_active() -> bool:
    """Check if Grok Mode is currently active (used by other modules).

    Note: the flag is in-memory, per-process state — it resets on restart
    and is not shared across worker processes.
    """
    return _grok_mode_active
|
||||
@@ -39,6 +39,7 @@
|
||||
<a href="/lightning/ledger" class="mc-test-link">LEDGER</a>
|
||||
<a href="/memory" class="mc-test-link">MEMORY</a>
|
||||
<a href="/router/status" class="mc-test-link">ROUTER</a>
|
||||
<a href="/grok/status" class="mc-test-link" style="color:#00ff88;">GROK</a>
|
||||
<a href="/self-modify/queue" class="mc-test-link">UPGRADES</a>
|
||||
<a href="/self-coding" class="mc-test-link">SELF-CODING</a>
|
||||
<a href="/hands" class="mc-test-link">HANDS</a>
|
||||
|
||||
@@ -59,10 +59,61 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Grok Mode Toggle -->
|
||||
<div class="card" style="margin-top: 24px;">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Grok Mode</h2>
|
||||
<div>
|
||||
<span class="badge" id="grok-badge" style="background: #666;">STANDBY</span>
|
||||
</div>
|
||||
</div>
|
||||
<div id="grok-toggle-card"
|
||||
hx-get="/grok/status"
|
||||
hx-trigger="load"
|
||||
hx-target="#grok-toggle-card"
|
||||
hx-swap="innerHTML">
|
||||
<div style="border: 2px solid #666; border-radius: 12px; padding: 16px;
|
||||
background: var(--bg-secondary);">
|
||||
<div style="display: flex; justify-content: space-between; align-items: center;">
|
||||
<div>
|
||||
<div style="font-weight: 700; font-size: 1.1rem; color: #666;">
|
||||
GROK MODE: LOADING...
|
||||
</div>
|
||||
<div style="font-size: 0.8rem; color: var(--text-muted); margin-top: 4px;">
|
||||
xAI frontier reasoning augmentation
|
||||
</div>
|
||||
</div>
|
||||
<button hx-post="/grok/toggle"
|
||||
hx-target="#grok-toggle-card"
|
||||
hx-swap="outerHTML"
|
||||
style="background: #666; color: #000; border: none;
|
||||
border-radius: 8px; padding: 8px 20px; cursor: pointer;
|
||||
font-weight: 700; font-family: inherit;">
|
||||
ACTIVATE
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid grid-3" style="margin-top: 12px;">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="grok-requests">0</div>
|
||||
<div class="stat-label">Grok Queries</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="grok-tokens">0</div>
|
||||
<div class="stat-label">Tokens Used</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="grok-cost">0</div>
|
||||
<div class="stat-label">Est. Cost (sats)</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Heartbeat Monitor -->
|
||||
<div class="card" style="margin-top: 24px;">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">💓 Heartbeat Monitor</h2>
|
||||
<h2 class="card-title">Heartbeat Monitor</h2>
|
||||
<div>
|
||||
<span class="badge" id="heartbeat-status">Checking...</span>
|
||||
</div>
|
||||
@@ -318,11 +369,40 @@ async function loadChatHistory() {
|
||||
}
|
||||
}
|
||||
|
||||
// Load Grok stats
|
||||
async function loadGrokStats() {
|
||||
try {
|
||||
const response = await fetch('/grok/status');
|
||||
const data = await response.json();
|
||||
|
||||
if (data.stats) {
|
||||
document.getElementById('grok-requests').textContent = data.stats.total_requests || 0;
|
||||
document.getElementById('grok-tokens').textContent =
|
||||
(data.stats.total_prompt_tokens || 0) + (data.stats.total_completion_tokens || 0);
|
||||
document.getElementById('grok-cost').textContent = data.stats.estimated_cost_sats || 0;
|
||||
}
|
||||
|
||||
const badge = document.getElementById('grok-badge');
|
||||
if (data.active) {
|
||||
badge.textContent = 'ACTIVE';
|
||||
badge.style.background = '#00ff88';
|
||||
badge.style.color = '#000';
|
||||
} else {
|
||||
badge.textContent = 'STANDBY';
|
||||
badge.style.background = '#666';
|
||||
badge.style.color = '#fff';
|
||||
}
|
||||
} catch (error) {
|
||||
// Grok endpoint may not respond — silent fallback
|
||||
}
|
||||
}
|
||||
|
||||
// Initial load
|
||||
loadSovereignty();
|
||||
loadHealth();
|
||||
loadSwarmStats();
|
||||
loadLightningStats();
|
||||
loadGrokStats();
|
||||
loadChatHistory();
|
||||
|
||||
// Periodic updates
|
||||
@@ -330,5 +410,6 @@ setInterval(loadSovereignty, 30000); // Every 30s
|
||||
setInterval(loadHealth, 10000); // Every 10s
|
||||
setInterval(loadSwarmStats, 5000); // Every 5s
|
||||
setInterval(updateHeartbeat, 5000); // Heartbeat every 5s
|
||||
setInterval(loadGrokStats, 10000); // Grok stats every 10s
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
@@ -30,7 +30,8 @@
|
||||
hx-disabled-elt="find button"
|
||||
hx-on::after-settle="scrollChat()"
|
||||
hx-on::after-request="if(event.detail.successful){this.querySelector('[name=message]').value='';}"
|
||||
class="d-flex gap-2">
|
||||
class="d-flex gap-2"
|
||||
id="timmy-chat-form">
|
||||
<input type="text"
|
||||
name="message"
|
||||
class="form-control mc-input"
|
||||
@@ -40,11 +41,20 @@
|
||||
autocapitalize="none"
|
||||
spellcheck="false"
|
||||
enterkeyhint="send"
|
||||
required />
|
||||
required
|
||||
id="timmy-chat-input" />
|
||||
<button type="submit" class="btn mc-btn-send">
|
||||
SEND
|
||||
<span id="send-indicator" class="htmx-indicator">◼</span>
|
||||
</button>
|
||||
<button type="button"
|
||||
class="btn"
|
||||
style="background: #1a1a2e; color: #00ff88; border: 1px solid #00ff88;
|
||||
font-size: 0.7rem; white-space: nowrap; padding: 4px 10px;"
|
||||
onclick="askGrok()"
|
||||
title="Send directly to Grok (xAI)">
|
||||
GROK
|
||||
</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
@@ -61,4 +71,20 @@
|
||||
}
|
||||
}
|
||||
scrollChat();
|
||||
|
||||
function askGrok() {
|
||||
var input = document.getElementById('timmy-chat-input');
|
||||
if (!input || !input.value.trim()) return;
|
||||
var form = document.getElementById('timmy-chat-form');
|
||||
// Temporarily redirect form to Grok endpoint
|
||||
var originalAction = form.getAttribute('hx-post');
|
||||
form.setAttribute('hx-post', '/grok/chat');
|
||||
htmx.process(form);
|
||||
htmx.trigger(form, 'submit');
|
||||
// Restore original action after submission
|
||||
setTimeout(function() {
|
||||
form.setAttribute('hx-post', originalAction);
|
||||
htmx.process(form);
|
||||
}, 100);
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -220,10 +220,10 @@ class CascadeRouter:
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
elif provider.type in ("openai", "anthropic"):
|
||||
elif provider.type in ("openai", "anthropic", "grok"):
|
||||
# Check if API key is set
|
||||
return provider.api_key is not None and provider.api_key != ""
|
||||
|
||||
|
||||
return True
|
||||
|
||||
async def complete(
|
||||
@@ -337,6 +337,14 @@ class CascadeRouter:
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
elif provider.type == "grok":
|
||||
result = await self._call_grok(
|
||||
provider=provider,
|
||||
messages=messages,
|
||||
model=model or provider.get_default_model(),
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown provider type: {provider.type}")
|
||||
|
||||
@@ -455,7 +463,40 @@ class CascadeRouter:
|
||||
"content": response.content[0].text,
|
||||
"model": response.model,
|
||||
}
|
||||
|
||||
|
||||
    async def _call_grok(
        self,
        provider: Provider,
        messages: list[dict],
        model: str,
        temperature: float,
        max_tokens: Optional[int],
    ) -> dict:
        """Call xAI Grok API via OpenAI-compatible SDK.

        Args:
            provider: Provider config supplying the API key and optional base URL.
            messages: Chat messages in OpenAI format ({"role": ..., "content": ...}).
            model: Grok model identifier to request.
            temperature: Sampling temperature passed through to the API.
            max_tokens: Optional completion-token cap; omitted when falsy.

        Returns:
            dict with "content" (response text) and "model" (model the API used).
        """
        import httpx
        import openai

        # xAI exposes an OpenAI-compatible endpoint; only the base URL differs.
        client = openai.AsyncOpenAI(
            api_key=provider.api_key,
            base_url=provider.base_url or "https://api.x.ai/v1",
            timeout=httpx.Timeout(300.0),  # generous: large completions can be slow
        )

        kwargs = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }
        # NOTE(review): a falsy max_tokens (0) is treated as "no cap" —
        # presumably intentional; confirm callers never mean a literal 0.
        if max_tokens:
            kwargs["max_tokens"] = max_tokens

        response = await client.chat.completions.create(**kwargs)

        return {
            "content": response.choices[0].message.content,
            "model": response.model,
        }
|
||||
|
||||
def _record_success(self, provider: Provider, latency_ms: float) -> None:
|
||||
"""Record a successful request."""
|
||||
provider.metrics.total_requests += 1
|
||||
|
||||
@@ -20,12 +20,12 @@ from timmy.prompts import get_system_prompt
|
||||
from timmy.tools import create_full_toolkit
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
from timmy.backends import GrokBackend, TimmyAirLLMAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Union type for callers that want to hint the return type.
|
||||
TimmyAgent = Union[Agent, "TimmyAirLLMAgent"]
|
||||
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend"]
|
||||
|
||||
# Models known to be too small for reliable tool calling.
|
||||
# These hallucinate tool calls as text, invoke tools randomly,
|
||||
@@ -68,12 +68,12 @@ def _resolve_backend(requested: str | None) -> str:
|
||||
if requested is not None:
|
||||
return requested
|
||||
|
||||
configured = settings.timmy_model_backend # "ollama" | "airllm" | "auto"
|
||||
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "auto"
|
||||
if configured != "auto":
|
||||
return configured
|
||||
|
||||
# "auto" path — lazy import to keep startup fast and tests clean.
|
||||
from timmy.backends import airllm_available, is_apple_silicon
|
||||
from timmy.backends import airllm_available, grok_available, is_apple_silicon
|
||||
if is_apple_silicon() and airllm_available():
|
||||
return "airllm"
|
||||
return "ollama"
|
||||
@@ -97,6 +97,10 @@ def create_timmy(
|
||||
resolved = _resolve_backend(backend)
|
||||
size = model_size or settings.airllm_model_size
|
||||
|
||||
if resolved == "grok":
|
||||
from timmy.backends import GrokBackend
|
||||
return GrokBackend()
|
||||
|
||||
if resolved == "airllm":
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
return TimmyAirLLMAgent(model_size=size)
|
||||
|
||||
@@ -1,20 +1,26 @@
|
||||
"""AirLLM backend — only imported when the airllm extra is installed.
|
||||
"""LLM backends — AirLLM (local big models) and Grok (xAI premium cloud).
|
||||
|
||||
Provides TimmyAirLLMAgent: a drop-in replacement for an Agno Agent that
|
||||
exposes both the run(message, stream) → RunResult interface used by the
|
||||
dashboard and the print_response(message, stream) interface used by the CLI.
|
||||
On Apple Silicon (arm64 Darwin) the MLX backend is selected automatically;
|
||||
everywhere else AutoModel (PyTorch) is used.
|
||||
Provides drop-in replacements for the Agno Agent that expose the same
|
||||
run(message, stream) → RunResult interface used by the dashboard and the
|
||||
print_response(message, stream) interface used by the CLI.
|
||||
|
||||
No cloud. No telemetry. Sats are sovereignty, boss.
|
||||
Backends:
|
||||
- TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
|
||||
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
|
||||
|
||||
No cloud by default. No telemetry. Sats are sovereignty, boss.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Literal, Optional
|
||||
|
||||
from timmy.prompts import TIMMY_SYSTEM_PROMPT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# HuggingFace model IDs for each supported size.
|
||||
_AIRLLM_MODELS: dict[str, str] = {
|
||||
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
@@ -133,3 +139,281 @@ class TimmyAirLLMAgent:
|
||||
Console().print(Markdown(text))
|
||||
except ImportError:
|
||||
print(text)
|
||||
|
||||
|
||||
# ── Grok (xAI) Backend ─────────────────────────────────────────────────────
|
||||
# Premium cloud augmentation — opt-in only, never the default path.
|
||||
|
||||
# Available Grok models (configurable via GROK_DEFAULT_MODEL)
|
||||
GROK_MODELS: dict[str, str] = {
|
||||
"grok-3-fast": "grok-3-fast",
|
||||
"grok-3": "grok-3",
|
||||
"grok-3-mini": "grok-3-mini",
|
||||
"grok-3-mini-fast": "grok-3-mini-fast",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class GrokUsageStats:
    """Tracks Grok API usage for cost monitoring and Spark logging."""

    total_requests: int = 0
    total_prompt_tokens: int = 0
    total_completion_tokens: int = 0
    total_latency_ms: float = 0.0
    errors: int = 0
    # time.time() of the most recent request, or None if never called.
    last_request_at: Optional[float] = None

    # Pricing assumptions behind estimated_cost_sats (unannotated on purpose:
    # these are class constants, not dataclass fields).
    # ~$5/1M input tokens, ~$15/1M output tokens for Grok;
    # at ~$100k/BTC, 1 sat ≈ $0.001.
    _USD_PER_M_INPUT = 5.0
    _USD_PER_M_OUTPUT = 15.0
    _USD_PER_SAT = 0.001

    @property
    def estimated_cost_sats(self) -> int:
        """Rough cost estimate in sats based on token usage."""
        input_cost = (self.total_prompt_tokens / 1_000_000) * self._USD_PER_M_INPUT
        output_cost = (self.total_completion_tokens / 1_000_000) * self._USD_PER_M_OUTPUT
        total_usd = input_cost + output_cost
        return int(total_usd / self._USD_PER_SAT)  # Convert to sats
|
||||
|
||||
|
||||
class GrokBackend:
    """xAI Grok backend — premium cloud augmentation for frontier reasoning.

    Uses the OpenAI-compatible SDK to connect to xAI's API.
    Only activated when GROK_ENABLED=true and XAI_API_KEY is set.

    Exposes the same interface as TimmyAirLLMAgent and Agno Agent:
        run(message, stream) -> RunResult        [dashboard]
        print_response(message, stream) -> None  [CLI]
        health_check() -> dict                   [monitoring]
    """

    # Keep at most this many history entries (= 10 user/assistant turns).
    _MAX_HISTORY = 20

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ) -> None:
        """Create the backend.

        Args:
            api_key: xAI API key; defaults to settings.xai_api_key.
            model: Grok model name; defaults to settings.grok_default_model.
        """
        from config import settings

        self._api_key = api_key or settings.xai_api_key
        self._model = model or settings.grok_default_model
        self._history: list[dict[str, str]] = []
        self.stats = GrokUsageStats()

        if not self._api_key:
            logger.warning(
                "GrokBackend created without XAI_API_KEY — "
                "calls will fail until key is configured"
            )

    def _get_client(self):
        """Create a synchronous OpenAI client configured for the xAI endpoint."""
        import httpx
        from openai import OpenAI

        return OpenAI(
            api_key=self._api_key,
            base_url="https://api.x.ai/v1",
            timeout=httpx.Timeout(300.0),
        )

    async def _get_async_client(self):
        """Create an async OpenAI client configured for the xAI endpoint."""
        import httpx
        from openai import AsyncOpenAI

        return AsyncOpenAI(
            api_key=self._api_key,
            base_url="https://api.x.ai/v1",
            timeout=httpx.Timeout(300.0),
        )

    # ── Public interface (mirrors Agno Agent) ─────────────────────────────

    def run(self, message: str, *, stream: bool = False) -> RunResult:
        """Synchronous inference via Grok API.

        Args:
            message: User prompt
            stream: Accepted for API compat; Grok returns the full response

        Returns:
            RunResult with response content. Never raises — API failures are
            reported in the content text and counted in stats.errors.
        """
        if not self._api_key:
            return RunResult(
                content="Grok is not configured. Set XAI_API_KEY to enable."
            )

        start = time.time()
        messages = self._build_messages(message)

        try:
            client = self._get_client()
            response = client.chat.completions.create(
                model=self._model,
                messages=messages,
                temperature=0.7,
            )
            return self._finish(message, response, start, log_prefix="Grok")
        except Exception as exc:
            return self._fail(exc, log_prefix="Grok")

    async def arun(self, message: str) -> RunResult:
        """Async inference via Grok API — used by cascade router and tools.

        Same contract as run(): never raises; failures become content text.
        """
        if not self._api_key:
            return RunResult(
                content="Grok is not configured. Set XAI_API_KEY to enable."
            )

        start = time.time()
        messages = self._build_messages(message)

        try:
            client = await self._get_async_client()
            response = await client.chat.completions.create(
                model=self._model,
                messages=messages,
                temperature=0.7,
            )
            return self._finish(message, response, start, log_prefix="Grok async")
        except Exception as exc:
            return self._fail(exc, log_prefix="Grok async")

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Run inference and render the response to stdout (CLI interface)."""
        result = self.run(message, stream=stream)
        try:
            from rich.console import Console
            from rich.markdown import Markdown
            Console().print(Markdown(result.content))
        except ImportError:
            print(result.content)

    def health_check(self) -> dict:
        """Check Grok API connectivity and return a status dict."""
        if not self._api_key:
            return {
                "ok": False,
                "error": "XAI_API_KEY not configured",
                "backend": "grok",
                "model": self._model,
            }

        try:
            client = self._get_client()
            # Lightweight check — list models
            client.models.list()
            return {
                "ok": True,
                "error": None,
                "backend": "grok",
                "model": self._model,
                "stats": {
                    "total_requests": self.stats.total_requests,
                    "estimated_cost_sats": self.stats.estimated_cost_sats,
                },
            }
        except Exception as exc:
            return {
                "ok": False,
                "error": str(exc),
                "backend": "grok",
                "model": self._model,
            }

    @property
    def estimated_cost(self) -> int:
        """Return estimated cost in sats for all requests so far."""
        return self.stats.estimated_cost_sats

    # ── Private helpers ───────────────────────────────────────────────────
    # run() and arun() previously duplicated ~25 lines of bookkeeping; the
    # shared success/failure paths now live in _finish() and _fail().

    def _finish(self, message: str, response, start: float, *, log_prefix: str) -> RunResult:
        """Record stats and history for a successful API response."""
        content = response.choices[0].message.content or ""
        latency_ms = (time.time() - start) * 1000

        # Track usage
        self.stats.total_requests += 1
        self.stats.total_latency_ms += latency_ms
        self.stats.last_request_at = time.time()
        if response.usage:
            self.stats.total_prompt_tokens += response.usage.prompt_tokens
            self.stats.total_completion_tokens += response.usage.completion_tokens

        # Update conversation history, keeping only the last 10 turns
        self._history.append({"role": "user", "content": message})
        self._history.append({"role": "assistant", "content": content})
        if len(self._history) > self._MAX_HISTORY:
            self._history = self._history[-self._MAX_HISTORY:]

        logger.info(
            "%s response: %d tokens in %.0fms (model=%s)",
            log_prefix,
            response.usage.completion_tokens if response.usage else 0,
            latency_ms,
            self._model,
        )
        return RunResult(content=content)

    def _fail(self, exc: Exception, *, log_prefix: str) -> RunResult:
        """Record an API failure and wrap it in a RunResult (never raises)."""
        self.stats.errors += 1
        logger.error("%s API error: %s", log_prefix, exc)
        return RunResult(
            content=f"Grok temporarily unavailable: {exc}"
        )

    def _build_messages(self, message: str) -> list[dict[str, str]]:
        """Build the messages array for the API call."""
        messages = [{"role": "system", "content": TIMMY_SYSTEM_PROMPT}]
        # Include recent conversation history for context
        messages.extend(self._history[-10:])
        messages.append({"role": "user", "content": message})
        return messages
|
||||
|
||||
|
||||
# ── Module-level Grok singleton ─────────────────────────────────────────────
|
||||
|
||||
# Lazily-created, process-wide Grok backend instance.
_grok_backend: Optional[GrokBackend] = None


def get_grok_backend() -> GrokBackend:
    """Return the shared GrokBackend, instantiating it on first access."""
    global _grok_backend
    backend = _grok_backend
    if backend is None:
        backend = GrokBackend()
        _grok_backend = backend
    return backend
|
||||
|
||||
|
||||
def grok_available() -> bool:
    """Report whether the Grok backend can be used.

    True only when GROK_ENABLED is set and an xAI API key is configured.
    Any import or attribute failure is treated as "not available" so that
    callers never crash just by asking.
    """
    try:
        from config import settings
    except Exception:
        return False
    try:
        return bool(settings.grok_enabled and settings.xai_api_key)
    except Exception:
        return False
|
||||
|
||||
@@ -278,39 +278,104 @@ def create_devops_tools(base_dir: str | Path | None = None):
|
||||
return toolkit
|
||||
|
||||
|
||||
def consult_grok(query: str) -> str:
    """Ask Grok (xAI) to reason about a complex question.

    Reserved for queries that exceed the local model's capabilities —
    advanced reasoning, real-time knowledge. Grok is a premium cloud
    backend, so invoke it sparingly.

    Args:
        query: The question or reasoning task to send to Grok.

    Returns:
        Grok's response text, or an error/status message.
    """
    from config import settings
    from timmy.backends import get_grok_backend, grok_available

    if not grok_available():
        return (
            "Grok is not available. Enable with GROK_ENABLED=true "
            "and set XAI_API_KEY in your .env file."
        )

    backend = get_grok_backend()

    # Best-effort Spark telemetry — never let logging break the tool.
    try:
        from spark.engine import spark_engine

        spark_engine.on_tool_executed(
            agent_id="timmy",
            tool_name="consult_grok",
            success=True,
        )
    except Exception:
        pass

    # Monetization: attach a Lightning invoice unless free mode is enabled.
    # Failures here are swallowed so a payment hiccup never blocks the answer.
    invoice_info = ""
    if not settings.grok_free:
        try:
            from lightning.factory import get_backend as get_ln_backend

            sats = min(settings.grok_max_sats_per_query, 100)
            inv = get_ln_backend().create_invoice(sats, f"Grok query: {query[:50]}")
            invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
        except Exception:
            pass

    # invoice_info is "" when no invoice was created, so plain concatenation
    # reproduces the original conditional-append behavior.
    return backend.run(query).content + invoice_info
|
||||
|
||||
|
||||
def create_full_toolkit(base_dir: str | Path | None = None):
|
||||
"""Create a full toolkit with all available tools (for Timmy).
|
||||
|
||||
|
||||
Includes: web search, file read/write, shell commands, python execution,
|
||||
and memory search for contextual recall.
|
||||
memory search for contextual recall, and Grok consultation.
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
# Return None when tools aren't available (tests)
|
||||
return None
|
||||
toolkit = Toolkit(name="full")
|
||||
|
||||
|
||||
# Web search
|
||||
search_tools = DuckDuckGoTools()
|
||||
toolkit.register(search_tools.web_search, name="web_search")
|
||||
|
||||
|
||||
# Python execution
|
||||
python_tools = PythonTools()
|
||||
toolkit.register(python_tools.run_python_code, name="python")
|
||||
|
||||
|
||||
# Shell commands
|
||||
shell_tools = ShellTools()
|
||||
toolkit.register(shell_tools.run_shell_command, name="shell")
|
||||
|
||||
|
||||
# File operations
|
||||
base_path = Path(base_dir) if base_dir else Path.cwd()
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(file_tools.read_file, name="read_file")
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
|
||||
# Calculator — exact arithmetic (never let the LLM guess)
|
||||
toolkit.register(calculator, name="calculator")
|
||||
|
||||
# Grok consultation — premium frontier reasoning (opt-in)
|
||||
try:
|
||||
from timmy.backends import grok_available
|
||||
if grok_available():
|
||||
toolkit.register(consult_grok, name="consult_grok")
|
||||
logger.info("Grok consultation tool registered")
|
||||
except Exception:
|
||||
logger.debug("Grok tool not available")
|
||||
|
||||
# Memory search - semantic recall
|
||||
try:
|
||||
from timmy.semantic_memory import memory_search
|
||||
@@ -407,6 +472,11 @@ def get_all_available_tools() -> dict[str, dict]:
|
||||
"description": "Evaluate mathematical expressions with exact results",
|
||||
"available_in": ["timmy"],
|
||||
},
|
||||
"consult_grok": {
|
||||
"name": "Consult Grok",
|
||||
"description": "Premium frontier reasoning via xAI Grok (opt-in, Lightning-payable)",
|
||||
"available_in": ["timmy"],
|
||||
},
|
||||
}
|
||||
|
||||
# ── Git tools ─────────────────────────────────────────────────────────────
|
||||
|
||||
284
tests/timmy/test_grok_backend.py
Normal file
284
tests/timmy/test_grok_backend.py
Normal file
@@ -0,0 +1,284 @@
|
||||
"""Tests for GrokBackend in src/timmy/backends.py and Grok dashboard routes."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ── grok_available ───────────────────────────────────────────────────────────
|
||||
|
||||
def test_grok_available_false_when_disabled():
    """Grok not available when GROK_ENABLED is false."""
    with patch("config.settings") as mock_settings:
        mock_settings.grok_enabled = False
        mock_settings.xai_api_key = "xai-test-key"
        # Import and call inside the patch: grok_available resolves
        # config.settings at call time, so it must see the mock.
        from timmy.backends import grok_available
        assert grok_available() is False


def test_grok_available_false_when_no_key():
    """Grok not available when XAI_API_KEY is empty."""
    with patch("config.settings") as mock_settings:
        mock_settings.grok_enabled = True
        mock_settings.xai_api_key = ""
        from timmy.backends import grok_available
        assert grok_available() is False


def test_grok_available_true_when_enabled_and_key_set():
    """Grok available when both enabled and key are set."""
    with patch("config.settings") as mock_settings:
        mock_settings.grok_enabled = True
        mock_settings.xai_api_key = "xai-test-key"
        from timmy.backends import grok_available
        assert grok_available() is True
|
||||
|
||||
|
||||
# ── GrokBackend construction ────────────────────────────────────────────────
|
||||
|
||||
def test_grok_backend_init_with_explicit_params():
    """GrokBackend can be created with explicit api_key and model."""
    from timmy.backends import GrokBackend
    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    assert backend._api_key == "xai-test"
    assert backend._model == "grok-3-fast"
    # A fresh backend starts with zeroed usage statistics.
    assert backend.stats.total_requests == 0


def test_grok_backend_init_from_settings():
    """GrokBackend reads from config.settings when no params given."""
    with patch("config.settings") as mock_settings:
        mock_settings.xai_api_key = "xai-from-env"
        mock_settings.grok_default_model = "grok-3"
        # Construct inside the patch so __init__ picks up the mocked settings.
        from timmy.backends import GrokBackend
        backend = GrokBackend()
        assert backend._api_key == "xai-from-env"
        assert backend._model == "grok-3"


def test_grok_backend_run_no_key_returns_error():
    """run() gracefully returns error message when no API key."""
    from timmy.backends import GrokBackend
    backend = GrokBackend(api_key="", model="grok-3-fast")
    result = backend.run("hello")
    # No exception: the missing key is reported in the response content.
    assert "not configured" in result.content
|
||||
|
||||
|
||||
def test_grok_backend_run_success():
    """run() returns content from the API on success."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    # Fake an OpenAI-style chat-completion response object.
    mock_response = MagicMock()
    mock_response.choices = [MagicMock()]
    mock_response.choices[0].message.content = "Grok says hello"
    mock_response.usage = MagicMock()
    mock_response.usage.prompt_tokens = 10
    mock_response.usage.completion_tokens = 5
    mock_response.model = "grok-3-fast"

    mock_client = MagicMock()
    mock_client.chat.completions.create.return_value = mock_response

    with patch.object(backend, "_get_client", return_value=mock_client):
        result = backend.run("hello")

    assert result.content == "Grok says hello"
    # Usage stats must reflect the single mocked call.
    assert backend.stats.total_requests == 1
    assert backend.stats.total_prompt_tokens == 10
    assert backend.stats.total_completion_tokens == 5


def test_grok_backend_run_api_error():
    """run() returns error message on API failure."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    mock_client = MagicMock()
    mock_client.chat.completions.create.side_effect = Exception("API timeout")

    with patch.object(backend, "_get_client", return_value=mock_client):
        result = backend.run("hello")

    # API failures are surfaced in the content, not raised, and counted.
    assert "unavailable" in result.content
    assert backend.stats.errors == 1
|
||||
|
||||
|
||||
def test_grok_backend_history_management():
    """GrokBackend maintains conversation history."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    mock_response = MagicMock()
    mock_response.choices = [MagicMock()]
    mock_response.choices[0].message.content = "response"
    mock_response.usage = MagicMock()
    mock_response.usage.prompt_tokens = 10
    mock_response.usage.completion_tokens = 5

    mock_client = MagicMock()
    mock_client.chat.completions.create.return_value = mock_response

    with patch.object(backend, "_get_client", return_value=mock_client):
        backend.run("first message")
        backend.run("second message")

    # Each run() appends a user turn and an assistant turn, in order.
    assert len(backend._history) == 4  # 2 user + 2 assistant
    assert backend._history[0]["role"] == "user"
    assert backend._history[1]["role"] == "assistant"
|
||||
|
||||
|
||||
def test_grok_backend_health_check_no_key():
    """health_check() returns not-ok when no API key."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="", model="grok-3-fast")
    health = backend.health_check()
    assert health["ok"] is False
    assert "not configured" in health["error"]


def test_grok_backend_health_check_success():
    """health_check() returns ok when API key is set and models endpoint works."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")

    # health_check probes the models endpoint; make it succeed.
    mock_client = MagicMock()
    mock_client.models.list.return_value = []

    with patch.object(backend, "_get_client", return_value=mock_client):
        health = backend.health_check()

    assert health["ok"] is True
    assert health["backend"] == "grok"
|
||||
|
||||
|
||||
def test_grok_backend_estimated_cost():
    """GrokUsageStats.estimated_cost_sats converts token usage to sats."""
    from timmy.backends import GrokUsageStats

    stats = GrokUsageStats(
        total_prompt_tokens=1_000_000,
        total_completion_tokens=500_000,
    )
    # Pricing assumed by this test (confirm against GrokUsageStats):
    # Input: 1M tokens * $5/1M = $5
    # Output: 500K tokens * $15/1M = $7.50
    # Total: $12.50 / $0.001 = 12,500 sats
    assert stats.estimated_cost_sats == 12500
|
||||
|
||||
|
||||
def test_grok_backend_build_messages():
    """_build_messages includes system prompt and history."""
    from timmy.backends import GrokBackend

    backend = GrokBackend(api_key="xai-test", model="grok-3-fast")
    # Seed prior turns directly; _build_messages should replay them
    # between the system prompt and the new user message.
    backend._history = [
        {"role": "user", "content": "previous"},
        {"role": "assistant", "content": "yes"},
    ]

    messages = backend._build_messages("new question")
    assert messages[0]["role"] == "system"
    assert messages[1]["role"] == "user"
    assert messages[1]["content"] == "previous"
    assert messages[-1]["role"] == "user"
    assert messages[-1]["content"] == "new question"
|
||||
|
||||
|
||||
# ── get_grok_backend singleton ──────────────────────────────────────────────
|
||||
|
||||
def test_get_grok_backend_returns_singleton():
    """get_grok_backend returns the same instance on repeated calls."""
    import timmy.backends as backends_mod

    # Reset the module-level singleton so this test is order-independent.
    backends_mod._grok_backend = None

    b1 = backends_mod.get_grok_backend()
    b2 = backends_mod.get_grok_backend()
    assert b1 is b2

    # Cleanup so later tests start from a clean slate.
    backends_mod._grok_backend = None


# ── GROK_MODELS constant ───────────────────────────────────────────────────


def test_grok_models_dict_has_expected_entries():
    """GROK_MODELS exposes the supported model identifiers."""
    from timmy.backends import GROK_MODELS
    assert "grok-3-fast" in GROK_MODELS
    assert "grok-3" in GROK_MODELS
|
||||
|
||||
|
||||
# ── consult_grok tool ──────────────────────────────────────────────────────
|
||||
|
||||
def test_consult_grok_returns_unavailable_when_disabled():
    """consult_grok tool returns error when Grok is not available."""
    with patch("timmy.backends.grok_available", return_value=False):
        # consult_grok imports grok_available at call time, so the patch
        # on timmy.backends is what it resolves.
        from timmy.tools import consult_grok
        result = consult_grok("test query")
        assert "not available" in result


def test_consult_grok_calls_backend_when_available():
    """consult_grok tool calls the Grok backend when available."""
    from timmy.backends import RunResult

    mock_backend = MagicMock()
    mock_backend.run.return_value = RunResult(content="Grok answer")
    mock_backend.stats = MagicMock()
    mock_backend.stats.total_latency_ms = 100

    # grok_free=True skips the Lightning-invoice branch entirely.
    with patch("timmy.backends.grok_available", return_value=True), \
         patch("timmy.backends.get_grok_backend", return_value=mock_backend), \
         patch("config.settings") as mock_settings:
        mock_settings.grok_free = True
        mock_settings.grok_enabled = True
        mock_settings.xai_api_key = "xai-test"
        from timmy.tools import consult_grok
        result = consult_grok("complex question")

    assert "Grok answer" in result
    mock_backend.run.assert_called_once_with("complex question")
|
||||
|
||||
|
||||
# ── Grok dashboard route tests ─────────────────────────────────────────────
|
||||
|
||||
# NOTE(review): `client` is presumably a pytest fixture (see conftest)
# providing an HTTP test client for the dashboard app — confirm.

def test_grok_status_endpoint(client):
    """GET /grok/status returns JSON with Grok configuration."""
    response = client.get("/grok/status")
    assert response.status_code == 200
    data = response.json()
    assert "enabled" in data
    assert "available" in data
    assert "model" in data
    assert "api_key_set" in data


def test_grok_toggle_returns_html(client):
    """POST /grok/toggle returns HTML response."""
    response = client.post("/grok/toggle")
    assert response.status_code == 200


def test_grok_stats_endpoint(client):
    """GET /grok/stats returns usage statistics."""
    response = client.get("/grok/stats")
    assert response.status_code == 200
    data = response.json()
    # Either real stats or an error payload is acceptable here.
    assert "total_requests" in data or "error" in data


def test_grok_chat_without_key(client):
    """POST /grok/chat returns error when Grok is not available."""
    response = client.post(
        "/grok/chat",
        data={"message": "test query"},
    )
    assert response.status_code == 200
    # Should contain error since GROK_ENABLED is false in test mode
    assert "not available" in response.text.lower() or "error" in response.text.lower() or "grok" in response.text.lower()
|
||||
Reference in New Issue
Block a user