Compare commits
1 Commits
feat/dynam
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed0ba7f5d8 |
722
nexus/bilbo_harness.py
Normal file
722
nexus/bilbo_harness.py
Normal file
@@ -0,0 +1,722 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bilbo Harness — Light-Duty Gateway backed by local Gemma 4B (Ollama)
|
||||
|
||||
Bilbo's lane: documentation, labelling, tagging, formatting.
|
||||
Free local compute — no API key, no cost, no cloud dependency.
|
||||
|
||||
Architecture:
|
||||
Timmy (sovereign)
|
||||
├── Ezra (harness — Claude Opus 4.6, architecture/triage)
|
||||
├── Bezalel (harness — Claude Opus 4.6, security/forge)
|
||||
├── Allegro (harness — Kimi K2.5, bulk code execution)
|
||||
└── Bilbo (harness — Gemma 4B local, light-duty support) ← this module
|
||||
|
||||
Routing principles:
|
||||
- DO route here: doc stubs, tag/label extraction, README updates, issue formatting
|
||||
- DO NOT route here: security audits, complex reasoning, multi-step refactors
|
||||
|
||||
Ollama must be running locally with the gemma model pulled:
|
||||
ollama pull gemma3:4b (or gemma:4b, gemma2:2b — see BILBO_MODEL env var)
|
||||
ollama serve
|
||||
|
||||
Usage:
|
||||
# Single prompt:
|
||||
python -m nexus.bilbo_harness "Summarise this issue: ..."
|
||||
|
||||
# Serve as HTTP gateway:
|
||||
python -m nexus.bilbo_harness --serve --port 9400
|
||||
|
||||
# Summarise a file:
|
||||
python -m nexus.bilbo_harness --summarise path/to/file.md
|
||||
|
||||
Environment Variables:
|
||||
BILBO_MODEL — Ollama model tag (default: gemma3:4b)
|
||||
OLLAMA_BASE_URL — Ollama HTTP base (default: http://localhost:11434)
|
||||
HERMES_WS_URL — Hermes telemetry WebSocket (default: ws://localhost:8000/ws)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Iterator, Optional, Union
|
||||
|
||||
import requests
|
||||
|
||||
# Module logger.  basicConfig() runs at import time so the harness produces
# readable output when launched directly (python -m nexus.bilbo_harness);
# host applications can reconfigure logging afterwards.
log = logging.getLogger("bilbo")
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [bilbo] %(message)s",
    datefmt="%H:%M:%S",
)
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
# CONFIGURATION
# ═══════════════════════════════════════════════════════════════════════════

# Fallback model tag; the BILBO_MODEL env var is consulted in
# BilboHarness.__init__, not here.
BILBO_MODEL_DEFAULT = "gemma3:4b"

# Ollama OpenAI-compatible endpoint (v0.1.24+)
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
OLLAMA_CHAT_URL = f"{OLLAMA_BASE_URL}/v1/chat/completions"  # OpenAI-shaped chat endpoint
OLLAMA_TAGS_URL = f"{OLLAMA_BASE_URL}/api/tags"             # installed-model listing

# Hermes telemetry WebSocket; per _connect_hermes, a failed connection only
# disables telemetry — it is never fatal.
DEFAULT_HERMES_WS_URL = os.environ.get("HERMES_WS_URL", "ws://localhost:8000/ws")
HARNESS_ID = "bilbo"
HARNESS_NAME = "Bilbo Harness"

# Light-duty task types Bilbo handles well
BILBO_TASK_LANES = ["documentation", "tagging", "labelling", "formatting", "summarisation"]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# DATA CLASSES
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@dataclass
class BilboResponse:
    """Result of one Bilbo generate call.

    Carries the generated text plus call metadata (model tag, token
    counts, wall-clock latency).  On failure, ``error`` holds a human
    readable message and ``text`` stays empty.
    """
    text: str = ""
    model: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
    latency_ms: float = 0.0
    error: Optional[str] = None
    # Stamped at construction time, UTC, ISO-8601.
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_dict(self) -> dict:
        """Serialise this response to a plain, JSON-ready dict."""
        keys = (
            "text",
            "model",
            "input_tokens",
            "output_tokens",
            "latency_ms",
            "error",
            "timestamp",
        )
        return {k: getattr(self, k) for k in keys}
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# BILBO HARNESS
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
class BilboHarness:
    """
    Bilbo gateway harness — local Gemma 4B via Ollama.

    Handles light-duty tasks: documentation stubs, tag extraction, issue
    formatting, README updates, label suggestions.

    All calls use the Ollama OpenAI-compatible endpoint so the same
    request shape works against any future model swap.
    """

    def __init__(
        self,
        model: Optional[str] = None,
        ollama_base_url: str = OLLAMA_BASE_URL,
        hermes_ws_url: str = DEFAULT_HERMES_WS_URL,
    ):
        """
        Args:
            model: Ollama model tag; falls back to the BILBO_MODEL env var,
                then BILBO_MODEL_DEFAULT.
            ollama_base_url: Base URL of the local Ollama server.
            hermes_ws_url: Hermes telemetry WebSocket URL.
        """
        # Explicit argument wins over env var, which wins over the default.
        self.model = model or os.environ.get("BILBO_MODEL", BILBO_MODEL_DEFAULT)
        self.ollama_base_url = ollama_base_url
        self.chat_url = f"{ollama_base_url}/v1/chat/completions"
        self.hermes_ws_url = hermes_ws_url

        # Session bookkeeping
        self.session_id = str(uuid.uuid4())[:8]
        self.request_count = 0
        self.total_input_tokens = 0
        self.total_output_tokens = 0

        # WebSocket connection (lazy — opened in _connect_hermes)
        self._ws = None
        self._ws_connected = False

    # ═══ LIFECYCLE ═══════════════════════════════════════════════════════

    async def start(self):
        """Register harness on the network via Hermes WebSocket."""
        log.info("=" * 50)
        log.info(f"{HARNESS_NAME} — STARTING")
        log.info(f" Session: {self.session_id}")
        log.info(f" Model: {self.model}")
        log.info(f" Ollama: {self.ollama_base_url}")
        log.info(f" Hermes: {self.hermes_ws_url}")
        log.info(f" Lane: {', '.join(BILBO_TASK_LANES)}")
        log.info("=" * 50)

        await self._connect_hermes()
        await self._send_telemetry({
            "type": "harness_register",
            "harness_id": HARNESS_ID,
            "session_id": self.session_id,
            "model": self.model,
            "capabilities": BILBO_TASK_LANES,
            "transport": "ollama-local",
        })
        log.info("Bilbo registered on network")

    async def stop(self):
        """Deregister and disconnect."""
        await self._send_telemetry({
            "type": "harness_deregister",
            "harness_id": HARNESS_ID,
            "session_id": self.session_id,
            "stats": self._session_stats(),
        })
        if self._ws:
            try:
                await self._ws.close()
            except Exception:
                # Best-effort close; the connection may already be dead.
                pass
        self._ws_connected = False
        log.info(f"{HARNESS_NAME} stopped. {self._session_stats()}")

    # ═══ HEALTH CHECK ═══════════════════════════════════════════════════

    def check_ollama(self) -> dict:
        """
        Verify Ollama is running and the configured model is available.

        Returns dict with keys: running (bool), model_available (bool),
        available_models (list[str]), error (str|None).
        """
        try:
            r = requests.get(f"{self.ollama_base_url}/api/tags", timeout=5)
            if r.status_code != 200:
                return {
                    "running": False,
                    "model_available": False,
                    "available_models": [],
                    "error": f"Ollama returned HTTP {r.status_code}",
                }
            data = r.json()
            models = [m["name"] for m in data.get("models", [])]
            # Accept the exact tag or a variant of the SAME tag
            # (gemma3:4b matches gemma3:4b-instruct-q4_0).  The old
            # family-prefix match wrongly accepted e.g. gemma3:12b for
            # gemma3:4b.  A bare family name ("gemma3", no colon) still
            # matches any tag of that family.
            family = self.model.split(":", 1)[0]
            model_available = any(
                m == self.model
                or m.startswith(f"{self.model}-")
                or (":" not in self.model and m.split(":", 1)[0] == family)
                for m in models
            )
            return {
                "running": True,
                "model_available": model_available,
                "available_models": models,
                "error": None,
            }
        except requests.ConnectionError:
            return {
                "running": False,
                "model_available": False,
                "available_models": [],
                "error": f"Cannot connect to Ollama at {self.ollama_base_url}",
            }
        except Exception as e:
            # Covers timeouts and malformed JSON from a half-up server.
            return {
                "running": False,
                "model_available": False,
                "available_models": [],
                "error": str(e),
            }

    # ═══ CORE GENERATION ═════════════════════════════════════════════════

    def generate(
        self,
        prompt: Union[str, list[dict]],
        *,
        system: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: float = 0.3,
    ) -> BilboResponse:
        """
        Generate a response from the local Gemma model via Ollama.

        Args:
            prompt: String prompt or list of message dicts
            system: Optional system instruction
            max_tokens: Override default max output tokens (None = Ollama default)
            temperature: Sampling temperature (default: 0.3 for focused output)

        Returns:
            BilboResponse with text, token counts, latency
        """
        messages = self._build_messages(prompt, system=system)
        response = self._call_ollama(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        self._record(response)
        return response

    def summarise(self, text: str, max_words: int = 100) -> BilboResponse:
        """
        Summarise text in plain language.

        Args:
            text: Content to summarise
            max_words: Target word count for the summary

        Returns:
            BilboResponse with the summary in .text
        """
        system = (
            "You are a concise technical writer. "
            "Summarise the provided text clearly and accurately. "
            "Use plain language. Avoid jargon. Be brief."
        )
        prompt = (
            f"Summarise the following in approximately {max_words} words:\n\n{text}"
        )
        # Low temperature keeps the summary close to the source.
        return self.generate(prompt, system=system, temperature=0.2)

    def extract_tags(self, text: str) -> BilboResponse:
        """
        Extract relevant tags/labels from text for issue or doc labelling.

        Returns:
            BilboResponse where .text contains a comma-separated tag list
        """
        system = (
            "You are a tagging assistant. "
            "Given some text, output a comma-separated list of short, lowercase tags "
            "(3-8 tags). Output ONLY the comma-separated list, nothing else."
        )
        prompt = f"Extract tags for:\n\n{text}"
        # Tags are short — cap output tokens to keep the call fast.
        return self.generate(prompt, system=system, temperature=0.1, max_tokens=64)

    def format_doc(self, text: str, target_format: str = "markdown") -> BilboResponse:
        """
        Reformat or clean up a documentation snippet.

        Args:
            text: The raw documentation text
            target_format: Output format (default: markdown)

        Returns:
            BilboResponse with the reformatted content in .text
        """
        system = (
            f"You are a documentation formatter. "
            f"Reformat the provided text as clean {target_format}. "
            f"Fix whitespace, headings, and lists. Preserve meaning exactly."
        )
        prompt = f"Reformat this documentation:\n\n{text}"
        return self.generate(prompt, system=system, temperature=0.1)

    def write_doc_stub(self, signature: str, context: str = "") -> BilboResponse:
        """
        Write a documentation stub for a function/class signature.

        Args:
            signature: Function or class signature string
            context: Optional surrounding code context

        Returns:
            BilboResponse with the docstring stub in .text
        """
        system = (
            "You are a Python docstring writer. "
            "Write a concise docstring for the given signature. "
            "Include Args and Returns sections where applicable. "
            "Output only the docstring, including triple-quotes."
        )
        prompt = signature
        if context:
            prompt = f"Context:\n{context}\n\nSignature: {signature}"
        return self.generate(prompt, system=system, temperature=0.2)

    # ═══ INTERNAL: API CALL ══════════════════════════════════════════════

    def _call_ollama(
        self,
        messages: list[dict],
        max_tokens: Optional[int] = None,
        temperature: float = 0.3,
    ) -> BilboResponse:
        """Make a single call to the Ollama OpenAI-compatible endpoint."""
        headers = {"Content-Type": "application/json"}
        payload: dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "stream": False,
            "options": {"temperature": temperature},
        }
        if max_tokens is not None:
            # Ollama's name for the max-output-token option.
            payload["options"]["num_predict"] = max_tokens

        t0 = time.time()
        try:
            # Generous timeout: first call may block on model load.
            r = requests.post(
                self.chat_url, json=payload, headers=headers, timeout=120
            )
            latency_ms = (time.time() - t0) * 1000

            if r.status_code != 200:
                return BilboResponse(
                    model=self.model,
                    latency_ms=latency_ms,
                    error=f"HTTP {r.status_code}: {r.text[:200]}",
                )

            data = r.json()
            choice = data.get("choices", [{}])[0]
            text = choice.get("message", {}).get("content", "")
            usage = data.get("usage", {})
            input_tokens = usage.get("prompt_tokens", 0)
            output_tokens = usage.get("completion_tokens", 0)

            return BilboResponse(
                text=text,
                model=self.model,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                latency_ms=latency_ms,
            )

        except requests.Timeout:
            return BilboResponse(
                model=self.model,
                latency_ms=(time.time() - t0) * 1000,
                error="Request timed out (120s) — model may still be loading",
            )
        except requests.ConnectionError:
            return BilboResponse(
                model=self.model,
                latency_ms=(time.time() - t0) * 1000,
                error=(
                    f"Cannot connect to Ollama at {self.ollama_base_url}. "
                    "Run: ollama serve"
                ),
            )
        except Exception as e:
            # Any other failure (bad JSON, unexpected shape) is reported,
            # never raised — callers always get a BilboResponse.
            return BilboResponse(
                model=self.model,
                latency_ms=(time.time() - t0) * 1000,
                error=str(e),
            )

    # ═══ INTERNAL: HELPERS ═══════════════════════════════════════════════

    @staticmethod
    def _build_messages(
        prompt: Union[str, list[dict]],
        system: Optional[str] = None,
    ) -> list[dict]:
        """Build the messages list for Ollama chat API."""
        messages: list[dict] = []
        if system:
            messages.append({"role": "system", "content": system})
        if isinstance(prompt, str):
            messages.append({"role": "user", "content": prompt})
        else:
            # Caller supplied pre-built message dicts; pass them through.
            messages.extend(prompt)
        return messages

    def _record(self, response: BilboResponse):
        """Update session stats and emit telemetry for a completed response."""
        self.request_count += 1
        self.total_input_tokens += response.input_tokens
        self.total_output_tokens += response.output_tokens

        if response.error:
            log.warning(f"[{response.model}] error: {response.error}")
        else:
            log.info(
                f"[{response.model}] {response.latency_ms:.0f}ms | "
                f"in={response.input_tokens} out={response.output_tokens}"
            )

        # Fire-and-forget telemetry.  Only possible when an event loop is
        # already running in this thread.  The previous code used
        # asyncio.get_event_loop(), which is deprecated outside a running
        # loop and could schedule the task on a loop that never runs.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return
        loop.create_task(
            self._send_telemetry({
                "type": "bilbo_response",
                "harness_id": HARNESS_ID,
                "session_id": self.session_id,
                "model": response.model,
                "latency_ms": response.latency_ms,
                "input_tokens": response.input_tokens,
                "output_tokens": response.output_tokens,
                "error": response.error,
            })
        )

    def _session_stats(self) -> dict:
        """Snapshot of per-session counters (requests, token totals)."""
        return {
            "session_id": self.session_id,
            "request_count": self.request_count,
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
        }

    # ═══ HERMES WEBSOCKET ════════════════════════════════════════════════

    async def _connect_hermes(self):
        """Connect to Hermes WebSocket for telemetry."""
        try:
            # Imported lazily so the harness works without websockets installed.
            import websockets  # type: ignore
            self._ws = await websockets.connect(self.hermes_ws_url)
            self._ws_connected = True
            log.info(f"Connected to Hermes: {self.hermes_ws_url}")
        except Exception as e:
            # Telemetry is optional — failure to connect is never fatal.
            log.warning(f"Hermes connection failed (telemetry disabled): {e}")
            self._ws_connected = False

    async def _send_telemetry(self, data: dict):
        """Send a telemetry event to Hermes; silently no-op when offline."""
        if not self._ws_connected or not self._ws:
            return
        try:
            await self._ws.send(json.dumps(data))
        except Exception as e:
            # Mark disconnected so later sends short-circuit.
            log.warning(f"Telemetry send failed: {e}")
            self._ws_connected = False
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# HTTP SERVER — expose harness to the network
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def create_app(harness: BilboHarness):
    """
    Create a minimal HTTP app exposing Bilbo's harness to the network.

    Endpoints:
        POST /generate        — general text generation
        POST /summarise       — summarise provided text
        POST /extract-tags    — extract tags from text
        POST /format-doc      — reformat documentation
        POST /write-doc-stub  — write a docstring stub
        GET  /health          — health check (includes Ollama status)
        GET  /status          — session stats

    Returns:
        (HTTPServer, BilboHandler) — the stdlib server class and the
        request-handler class bound to *harness*; the caller instantiates
        and runs the server.
    """
    from http.server import BaseHTTPRequestHandler, HTTPServer

    class BilboHandler(BaseHTTPRequestHandler):
        def log_message(self, fmt, *args):
            # Route http.server's default stderr logging through our logger.
            log.info(f"HTTP {fmt % args}")

        def _read_body(self) -> dict:
            """Parse the JSON request body; raise ValueError if it is not
            a JSON object (empty body parses as {})."""
            length = int(self.headers.get("Content-Length", 0))
            raw = self.rfile.read(length) if length else b"{}"
            data = json.loads(raw)  # JSONDecodeError is a ValueError
            if not isinstance(data, dict):
                raise ValueError("request body must be a JSON object")
            return data

        def _send_json(self, data: dict, status: int = 200):
            """Write *data* as a JSON response with the given status."""
            body = json.dumps(data).encode()
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def do_GET(self):
            if self.path == "/health":
                ollama_status = harness.check_ollama()
                self._send_json({
                    "status": "ok" if ollama_status["running"] else "degraded",
                    "harness": HARNESS_ID,
                    "model": harness.model,
                    "ollama": ollama_status,
                })
            elif self.path == "/status":
                self._send_json({
                    **harness._session_stats(),
                    "model": harness.model,
                    "ollama_base_url": harness.ollama_base_url,
                    "lanes": BILBO_TASK_LANES,
                })
            else:
                self._send_json({"error": "Not found"}, 404)

        def do_POST(self):
            # Malformed JSON used to raise out of the handler and reset the
            # connection; answer 400 instead.
            try:
                body = self._read_body()
            except ValueError:
                self._send_json({"error": "Invalid JSON body"}, 400)
                return

            if self.path == "/generate":
                prompt = body.get("prompt", "")
                system = body.get("system")
                max_tokens = body.get("max_tokens")
                temperature = float(body.get("temperature", 0.3))
                response = harness.generate(
                    prompt, system=system, max_tokens=max_tokens,
                    temperature=temperature,
                )
                self._send_json(response.to_dict())

            elif self.path == "/summarise":
                text = body.get("text", "")
                max_words = int(body.get("max_words", 100))
                response = harness.summarise(text, max_words=max_words)
                self._send_json(response.to_dict())

            elif self.path == "/extract-tags":
                text = body.get("text", "")
                response = harness.extract_tags(text)
                self._send_json(response.to_dict())

            elif self.path == "/format-doc":
                text = body.get("text", "")
                target_format = body.get("format", "markdown")
                response = harness.format_doc(text, target_format=target_format)
                self._send_json(response.to_dict())

            elif self.path == "/write-doc-stub":
                signature = body.get("signature", "")
                context = body.get("context", "")
                response = harness.write_doc_stub(signature, context=context)
                self._send_json(response.to_dict())

            else:
                self._send_json({"error": "Not found"}, 404)

    return HTTPServer, BilboHandler
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# CLI ENTRYPOINT
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async def _async_start(harness: BilboHarness):
    """Async shim so the synchronous CLI can run harness.start() via asyncio.run()."""
    await harness.start()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and dispatch to check / serve /
    one-shot task modes.  One-shot results are printed to stdout with a
    latency/token footer."""
    import argparse

    parser = argparse.ArgumentParser(
        description=f"{HARNESS_NAME} — Bilbo light-duty gateway (Gemma 4B local)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python -m nexus.bilbo_harness "Write a one-line description of the heartbeat module"
  python -m nexus.bilbo_harness --summarise path/to/doc.md
  python -m nexus.bilbo_harness --tags "Python async websocket telemetry harness"
  python -m nexus.bilbo_harness --serve --port 9400
  python -m nexus.bilbo_harness --check

Environment Variables:
  BILBO_MODEL      — Ollama model tag (default: gemma3:4b)
  OLLAMA_BASE_URL  — Ollama HTTP base (default: http://localhost:11434)
  HERMES_WS_URL    — Hermes telemetry endpoint
""",
    )
    parser.add_argument(
        "prompt",
        nargs="?",
        default=None,
        help="Prompt to send (omit for --serve or task-specific flags)",
    )
    parser.add_argument(
        "--model",
        default=None,
        help=f"Ollama model tag (default: {BILBO_MODEL_DEFAULT})",
    )
    parser.add_argument(
        "--serve",
        action="store_true",
        help="Start HTTP server to expose harness on the network",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=9400,
        help="HTTP server port (default: 9400)",
    )
    parser.add_argument(
        "--hermes-ws",
        default=DEFAULT_HERMES_WS_URL,
        help=f"Hermes WebSocket URL (default: {DEFAULT_HERMES_WS_URL})",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check Ollama status and model availability, then exit",
    )
    parser.add_argument(
        "--summarise",
        metavar="FILE_OR_TEXT",
        help="Summarise a file path or inline text",
    )
    parser.add_argument(
        "--tags",
        metavar="TEXT",
        help="Extract tags from TEXT",
    )
    args = parser.parse_args()

    harness = BilboHarness(
        model=args.model,
        hermes_ws_url=args.hermes_ws,
    )

    # --check: report Ollama/model availability and exit without generating.
    if args.check:
        status = harness.check_ollama()
        print(json.dumps(status, indent=2))
        if not status["running"]:
            print("\n[!] Ollama is not running. Start it with: ollama serve")
        elif not status["model_available"]:
            print(
                f"\n[!] Model '{harness.model}' not found. "
                f"Pull it with: ollama pull {harness.model}"
            )
        else:
            print(f"\n[OK] Bilbo gateway ready. Model: {harness.model}")
        return

    # --serve: register with Hermes, then block in the stdlib HTTP server.
    if args.serve:
        asyncio.run(_async_start(harness))
        HTTPServer, BilboHandler = create_app(harness)
        server = HTTPServer(("0.0.0.0", args.port), BilboHandler)
        log.info(f"Bilbo serving on http://0.0.0.0:{args.port}")
        log.info(
            "Endpoints: /generate /summarise /extract-tags "
            "/format-doc /write-doc-stub /health /status"
        )
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            log.info("Shutting down Bilbo gateway")
            # NOTE(review): stop() reuses the websocket opened under the
            # earlier asyncio.run() loop, which is now closed — sending on
            # it from this fresh loop likely fails (only a warning is
            # logged).  Verify against the websockets library's behaviour.
            asyncio.run(harness.stop())
        return

    # One-shot task modes.  --summarise treats its argument as a file path
    # when such a file exists, otherwise as inline text.
    if args.summarise:
        import pathlib
        p = pathlib.Path(args.summarise)
        text = p.read_text() if p.exists() else args.summarise
        response = harness.summarise(text)
    elif args.tags:
        response = harness.extract_tags(args.tags)
    elif args.prompt:
        response = harness.generate(args.prompt)
    else:
        # No mode selected — show usage and exit.
        parser.print_help()
        return

    if response.error:
        print(f"ERROR: {response.error}")
        # The connection-error message from _call_ollama embeds the string
        # "ollama serve"; match on it to print remediation steps.
        if "ollama serve" in (response.error or ""):
            print(
                "\nStart Ollama with: ollama serve\n"
                f"Pull the model with: ollama pull {harness.model}"
            )
    else:
        print(response.text)
        print(
            f"\n[{response.model}] {response.latency_ms:.0f}ms | "
            f"tokens: {response.input_tokens}→{response.output_tokens}",
            flush=True,
        )
|
||||
|
||||
|
||||
# Script entry point (python -m nexus.bilbo_harness).
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user