From ed0ba7f5d8d0591505f7c64da1591d489d4cd40d Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Sat, 4 Apr 2026 15:45:15 -0400 Subject: [PATCH] WIP: Claude Code progress on #825 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated salvage commit — agent session ended (exit 1). Work in progress, may need continuation. --- nexus/bilbo_harness.py | 722 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 722 insertions(+) create mode 100644 nexus/bilbo_harness.py diff --git a/nexus/bilbo_harness.py b/nexus/bilbo_harness.py new file mode 100644 index 0000000..09ee79e --- /dev/null +++ b/nexus/bilbo_harness.py @@ -0,0 +1,722 @@ +#!/usr/bin/env python3 +""" +Bilbo Harness — Light-Duty Gateway backed by local Gemma 4B (Ollama) + +Bilbo's lane: documentation, labelling, tagging, formatting. +Free local compute — no API key, no cost, no cloud dependency. + +Architecture: + Timmy (sovereign) + ├── Ezra (harness — Claude Opus 4.6, architecture/triage) + ├── Bezalel (harness — Claude Opus 4.6, security/forge) + ├── Allegro (harness — Kimi K2.5, bulk code execution) + └── Bilbo (harness — Gemma 4B local, light-duty support) ← this module + +Routing principles: +- DO route here: doc stubs, tag/label extraction, README updates, issue formatting +- DO NOT route here: security audits, complex reasoning, multi-step refactors + +Ollama must be running locally with the gemma model pulled: + ollama pull gemma3:4b (or gemma:4b, gemma2:2b — see BILBO_MODEL env var) + ollama serve + +Usage: + # Single prompt: + python -m nexus.bilbo_harness "Summarise this issue: ..." 
+ + # Serve as HTTP gateway: + python -m nexus.bilbo_harness --serve --port 9400 + + # Summarise a file: + python -m nexus.bilbo_harness --summarise path/to/file.md + +Environment Variables: + BILBO_MODEL — Ollama model tag (default: gemma3:4b) + OLLAMA_BASE_URL — Ollama HTTP base (default: http://localhost:11434) + HERMES_WS_URL — Hermes telemetry WebSocket (default: ws://localhost:8000/ws) +""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import time +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Iterator, Optional, Union + +import requests + +log = logging.getLogger("bilbo") +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [bilbo] %(message)s", + datefmt="%H:%M:%S", +) + +# ═══════════════════════════════════════════════════════════════════════════ +# CONFIGURATION +# ═══════════════════════════════════════════════════════════════════════════ + +BILBO_MODEL_DEFAULT = "gemma3:4b" + +# Ollama OpenAI-compatible endpoint (v0.1.24+) +OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434") +OLLAMA_CHAT_URL = f"{OLLAMA_BASE_URL}/v1/chat/completions" +OLLAMA_TAGS_URL = f"{OLLAMA_BASE_URL}/api/tags" + +DEFAULT_HERMES_WS_URL = os.environ.get("HERMES_WS_URL", "ws://localhost:8000/ws") +HARNESS_ID = "bilbo" +HARNESS_NAME = "Bilbo Harness" + +# Light-duty task types Bilbo handles well +BILBO_TASK_LANES = ["documentation", "tagging", "labelling", "formatting", "summarisation"] + + +# ═══════════════════════════════════════════════════════════════════════════ +# DATA CLASSES +# ═══════════════════════════════════════════════════════════════════════════ + +@dataclass +class BilboResponse: + """Response from a Bilbo generate call.""" + text: str = "" + model: str = "" + input_tokens: int = 0 + output_tokens: int = 0 + latency_ms: float = 0.0 + error: Optional[str] = None + timestamp: str = field( + 
default_factory=lambda: datetime.now(timezone.utc).isoformat() + ) + + def to_dict(self) -> dict: + return { + "text": self.text, + "model": self.model, + "input_tokens": self.input_tokens, + "output_tokens": self.output_tokens, + "latency_ms": self.latency_ms, + "error": self.error, + "timestamp": self.timestamp, + } + + +# ═══════════════════════════════════════════════════════════════════════════ +# BILBO HARNESS +# ═══════════════════════════════════════════════════════════════════════════ + +class BilboHarness: + """ + Bilbo gateway harness — local Gemma 4B via Ollama. + + Handles light-duty tasks: documentation stubs, tag extraction, issue + formatting, README updates, label suggestions. + + All calls use the Ollama OpenAI-compatible endpoint so the same + request shape works against any future model swap. + """ + + def __init__( + self, + model: Optional[str] = None, + ollama_base_url: str = OLLAMA_BASE_URL, + hermes_ws_url: str = DEFAULT_HERMES_WS_URL, + ): + self.model = model or os.environ.get("BILBO_MODEL", BILBO_MODEL_DEFAULT) + self.ollama_base_url = ollama_base_url + self.chat_url = f"{ollama_base_url}/v1/chat/completions" + self.hermes_ws_url = hermes_ws_url + + # Session bookkeeping + self.session_id = str(uuid.uuid4())[:8] + self.request_count = 0 + self.total_input_tokens = 0 + self.total_output_tokens = 0 + + # WebSocket connection (lazy) + self._ws = None + self._ws_connected = False + + # ═══ LIFECYCLE ═══════════════════════════════════════════════════════ + + async def start(self): + """Register harness on the network via Hermes WebSocket.""" + log.info("=" * 50) + log.info(f"{HARNESS_NAME} — STARTING") + log.info(f" Session: {self.session_id}") + log.info(f" Model: {self.model}") + log.info(f" Ollama: {self.ollama_base_url}") + log.info(f" Hermes: {self.hermes_ws_url}") + log.info(f" Lane: {', '.join(BILBO_TASK_LANES)}") + log.info("=" * 50) + + await self._connect_hermes() + await self._send_telemetry({ + "type": "harness_register", + 
"harness_id": HARNESS_ID, + "session_id": self.session_id, + "model": self.model, + "capabilities": BILBO_TASK_LANES, + "transport": "ollama-local", + }) + log.info("Bilbo registered on network") + + async def stop(self): + """Deregister and disconnect.""" + await self._send_telemetry({ + "type": "harness_deregister", + "harness_id": HARNESS_ID, + "session_id": self.session_id, + "stats": self._session_stats(), + }) + if self._ws: + try: + await self._ws.close() + except Exception: + pass + self._ws_connected = False + log.info(f"{HARNESS_NAME} stopped. {self._session_stats()}") + + # ═══ HEALTH CHECK ═══════════════════════════════════════════════════ + + def check_ollama(self) -> dict: + """ + Verify Ollama is running and the configured model is available. + + Returns dict with keys: running (bool), model_available (bool), + available_models (list[str]), error (str|None). + """ + try: + r = requests.get(f"{self.ollama_base_url}/api/tags", timeout=5) + if r.status_code != 200: + return { + "running": False, + "model_available": False, + "available_models": [], + "error": f"Ollama returned HTTP {r.status_code}", + } + data = r.json() + models = [m["name"] for m in data.get("models", [])] + # Match on prefix (gemma3:4b matches gemma3:4b-instruct-q4_0, etc.) 
+ model_available = any( + m == self.model or m.startswith(self.model.split(":")[0]) + for m in models + ) + return { + "running": True, + "model_available": model_available, + "available_models": models, + "error": None, + } + except requests.ConnectionError: + return { + "running": False, + "model_available": False, + "available_models": [], + "error": f"Cannot connect to Ollama at {self.ollama_base_url}", + } + except Exception as e: + return { + "running": False, + "model_available": False, + "available_models": [], + "error": str(e), + } + + # ═══ CORE GENERATION ═════════════════════════════════════════════════ + + def generate( + self, + prompt: Union[str, list[dict]], + *, + system: Optional[str] = None, + max_tokens: Optional[int] = None, + temperature: float = 0.3, + ) -> BilboResponse: + """ + Generate a response from the local Gemma model via Ollama. + + Args: + prompt: String prompt or list of message dicts + system: Optional system instruction + max_tokens: Override default max output tokens (None = Ollama default) + temperature: Sampling temperature (default: 0.3 for focused output) + + Returns: + BilboResponse with text, token counts, latency + """ + messages = self._build_messages(prompt, system=system) + response = self._call_ollama( + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + ) + self._record(response) + return response + + def summarise(self, text: str, max_words: int = 100) -> BilboResponse: + """ + Summarise text in plain language. + + Args: + text: Content to summarise + max_words: Target word count for the summary + + Returns: + BilboResponse with the summary in .text + """ + system = ( + "You are a concise technical writer. " + "Summarise the provided text clearly and accurately. " + "Use plain language. Avoid jargon. Be brief." 
+ ) + prompt = ( + f"Summarise the following in approximately {max_words} words:\n\n{text}" + ) + return self.generate(prompt, system=system, temperature=0.2) + + def extract_tags(self, text: str) -> BilboResponse: + """ + Extract relevant tags/labels from text for issue or doc labelling. + + Returns: + BilboResponse where .text contains a comma-separated tag list + """ + system = ( + "You are a tagging assistant. " + "Given some text, output a comma-separated list of short, lowercase tags " + "(3-8 tags). Output ONLY the comma-separated list, nothing else." + ) + prompt = f"Extract tags for:\n\n{text}" + return self.generate(prompt, system=system, temperature=0.1, max_tokens=64) + + def format_doc(self, text: str, target_format: str = "markdown") -> BilboResponse: + """ + Reformat or clean up a documentation snippet. + + Args: + text: The raw documentation text + target_format: Output format (default: markdown) + + Returns: + BilboResponse with the reformatted content in .text + """ + system = ( + f"You are a documentation formatter. " + f"Reformat the provided text as clean {target_format}. " + f"Fix whitespace, headings, and lists. Preserve meaning exactly." + ) + prompt = f"Reformat this documentation:\n\n{text}" + return self.generate(prompt, system=system, temperature=0.1) + + def write_doc_stub(self, signature: str, context: str = "") -> BilboResponse: + """ + Write a documentation stub for a function/class signature. + + Args: + signature: Function or class signature string + context: Optional surrounding code context + + Returns: + BilboResponse with the docstring stub in .text + """ + system = ( + "You are a Python docstring writer. " + "Write a concise docstring for the given signature. " + "Include Args and Returns sections where applicable. " + "Output only the docstring, including triple-quotes." 
+ ) + prompt = signature + if context: + prompt = f"Context:\n{context}\n\nSignature: {signature}" + return self.generate(prompt, system=system, temperature=0.2) + + # ═══ INTERNAL: API CALL ══════════════════════════════════════════════ + + def _call_ollama( + self, + messages: list[dict], + max_tokens: Optional[int] = None, + temperature: float = 0.3, + ) -> BilboResponse: + """Make a single call to the Ollama OpenAI-compatible endpoint.""" + headers = {"Content-Type": "application/json"} + payload: dict[str, Any] = { + "model": self.model, + "messages": messages, + "stream": False, + "options": {"temperature": temperature}, + } + if max_tokens is not None: + payload["options"]["num_predict"] = max_tokens + + t0 = time.time() + try: + r = requests.post( + self.chat_url, json=payload, headers=headers, timeout=120 + ) + latency_ms = (time.time() - t0) * 1000 + + if r.status_code != 200: + return BilboResponse( + model=self.model, + latency_ms=latency_ms, + error=f"HTTP {r.status_code}: {r.text[:200]}", + ) + + data = r.json() + choice = data.get("choices", [{}])[0] + text = choice.get("message", {}).get("content", "") + usage = data.get("usage", {}) + input_tokens = usage.get("prompt_tokens", 0) + output_tokens = usage.get("completion_tokens", 0) + + return BilboResponse( + text=text, + model=self.model, + input_tokens=input_tokens, + output_tokens=output_tokens, + latency_ms=latency_ms, + ) + + except requests.Timeout: + return BilboResponse( + model=self.model, + latency_ms=(time.time() - t0) * 1000, + error="Request timed out (120s) — model may still be loading", + ) + except requests.ConnectionError: + return BilboResponse( + model=self.model, + latency_ms=(time.time() - t0) * 1000, + error=( + f"Cannot connect to Ollama at {self.ollama_base_url}. 
" + "Run: ollama serve" + ), + ) + except Exception as e: + return BilboResponse( + model=self.model, + latency_ms=(time.time() - t0) * 1000, + error=str(e), + ) + + # ═══ INTERNAL: HELPERS ═══════════════════════════════════════════════ + + @staticmethod + def _build_messages( + prompt: Union[str, list[dict]], + system: Optional[str] = None, + ) -> list[dict]: + """Build the messages list for Ollama chat API.""" + messages: list[dict] = [] + if system: + messages.append({"role": "system", "content": system}) + if isinstance(prompt, str): + messages.append({"role": "user", "content": prompt}) + else: + messages.extend(prompt) + return messages + + def _record(self, response: BilboResponse): + """Update session stats and emit telemetry for a completed response.""" + self.request_count += 1 + self.total_input_tokens += response.input_tokens + self.total_output_tokens += response.output_tokens + + if response.error: + log.warning(f"[{response.model}] error: {response.error}") + else: + log.info( + f"[{response.model}] {response.latency_ms:.0f}ms | " + f"in={response.input_tokens} out={response.output_tokens}" + ) + + try: + asyncio.get_event_loop().create_task( + self._send_telemetry({ + "type": "bilbo_response", + "harness_id": HARNESS_ID, + "session_id": self.session_id, + "model": response.model, + "latency_ms": response.latency_ms, + "input_tokens": response.input_tokens, + "output_tokens": response.output_tokens, + "error": response.error, + }) + ) + except RuntimeError: + pass + + def _session_stats(self) -> dict: + return { + "session_id": self.session_id, + "request_count": self.request_count, + "total_input_tokens": self.total_input_tokens, + "total_output_tokens": self.total_output_tokens, + } + + # ═══ HERMES WEBSOCKET ════════════════════════════════════════════════ + + async def _connect_hermes(self): + """Connect to Hermes WebSocket for telemetry.""" + try: + import websockets # type: ignore + self._ws = await websockets.connect(self.hermes_ws_url) + 
self._ws_connected = True + log.info(f"Connected to Hermes: {self.hermes_ws_url}") + except Exception as e: + log.warning(f"Hermes connection failed (telemetry disabled): {e}") + self._ws_connected = False + + async def _send_telemetry(self, data: dict): + """Send a telemetry event to Hermes.""" + if not self._ws_connected or not self._ws: + return + try: + await self._ws.send(json.dumps(data)) + except Exception as e: + log.warning(f"Telemetry send failed: {e}") + self._ws_connected = False + + +# ═══════════════════════════════════════════════════════════════════════════ +# HTTP SERVER — expose harness to the network +# ═══════════════════════════════════════════════════════════════════════════ + +def create_app(harness: BilboHarness): + """ + Create a minimal HTTP app exposing Bilbo's harness to the network. + + Endpoints: + POST /generate — general text generation + POST /summarise — summarise provided text + POST /extract-tags — extract tags from text + POST /format-doc — reformat documentation + POST /write-doc-stub — write a docstring stub + GET /health — health check (includes Ollama status) + GET /status — session stats + """ + from http.server import BaseHTTPRequestHandler, HTTPServer + + class BilboHandler(BaseHTTPRequestHandler): + def log_message(self, fmt, *args): + log.info(f"HTTP {fmt % args}") + + def _read_body(self) -> dict: + length = int(self.headers.get("Content-Length", 0)) + raw = self.rfile.read(length) if length else b"{}" + return json.loads(raw) + + def _send_json(self, data: dict, status: int = 200): + body = json.dumps(data).encode() + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self): + if self.path == "/health": + ollama_status = harness.check_ollama() + self._send_json({ + "status": "ok" if ollama_status["running"] else "degraded", + "harness": HARNESS_ID, + "model": harness.model, + 
"ollama": ollama_status, + }) + elif self.path == "/status": + self._send_json({ + **harness._session_stats(), + "model": harness.model, + "ollama_base_url": harness.ollama_base_url, + "lanes": BILBO_TASK_LANES, + }) + else: + self._send_json({"error": "Not found"}, 404) + + def do_POST(self): + body = self._read_body() + + if self.path == "/generate": + prompt = body.get("prompt", "") + system = body.get("system") + max_tokens = body.get("max_tokens") + temperature = float(body.get("temperature", 0.3)) + response = harness.generate( + prompt, system=system, max_tokens=max_tokens, + temperature=temperature, + ) + self._send_json(response.to_dict()) + + elif self.path == "/summarise": + text = body.get("text", "") + max_words = int(body.get("max_words", 100)) + response = harness.summarise(text, max_words=max_words) + self._send_json(response.to_dict()) + + elif self.path == "/extract-tags": + text = body.get("text", "") + response = harness.extract_tags(text) + self._send_json(response.to_dict()) + + elif self.path == "/format-doc": + text = body.get("text", "") + target_format = body.get("format", "markdown") + response = harness.format_doc(text, target_format=target_format) + self._send_json(response.to_dict()) + + elif self.path == "/write-doc-stub": + signature = body.get("signature", "") + context = body.get("context", "") + response = harness.write_doc_stub(signature, context=context) + self._send_json(response.to_dict()) + + else: + self._send_json({"error": "Not found"}, 404) + + return HTTPServer, BilboHandler + + +# ═══════════════════════════════════════════════════════════════════════════ +# CLI ENTRYPOINT +# ═══════════════════════════════════════════════════════════════════════════ + +async def _async_start(harness: BilboHarness): + await harness.start() + + +def main(): + import argparse + + parser = argparse.ArgumentParser( + description=f"{HARNESS_NAME} — Bilbo light-duty gateway (Gemma 4B local)", + 
def main():
    """CLI entrypoint: health check, HTTP serving, or a one-shot task."""
    import argparse

    parser = argparse.ArgumentParser(
        description=f"{HARNESS_NAME} — Bilbo light-duty gateway (Gemma 4B local)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python -m nexus.bilbo_harness "Write a one-line description of the heartbeat module"
    python -m nexus.bilbo_harness --summarise path/to/doc.md
    python -m nexus.bilbo_harness --tags "Python async websocket telemetry harness"
    python -m nexus.bilbo_harness --serve --port 9400
    python -m nexus.bilbo_harness --check

Environment Variables:
    BILBO_MODEL      — Ollama model tag (default: gemma3:4b)
    OLLAMA_BASE_URL  — Ollama HTTP base (default: http://localhost:11434)
    HERMES_WS_URL    — Hermes telemetry endpoint
        """,
    )
    parser.add_argument(
        "prompt",
        nargs="?",
        default=None,
        help="Prompt to send (omit for --serve or task-specific flags)",
    )
    parser.add_argument(
        "--model",
        default=None,
        help=f"Ollama model tag (default: {BILBO_MODEL_DEFAULT})",
    )
    parser.add_argument(
        "--serve",
        action="store_true",
        help="Start HTTP server to expose harness on the network",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=9400,
        help="HTTP server port (default: 9400)",
    )
    parser.add_argument(
        "--hermes-ws",
        default=DEFAULT_HERMES_WS_URL,
        help=f"Hermes WebSocket URL (default: {DEFAULT_HERMES_WS_URL})",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check Ollama status and model availability, then exit",
    )
    parser.add_argument(
        "--summarise",
        metavar="FILE_OR_TEXT",
        help="Summarise a file path or inline text",
    )
    parser.add_argument(
        "--tags",
        metavar="TEXT",
        help="Extract tags from TEXT",
    )
    args = parser.parse_args()

    harness = BilboHarness(
        model=args.model,
        hermes_ws_url=args.hermes_ws,
    )

    if args.check:
        status = harness.check_ollama()
        print(json.dumps(status, indent=2))
        if not status["running"]:
            print("\n[!] Ollama is not running. Start it with: ollama serve")
        elif not status["model_available"]:
            print(
                f"\n[!] Model '{harness.model}' not found. "
                f"Pull it with: ollama pull {harness.model}"
            )
        else:
            print(f"\n[OK] Bilbo gateway ready. Model: {harness.model}")
        return

    if args.serve:
        # NOTE(review): start() and stop() each run under their own
        # asyncio.run() loop, so the Hermes socket opened in start() is
        # closed from a different loop in stop(); the close failure is
        # caught and logged inside stop(), so shutdown stays best-effort.
        asyncio.run(_async_start(harness))
        HTTPServer, BilboHandler = create_app(harness)
        server = HTTPServer(("0.0.0.0", args.port), BilboHandler)
        log.info(f"Bilbo serving on http://0.0.0.0:{args.port}")
        log.info(
            "Endpoints: /generate /summarise /extract-tags "
            "/format-doc /write-doc-stub /health /status"
        )
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            log.info("Shutting down Bilbo gateway")
            asyncio.run(harness.stop())
        return

    if args.summarise:
        import pathlib
        # Fix: Path(...).exists() crashed on inline text containing NUL
        # bytes (ValueError) and read_text() crashed on a directory path
        # (IsADirectoryError).  Fall back to treating the argument as
        # inline text whenever it is not a readable regular file.
        try:
            path = pathlib.Path(args.summarise)
            text = path.read_text() if path.is_file() else args.summarise
        except (OSError, ValueError):
            text = args.summarise
        response = harness.summarise(text)
    elif args.tags:
        response = harness.extract_tags(args.tags)
    elif args.prompt:
        response = harness.generate(args.prompt)
    else:
        parser.print_help()
        return

    if response.error:
        print(f"ERROR: {response.error}")
        if "ollama serve" in (response.error or ""):
            print(
                "\nStart Ollama with: ollama serve\n"
                f"Pull the model with: ollama pull {harness.model}"
            )
    else:
        print(response.text)
        print(
            f"\n[{response.model}] {response.latency_ms:.0f}ms | "
            f"tokens: {response.input_tokens}→{response.output_tokens}",
            flush=True,
        )