Files
gemma-spectrum/archon-poc/ollama_client.py
Ezra 1b3bca9902 Implement Archon Architecture POC (Epic #370, Dispatch #371)
Three-layer architecture implementation:
- Layer 1: Thin Hermes profile (profile.yaml, < 50 lines)
- Layer 2: Claw Code runtime (harness.py, tool_registry.py)
- Layer 3: Gemma via Ollama (ollama_client.py)

Features:
- Tool registry with pattern matching (/time, /status, /echo, /ollama_list)
- Full routing between tool execution and intelligence layer
- Ollama client with streaming and chat support
- Comprehensive integration test suite (12 tests)
- Connection to localhost:11434 using gemma3:4b model
2026-04-02 19:47:00 +00:00

227 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""
Ollama Client - Layer 3 Interface
Connects to local Ollama instance for Gemma inference.
This is the intelligence layer of the Archon architecture.
"""
import sys
import json
import requests
from typing import Dict, Any, List, Optional, Generator
from dataclasses import dataclass
@dataclass
class OllamaConfig:
    """Connection settings for a local Ollama server."""
    base_url: str = "http://localhost:11434"  # Ollama's default listen address
    default_model: str = "gemma3:4b"  # model used when callers don't pass one explicitly
    timeout: int = 120  # per-request timeout in seconds for inference calls
class OllamaClient:
    """
    Client for the Ollama API - the intelligence layer.

    Communicates with a local Ollama instance to run Gemma models for
    inference. All network/HTTP failures on inference paths are surfaced
    as OllamaError with the underlying requests exception chained as the
    cause.
    """

    def __init__(self, base_url: str = "http://localhost:11434", timeout: int = 120):
        """
        Args:
            base_url: Root URL of the Ollama HTTP API.
            timeout: Per-request timeout in seconds for inference calls
                (previously fixed at the OllamaConfig default; the default
                value preserves the old behavior).
        """
        self.config = OllamaConfig(base_url=base_url, timeout=timeout)
        # One shared session so keep-alive connections are reused across calls.
        self.session = requests.Session()

    def health_check(self) -> bool:
        """Return True if Ollama answers on /api/tags, False otherwise."""
        try:
            # Short fixed timeout: this is a liveness probe, not an inference call.
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=5
            )
            return response.status_code == 200
        except requests.RequestException:
            # Unreachable server is the expected "unhealthy" case, not an error.
            return False

    def list_models(self) -> List[str]:
        """Return the names of models available on the server.

        Raises:
            OllamaError: if the server is unreachable or replies with an
                HTTP error status.
        """
        try:
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=self.config.timeout
            )
            response.raise_for_status()
            data = response.json()
            return [m["name"] for m in data.get("models", [])]
        except requests.RequestException as e:
            # Chain the cause so the underlying network error stays visible.
            raise OllamaError(f"Failed to list models: {e}") from e

    def generate(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None,
        context: Optional[list] = None,
        options: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate a single (non-streaming) response from the model.

        Args:
            prompt: The user prompt
            model: Model name (default: gemma3:4b)
            system: System prompt
            context: Previous context for conversation
            options: Additional generation options

        Returns:
            Response dict from Ollama

        Raises:
            OllamaError: on any transport or HTTP error.
        """
        model = model or self.config.default_model
        payload: Dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": False
        }
        # Only include optional fields when provided, matching Ollama's API.
        if system:
            payload["system"] = system
        if context:
            payload["context"] = context
        if options:
            payload["options"] = options
        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Generation failed: {e}") from e

    def generate_stream(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None
    ) -> Generator[str, None, None]:
        """
        Stream a generation, yielding response text chunks as they arrive.

        Raises:
            OllamaError: on any transport or HTTP error.
        """
        model = model or self.config.default_model
        payload: Dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": True
        }
        if system:
            payload["system"] = system
        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                stream=True,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            # Ollama streams one JSON object per line.
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Skip malformed lines rather than aborting the stream.
                    continue
                if "response" in data:
                    yield data["response"]
                if data.get("done"):
                    # Final sentinel object: nothing further will arrive.
                    break
        except requests.RequestException as e:
            raise OllamaError(f"Streaming failed: {e}") from e

    def chat(
        self,
        messages: list,
        model: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Chat completion with message history.

        Args:
            messages: List of {role, content} dicts
            model: Model name

        Returns:
            Response dict from Ollama.

        Raises:
            OllamaError: on any transport or HTTP error.
        """
        model = model or self.config.default_model
        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": False
        }
        try:
            response = self.session.post(
                f"{self.config.base_url}/api/chat",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Chat failed: {e}") from e
class OllamaError(Exception):
    """Raised when a call to the Ollama API fails."""
def main():
    """CLI entry point: check connectivity, list models, run an optional prompt."""
    client = OllamaClient()

    # Fail fast if the local Ollama server is not up.
    print("Checking Ollama connection...")
    if not client.health_check():
        print("ERROR: Ollama not reachable at localhost:11434")
        sys.exit(1)
    print("✓ Ollama is running\n")

    # Show what models the server has pulled.
    print("Available models:")
    for name in client.list_models():
        print(f" - {name}")
    print()

    # No prompt on the command line: show usage and stop.
    if len(sys.argv) <= 1:
        print("Usage: ollama_client.py <prompt>")
        print("Example: ollama_client.py 'What is the Archon architecture?'")
        return

    user_prompt = " ".join(sys.argv[1:])
    divider = "-" * 40
    print(f"Prompt: {user_prompt}")
    print(divider)
    try:
        result = client.generate(
            prompt=user_prompt,
            system="You are a helpful assistant. Be concise."
        )
    except OllamaError as e:
        print(f"Error: {e}")
        sys.exit(1)
    print(result.get("response", "No response"))
    print(divider)
    print(f"Tokens: {result.get('eval_count', 'N/A')}")


if __name__ == "__main__":
    main()