Three-layer architecture implementation:
- Layer 1: Thin Hermes profile (profile.yaml, < 50 lines)
- Layer 2: Claw Code runtime (harness.py, tool_registry.py)
- Layer 3: Gemma via Ollama (ollama_client.py)

Features:
- Tool registry with pattern matching (/time, /status, /echo, /ollama_list)
- Full routing between tool execution and the intelligence layer
- Ollama client with streaming and chat support
- Comprehensive integration test suite (12 tests)
- Connects to localhost:11434 using the gemma3:4b model

ollama_client.py — 227 lines, 6.2 KiB, Python
#!/usr/bin/env python3
|
|
"""
|
|
Ollama Client - Layer 3 Interface
|
|
|
|
Connects to local Ollama instance for Gemma inference.
|
|
This is the intelligence layer of the Archon architecture.
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import requests
|
|
from typing import Dict, Any, List, Optional, Generator
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass
class OllamaConfig:
    """Connection settings for a local Ollama server."""

    # Root URL of the Ollama HTTP API (default Ollama port).
    base_url: str = "http://localhost:11434"
    # Model used when a call does not specify one explicitly.
    default_model: str = "gemma3:4b"
    # Per-request timeout in seconds for generation/list calls
    # (health_check uses its own shorter probe timeout).
    timeout: int = 120
|
class OllamaClient:
    """
    Client for the Ollama HTTP API - the intelligence layer.

    Communicates with a local Ollama instance to run Gemma models
    for inference. All network/HTTP failures on API calls are raised
    as OllamaError, with the underlying requests exception chained.
    """

    def __init__(self, base_url: str = "http://localhost:11434"):
        """
        Args:
            base_url: Root URL of the Ollama server.
        """
        self.config = OllamaConfig(base_url=base_url)
        # One Session so repeated calls reuse pooled connections.
        self.session = requests.Session()

    def close(self) -> None:
        """Release the pooled HTTP connections held by the session."""
        self.session.close()

    # Context-manager support so callers can write
    # `with OllamaClient() as client:` and not leak the Session.
    def __enter__(self) -> "OllamaClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def health_check(self) -> bool:
        """Return True if the Ollama server is reachable, False otherwise."""
        try:
            # Short fixed timeout: this is a liveness probe, not a real call.
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=5
            )
            return response.status_code == 200
        except requests.RequestException:
            return False

    def list_models(self) -> List[str]:
        """
        List the names of the models available on the server.

        Returns:
            Model name strings reported by /api/tags.

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        try:
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=self.config.timeout
            )
            response.raise_for_status()
            data = response.json()
            return [m["name"] for m in data.get("models", [])]
        except requests.RequestException as e:
            # Chain the cause so the original network error is not lost.
            raise OllamaError(f"Failed to list models: {e}") from e

    def generate(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None,
        context: Optional[List[int]] = None,
        options: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate a single (non-streaming) response from the model.

        Args:
            prompt: The user prompt
            model: Model name (default: the configured default, gemma3:4b)
            system: System prompt
            context: Previous context tokens for conversation continuation
            options: Additional generation options passed through to Ollama

        Returns:
            Response dict from Ollama (includes "response", "eval_count", ...)

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        model = model or self.config.default_model

        payload: Dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": False
        }

        # Only include optional fields the caller actually supplied.
        if system:
            payload["system"] = system
        if context:
            payload["context"] = context
        if options:
            payload["options"] = options

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Generation failed: {e}") from e

    def generate_stream(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None
    ) -> Generator[str, None, None]:
        """
        Stream-generate a response.

        Yields response text chunks as they arrive. Malformed JSON lines
        in the stream are skipped rather than aborting the stream.

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        model = model or self.config.default_model

        payload: Dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": True
        }

        if system:
            payload["system"] = system

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                stream=True,
                timeout=self.config.timeout
            )
            response.raise_for_status()

            # Ollama streams one JSON object per line.
            for line in response.iter_lines():
                if line:
                    try:
                        data = json.loads(line)
                        if "response" in data:
                            yield data["response"]
                    except json.JSONDecodeError:
                        # Skip partial/garbled lines; keep streaming.
                        continue

        except requests.RequestException as e:
            raise OllamaError(f"Streaming failed: {e}") from e

    def chat(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Chat completion with message history.

        Args:
            messages: List of {"role": ..., "content": ...} dicts
            model: Model name (default: the configured default)

        Returns:
            Response dict from Ollama's /api/chat endpoint.

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        model = model or self.config.default_model

        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": False
        }

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/chat",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Chat failed: {e}") from e
|
class OllamaError(Exception):
    """Raised when communication with the Ollama API fails."""
|
def main():
    """CLI entry point: check connectivity, list models, optionally generate.

    Exits with status 1 on any Ollama connectivity or API failure.
    """
    client = OllamaClient()

    # Health check
    print("Checking Ollama connection...")
    if not client.health_check():
        print("ERROR: Ollama not reachable at localhost:11434")
        sys.exit(1)
    print("✓ Ollama is running\n")

    # List models. The server may have gone away between the health check
    # and this call, so surface a clean error instead of a raw traceback
    # (consistent with how generate() failures are reported below).
    print("Available models:")
    try:
        for model in client.list_models():
            print(f"  - {model}")
    except OllamaError as e:
        print(f"Error: {e}")
        sys.exit(1)
    print()

    # Generate if prompt provided
    if len(sys.argv) > 1:
        prompt = " ".join(sys.argv[1:])
        print(f"Prompt: {prompt}")
        print("-" * 40)

        try:
            response = client.generate(
                prompt=prompt,
                system="You are a helpful assistant. Be concise."
            )
            print(response.get("response", "No response"))
            print("-" * 40)
            print(f"Tokens: {response.get('eval_count', 'N/A')}")
        except OllamaError as e:
            print(f"Error: {e}")
            sys.exit(1)
    else:
        print("Usage: ollama_client.py <prompt>")
        print("Example: ollama_client.py 'What is the Archon architecture?'")
|
# Run the CLI only when executed as a script; importing this module
# as a library does not trigger any I/O.
if __name__ == "__main__":
    main()