#!/usr/bin/env python3
"""
Ollama Client - Layer 3 Interface

Connects to local Ollama instance for Gemma inference.
This is the intelligence layer of the Archon architecture.
"""

import sys
import json
import requests
from typing import Dict, Any, List, Optional, Generator
from dataclasses import dataclass


@dataclass
class OllamaConfig:
    base_url: str = "http://localhost:11434"
    default_model: str = "gemma3:4b"
    timeout: int = 120


class OllamaClient:
    """
    Client for Ollama API - the intelligence layer.

    Communicates with local Ollama instance to run Gemma models
    for inference.
    """

    def __init__(self, base_url: str = "http://localhost:11434"):
        self.config = OllamaConfig(base_url=base_url)
        self.session = requests.Session()

    def health_check(self) -> bool:
        """Check if Ollama is reachable."""
        try:
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=5
            )
            return response.status_code == 200
        except requests.RequestException:
            return False

    def list_models(self) -> List[str]:
        """List available models."""
        try:
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=self.config.timeout
            )
            response.raise_for_status()
            data = response.json()
            return [m["name"] for m in data.get("models", [])]
        except requests.RequestException as e:
            raise OllamaError(f"Failed to list models: {e}")

    def generate(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None,
        context: Optional[list] = None,
        options: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate a response from the model.

        Args:
            prompt: The user prompt
            model: Model name (default: gemma3:4b)
            system: System prompt
            context: Previous context for conversation
            options: Additional generation options

        Returns:
            Response dict from Ollama
        """
        model = model or self.config.default_model

        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False
        }
        if system:
            payload["system"] = system
        if context:
            payload["context"] = context
        if options:
            payload["options"] = options

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Generation failed: {e}")

    def generate_stream(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None
    ) -> Generator[str, None, None]:
        """
        Stream generated responses.

        Yields response chunks as they arrive.
        """
        model = model or self.config.default_model

        payload = {
            "model": model,
            "prompt": prompt,
            "stream": True
        }
        if system:
            payload["system"] = system

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                stream=True,
                timeout=self.config.timeout
            )
            response.raise_for_status()

            for line in response.iter_lines():
                if line:
                    try:
                        data = json.loads(line)
                        if "response" in data:
                            yield data["response"]
                    except json.JSONDecodeError:
                        continue
        except requests.RequestException as e:
            raise OllamaError(f"Streaming failed: {e}")

    def chat(
        self,
        messages: list,
        model: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Chat completion with message history.
        Args:
            messages: List of {role, content} dicts
            model: Model name
        """
        model = model or self.config.default_model

        payload = {
            "model": model,
            "messages": messages,
            "stream": False
        }

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/chat",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Chat failed: {e}")


class OllamaError(Exception):
    """Ollama API error."""
    pass


def main():
    """CLI entry point."""
    client = OllamaClient()

    # Health check
    print("Checking Ollama connection...")
    if not client.health_check():
        print("ERROR: Ollama not reachable at localhost:11434")
        sys.exit(1)
    print("✓ Ollama is running\n")

    # List models
    print("Available models:")
    for model in client.list_models():
        print(f" - {model}")
    print()

    # Generate if prompt provided
    if len(sys.argv) > 1:
        prompt = " ".join(sys.argv[1:])
        print(f"Prompt: {prompt}")
        print("-" * 40)
        try:
            response = client.generate(
                prompt=prompt,
                system="You are a helpful assistant. Be concise."
            )
            print(response.get("response", "No response"))
            print("-" * 40)
            print(f"Tokens: {response.get('eval_count', 'N/A')}")
        except OllamaError as e:
            print(f"Error: {e}")
            sys.exit(1)
    else:
        print("Usage: ollama_client.py <prompt>")
        print("Example: ollama_client.py 'What is the Archon architecture?'")


if __name__ == "__main__":
    main()
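

# A minimal sketch of programmatic use beyond the CLI above (kept as a comment
# so the module stays unchanged at import time). It assumes a locally pulled
# gemma3:4b model, and the chat reply shape ({"message": {"content": ...}})
# follows Ollama's /api/chat response format; adjust for your setup.
#
#   client = OllamaClient()
#
#   # Stream tokens as they arrive:
#   for chunk in client.generate_stream("Summarize the Archon layers."):
#       print(chunk, end="", flush=True)
#
#   # Or use the chat endpoint with explicit message history:
#   reply = client.chat(messages=[
#       {"role": "system", "content": "You are a helpful assistant."},
#       {"role": "user", "content": "What is the Archon architecture?"},
#   ])
#   print(reply["message"]["content"])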