Three-layer architecture implementation:
- Layer 1: Thin Hermes profile (profile.yaml, < 50 lines)
- Layer 2: Claw Code runtime (harness.py, tool_registry.py)
- Layer 3: Gemma via Ollama (ollama_client.py)

Features:
- Tool registry with pattern matching (/time, /status, /echo, /ollama_list)
- Full routing between tool execution and the intelligence layer
- Ollama client with streaming and chat support
- Comprehensive integration test suite (12 tests)
- Connects to localhost:11434 using the gemma3:4b model

ollama_client.py — 227 lines, 6.2 KiB, Python
#!/usr/bin/env python3
|
|
"""
|
|
Ollama Client - Layer 3 Interface
|
|
|
|
Connects to local Ollama instance for Gemma inference.
|
|
This is the intelligence layer of the Archon architecture.
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import requests
|
|
from typing import Dict, Any, List, Optional, Generator
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass
class OllamaConfig:
    """Connection settings for a local Ollama server."""

    # Root URL of the Ollama HTTP API (default Ollama port).
    base_url: str = "http://localhost:11434"
    # Model used when a call does not specify one explicitly.
    default_model: str = "gemma3:4b"
    # Per-request timeout in seconds for generation/list calls
    # (health_check uses its own shorter probe timeout).
    timeout: int = 120
|
class OllamaClient:
    """
    Client for the Ollama HTTP API - the intelligence layer.

    Communicates with a local Ollama instance to run Gemma models
    for inference. All network/HTTP failures on API calls are raised
    as OllamaError, with the underlying requests exception chained.
    """

    def __init__(self, base_url: str = "http://localhost:11434"):
        """
        Args:
            base_url: Root URL of the Ollama server.
        """
        self.config = OllamaConfig(base_url=base_url)
        # One Session so repeated calls reuse pooled connections.
        self.session = requests.Session()

    def close(self) -> None:
        """Release the pooled HTTP connections held by the session."""
        self.session.close()

    # Context-manager support so callers can write
    # `with OllamaClient() as client:` and not leak the Session.
    def __enter__(self) -> "OllamaClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def health_check(self) -> bool:
        """Return True if the Ollama server is reachable, False otherwise."""
        try:
            # Short fixed timeout: this is a liveness probe, not a real call.
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=5
            )
            return response.status_code == 200
        except requests.RequestException:
            return False

    def list_models(self) -> List[str]:
        """
        List the names of the models available on the server.

        Returns:
            Model name strings reported by /api/tags.

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        try:
            response = self.session.get(
                f"{self.config.base_url}/api/tags",
                timeout=self.config.timeout
            )
            response.raise_for_status()
            data = response.json()
            return [m["name"] for m in data.get("models", [])]
        except requests.RequestException as e:
            # Chain the cause so the original network error is not lost.
            raise OllamaError(f"Failed to list models: {e}") from e

    def generate(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None,
        context: Optional[List[int]] = None,
        options: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate a single (non-streaming) response from the model.

        Args:
            prompt: The user prompt
            model: Model name (default: the configured default, gemma3:4b)
            system: System prompt
            context: Previous context tokens for conversation continuation
            options: Additional generation options passed through to Ollama

        Returns:
            Response dict from Ollama (includes "response", "eval_count", ...)

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        model = model or self.config.default_model

        payload: Dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": False
        }

        # Only include optional fields the caller actually supplied.
        if system:
            payload["system"] = system
        if context:
            payload["context"] = context
        if options:
            payload["options"] = options

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Generation failed: {e}") from e

    def generate_stream(
        self,
        prompt: str,
        model: Optional[str] = None,
        system: Optional[str] = None
    ) -> Generator[str, None, None]:
        """
        Stream-generate a response.

        Yields response text chunks as they arrive. Malformed JSON lines
        in the stream are skipped rather than aborting the stream.

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        model = model or self.config.default_model

        payload: Dict[str, Any] = {
            "model": model,
            "prompt": prompt,
            "stream": True
        }

        if system:
            payload["system"] = system

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/generate",
                json=payload,
                stream=True,
                timeout=self.config.timeout
            )
            response.raise_for_status()

            # Ollama streams one JSON object per line.
            for line in response.iter_lines():
                if line:
                    try:
                        data = json.loads(line)
                        if "response" in data:
                            yield data["response"]
                    except json.JSONDecodeError:
                        # Skip partial/garbled lines; keep streaming.
                        continue

        except requests.RequestException as e:
            raise OllamaError(f"Streaming failed: {e}") from e

    def chat(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Chat completion with message history.

        Args:
            messages: List of {"role": ..., "content": ...} dicts
            model: Model name (default: the configured default)

        Returns:
            Response dict from Ollama's /api/chat endpoint.

        Raises:
            OllamaError: if the request fails or returns an HTTP error.
        """
        model = model or self.config.default_model

        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": False
        }

        try:
            response = self.session.post(
                f"{self.config.base_url}/api/chat",
                json=payload,
                timeout=self.config.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            raise OllamaError(f"Chat failed: {e}") from e
|
class OllamaError(Exception):
    """Raised when communication with the Ollama API fails."""
|
def main():
    """CLI entry point: check connectivity, list models, optionally generate.

    Exits with status 1 on any Ollama connectivity or API failure.
    """
    client = OllamaClient()

    # Health check
    print("Checking Ollama connection...")
    if not client.health_check():
        print("ERROR: Ollama not reachable at localhost:11434")
        sys.exit(1)
    print("✓ Ollama is running\n")

    # List models. The server may have gone away between the health check
    # and this call, so surface a clean error instead of a raw traceback
    # (consistent with how generate() failures are reported below).
    print("Available models:")
    try:
        for model in client.list_models():
            print(f"  - {model}")
    except OllamaError as e:
        print(f"Error: {e}")
        sys.exit(1)
    print()

    # Generate if prompt provided
    if len(sys.argv) > 1:
        prompt = " ".join(sys.argv[1:])
        print(f"Prompt: {prompt}")
        print("-" * 40)

        try:
            response = client.generate(
                prompt=prompt,
                system="You are a helpful assistant. Be concise."
            )
            print(response.get("response", "No response"))
            print("-" * 40)
            print(f"Tokens: {response.get('eval_count', 'N/A')}")
        except OllamaError as e:
            print(f"Error: {e}")
            sys.exit(1)
    else:
        print("Usage: ollama_client.py <prompt>")
        print("Example: ollama_client.py 'What is the Archon architecture?'")
|
# Run the CLI only when executed as a script; importing this module
# as a library does not trigger any I/O.
if __name__ == "__main__":
    main()