From 7936483ffc1efcf2709cd6e87791b3d4ee37b9bf Mon Sep 17 00:00:00 2001 From: Bezalel Date: Tue, 7 Apr 2026 15:55:50 +0000 Subject: [PATCH] feat(provider): first-class Ollama support + Gemma 4 defaults (#169) - Add 'ollama' to CLI provider choices and auth aliases - Wire Ollama through resolve_provider_client with auto-detection - Add _try_ollama to auxiliary fallback chain (before local/custom) - Add ollama to vision provider order - Update model_metadata.py: ollama prefix + gemma-4-* context lengths (256K) - Default model: gemma4:12b when provider=ollama --- agent/auxiliary_client.py | 25 +++++++++++++++++++++++++ agent/model_metadata.py | 9 +++++++-- hermes_cli/auth.py | 5 +++-- hermes_cli/main.py | 2 +- hermes_cli/models.py | 2 +- 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 9edc505e3..196ff7986 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -922,6 +922,7 @@ def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[st _AUTO_PROVIDER_LABELS = { "_try_openrouter": "openrouter", "_try_nous": "nous", + "_try_ollama": "ollama", "_try_custom_endpoint": "local/custom", "_try_codex": "openai-codex", "_resolve_api_key_provider": "api-key", @@ -930,6 +931,18 @@ _AUTO_PROVIDER_LABELS = { _AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"}) +def _try_ollama() -> Tuple[Optional[OpenAI], Optional[str]]: + """Detect and return an Ollama client if the server is reachable.""" + base_url = (os.getenv("OLLAMA_BASE_URL", "") or "http://localhost:11434").strip().rstrip("/") + base_url = base_url + "/v1" if not base_url.endswith("/v1") else base_url + from agent.model_metadata import detect_local_server_type + if detect_local_server_type(base_url) != "ollama": + return None, None + api_key = (os.getenv("OLLAMA_API_KEY", "") or "ollama").strip() + model = _read_main_model() or "gemma4:12b" + return OpenAI(api_key=api_key, base_url=base_url), model + + def 
_get_provider_chain() -> List[tuple]: """Return the ordered provider detection chain. @@ -939,6 +952,7 @@ def _get_provider_chain() -> List[tuple]: return [ ("openrouter", _try_openrouter), ("nous", _try_nous), + ("ollama", _try_ollama), ("local/custom", _try_custom_endpoint), ("openai-codex", _try_codex), ("api-key", _resolve_api_key_provider), @@ -988,6 +1002,7 @@ def _try_payment_fallback( # Map common resolved_provider values back to chain labels. _alias_to_label = {"openrouter": "openrouter", "nous": "nous", "openai-codex": "openai-codex", "codex": "openai-codex", + "ollama": "ollama", "custom": "local/custom", "local/custom": "local/custom"} skip_chain_labels = {_alias_to_label.get(s, s) for s in skip_labels} @@ -1195,6 +1210,15 @@ def resolve_provider_client( return (_to_async_client(client, final_model) if async_mode else (client, final_model)) + # ── Ollama (first-class local provider) ────────────────────────── + if provider == "ollama": + base_url = (explicit_base_url or os.getenv("OLLAMA_BASE_URL", "") or "http://localhost:11434").strip().rstrip("/") + base_url = base_url + "/v1" if not base_url.endswith("/v1") else base_url + api_key = (explicit_api_key or os.getenv("OLLAMA_API_KEY", "") or "ollama").strip() + final_model = model or _read_main_model() or "gemma4:12b" + client = OpenAI(api_key=api_key, base_url=base_url) + return (_to_async_client(client, final_model) if async_mode else (client, final_model)) + # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ─────────── if provider == "custom": if explicit_base_url: @@ -1335,6 +1359,7 @@ def get_async_text_auxiliary_client(task: str = ""): _VISION_AUTO_PROVIDER_ORDER = ( "openrouter", "nous", + "ollama", "openai-codex", "anthropic", "custom", diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 62dfb2b82..aabf40300 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -26,7 +26,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", 
"copilot", "copilot-acp", "gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek", "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", - "custom", "local", + "ollama", "custom", "local", # Common aliases "google", "google-gemini", "google-ai-studio", "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot", @@ -102,9 +102,12 @@ DEFAULT_CONTEXT_LENGTHS = { "gpt-4": 128000, # Google "gemini": 1048576, - # Gemma (open models served via AI Studio) + # Gemma (open models — Ollama / AI Studio) "gemma-4-31b": 256000, "gemma-4-26b": 256000, + "gemma-4-12b": 256000, + "gemma-4-4b": 256000, + "gemma-4-1b": 256000, "gemma-3": 131072, "gemma": 8192, # fallback for older gemma models # DeepSeek @@ -187,6 +190,8 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.githubcopilot.com": "copilot", "models.github.ai": "copilot", "api.fireworks.ai": "fireworks", + "localhost": "ollama", + "127.0.0.1": "ollama", } diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 23119c661..2ed84e728 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -820,10 +820,11 @@ def resolve_provider( "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "go": "opencode-go", "opencode-go-sub": "opencode-go", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", - # Local server aliases — route through the generic custom provider + # Local server aliases "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom", - "ollama": "custom", "vllm": "custom", "llamacpp": "custom", + "vllm": "custom", "llamacpp": "custom", "llama.cpp": "custom", "llama-cpp": "custom", + "ollama": "ollama", } normalized = _PROVIDER_ALIASES.get(normalized, normalized) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 55faf8413..4bd110140 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -4206,7 +4206,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", 
"nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"], + choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "ollama"], default=None, help="Inference provider (default: auto)" ) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index a5b1c2b2f..18a35711b 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -568,7 +568,7 @@ def list_available_providers() -> list[dict[str, str]]: "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba", "opencode-zen", "opencode-go", - "ai-gateway", "deepseek", "custom", + "ai-gateway", "deepseek", "ollama", "custom", ] # Build reverse alias map aliases_for: dict[str, list[str]] = {}