feat: add ollama as first-class provider for local model inference (#169)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Docs Site Checks / docs-site-checks (pull_request) Failing after 4s
Nix / nix (ubuntu-latest) (pull_request) Failing after 1s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 1s
Tests / test (pull_request) Failing after 3s
Tests / e2e (pull_request) Failing after 3s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Docs Site Checks / docs-site-checks (pull_request) Failing after 4s
Nix / nix (ubuntu-latest) (pull_request) Failing after 1s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 1s
Tests / test (pull_request) Failing after 3s
Tests / e2e (pull_request) Failing after 3s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Add 'ollama' as a recognized inference provider so local models (Gemma4, Hermes3, Hermes4) can run through the agent harness without falling back to OpenRouter.

Changes:
- hermes_cli/auth.py: Add ollama to PROVIDER_REGISTRY with base_url=http://localhost:11434/v1, dummy API key fallback (ollama needs no auth), remove 'ollama' -> 'custom' alias
- hermes_cli/main.py: Add 'ollama' to --provider choices
- hermes_cli/models.py: Add ollama model catalog (gemma4, hermes3, hermes4, llama3.1, qwen2.5-coder, etc.), label, and provider order
- hermes_cli/providers.py: Add HermesOverlay for ollama, remove 'ollama' -> 'ollama-cloud' alias

Usage:
  hermes chat -m gemma4 --provider ollama
  hermes --profile gemma4-local chat -q 'hello'

Ollama exposes an OpenAI-compatible API at localhost:11434/v1. No API key is required (a dummy 'ollama' token is used for credential checks). Override with the OLLAMA_BASE_URL or OLLAMA_API_KEY env vars.

Closes #169
This commit is contained in:
@@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("HF_TOKEN",),
|
||||
base_url_env_var="HF_BASE_URL",
|
||||
),
|
||||
"ollama": ProviderConfig(
|
||||
id="ollama",
|
||||
name="Ollama (Local)",
|
||||
auth_type="api_key",
|
||||
inference_base_url="http://localhost:11434/v1",
|
||||
api_key_env_vars=("OLLAMA_API_KEY",),
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -343,6 +351,15 @@ def _resolve_api_key_provider_secret(
|
||||
pass
|
||||
return "", ""
|
||||
|
||||
# Ollama does not require an API key — check env vars first,
|
||||
# then fall back to a dummy token so credential checks pass.
|
||||
if provider_id == "ollama":
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
val = os.getenv(env_var, "").strip()
|
||||
if has_usable_secret(val):
|
||||
return val, env_var
|
||||
return "ollama", "default"
|
||||
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
val = os.getenv(env_var, "").strip()
|
||||
if has_usable_secret(val):
|
||||
@@ -781,7 +798,7 @@ def resolve_provider(
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
# Local server aliases — route through the generic custom provider
|
||||
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
|
||||
"vllm": "custom", "llamacpp": "custom",
|
||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||
}
|
||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
@@ -4206,7 +4206,7 @@ For more help on a command:
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "ollama"],
|
||||
default=None,
|
||||
help="Inference provider (default: auto)"
|
||||
)
|
||||
|
||||
@@ -263,6 +263,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"XiaomiMiMo/MiMo-V2-Flash",
|
||||
"moonshotai/Kimi-K2-Thinking",
|
||||
],
|
||||
"ollama": [
|
||||
"gemma4",
|
||||
"gemma4:27b",
|
||||
"hermes3",
|
||||
"hermes3:70b",
|
||||
"hermes4",
|
||||
"llama3.1",
|
||||
"llama3.1:70b",
|
||||
"qwen2.5-coder",
|
||||
"qwen2.5-coder:32b",
|
||||
"deepseek-r1",
|
||||
"phi4",
|
||||
"mistral",
|
||||
],
|
||||
}
|
||||
|
||||
_PROVIDER_LABELS = {
|
||||
@@ -284,6 +298,7 @@ _PROVIDER_LABELS = {
|
||||
"kilocode": "Kilo Code",
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"huggingface": "Hugging Face",
|
||||
"ollama": "Ollama (Local)",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
|
||||
@@ -568,7 +583,7 @@ def list_available_providers() -> list[dict[str, str]]:
|
||||
"gemini", "huggingface",
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
|
||||
"opencode-zen", "opencode-go",
|
||||
"ai-gateway", "deepseek", "custom",
|
||||
"ai-gateway", "deepseek", "ollama", "custom",
|
||||
]
|
||||
# Build reverse alias map
|
||||
aliases_for: dict[str, list[str]] = {}
|
||||
|
||||
@@ -122,6 +122,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
is_aggregator=True,
|
||||
base_url_env_var="HF_BASE_URL",
|
||||
),
|
||||
"ollama": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
is_aggregator=False,
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -216,7 +221,7 @@ ALIASES: Dict[str, str] = {
|
||||
"lmstudio": "lmstudio",
|
||||
"lm-studio": "lmstudio",
|
||||
"lm_studio": "lmstudio",
|
||||
"ollama": "ollama-cloud",
|
||||
# ollama is now a first-class provider (issue #169)
|
||||
"vllm": "local",
|
||||
"llamacpp": "local",
|
||||
"llama.cpp": "local",
|
||||
|
||||
Reference in New Issue
Block a user