Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Whitestone
341f976bcc feat: add ollama as first-class provider for local model inference (#169)
Some checks failed
Nix / nix (macos-latest) (pull_request) Waiting to run
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Docs Site Checks / docs-site-checks (pull_request) Failing after 4s
Nix / nix (ubuntu-latest) (pull_request) Failing after 1s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 1s
Tests / test (pull_request) Failing after 3s
Tests / e2e (pull_request) Failing after 3s
Add 'ollama' as a recognized inference provider so local models (Gemma4,
Hermes3, Hermes4) can run through the agent harness without falling back
to OpenRouter.

Changes:
- hermes_cli/auth.py: Add ollama to PROVIDER_REGISTRY with
  inference_base_url=http://localhost:11434/v1, add a dummy API key
  fallback (ollama needs no auth), and remove the 'ollama' -> 'custom' alias
- hermes_cli/main.py: Add 'ollama' to --provider choices
- hermes_cli/models.py: Add ollama model catalog (gemma4, hermes3,
  hermes4, llama3.1, qwen2.5-coder, etc.), label, and provider order
- hermes_cli/providers.py: Add HermesOverlay for ollama, remove
  'ollama' -> 'ollama-cloud' alias

Usage:
  hermes chat -m gemma4 --provider ollama
  hermes --profile gemma4-local chat -q 'hello'

Ollama exposes an OpenAI-compatible API at localhost:11434/v1.
No API key is required (a dummy 'ollama' token is used so credential checks pass).
Override the endpoint with the OLLAMA_BASE_URL env var, or the key with OLLAMA_API_KEY.

Closes #169
2026-04-06 23:01:00 -04:00
4 changed files with 41 additions and 4 deletions

View File

@@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("HF_TOKEN",),
base_url_env_var="HF_BASE_URL",
),
"ollama": ProviderConfig(
id="ollama",
name="Ollama (Local)",
auth_type="api_key",
inference_base_url="http://localhost:11434/v1",
api_key_env_vars=("OLLAMA_API_KEY",),
base_url_env_var="OLLAMA_BASE_URL",
),
}
@@ -343,6 +351,15 @@ def _resolve_api_key_provider_secret(
pass
return "", ""
# Ollama does not require an API key — check env vars first,
# then fall back to a dummy token so credential checks pass.
if provider_id == "ollama":
for env_var in pconfig.api_key_env_vars:
val = os.getenv(env_var, "").strip()
if has_usable_secret(val):
return val, env_var
return "ollama", "default"
for env_var in pconfig.api_key_env_vars:
val = os.getenv(env_var, "").strip()
if has_usable_secret(val):
@@ -781,7 +798,7 @@ def resolve_provider(
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
"vllm": "custom", "llamacpp": "custom",
"llama.cpp": "custom", "llama-cpp": "custom",
}
normalized = _PROVIDER_ALIASES.get(normalized, normalized)

View File

@@ -4206,7 +4206,7 @@ For more help on a command:
)
chat_parser.add_argument(
"--provider",
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "ollama"],
default=None,
help="Inference provider (default: auto)"
)

View File

@@ -263,6 +263,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
],
"ollama": [
"gemma4",
"gemma4:27b",
"hermes3",
"hermes3:70b",
"hermes4",
"llama3.1",
"llama3.1:70b",
"qwen2.5-coder",
"qwen2.5-coder:32b",
"deepseek-r1",
"phi4",
"mistral",
],
}
_PROVIDER_LABELS = {
@@ -284,6 +298,7 @@ _PROVIDER_LABELS = {
"kilocode": "Kilo Code",
"alibaba": "Alibaba Cloud (DashScope)",
"huggingface": "Hugging Face",
"ollama": "Ollama (Local)",
"custom": "Custom endpoint",
}
@@ -568,7 +583,7 @@ def list_available_providers() -> list[dict[str, str]]:
"gemini", "huggingface",
"zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
"opencode-zen", "opencode-go",
"ai-gateway", "deepseek", "custom",
"ai-gateway", "deepseek", "ollama", "custom",
]
# Build reverse alias map
aliases_for: dict[str, list[str]] = {}

View File

@@ -122,6 +122,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
is_aggregator=True,
base_url_env_var="HF_BASE_URL",
),
"ollama": HermesOverlay(
transport="openai_chat",
is_aggregator=False,
base_url_env_var="OLLAMA_BASE_URL",
),
}
@@ -216,7 +221,7 @@ ALIASES: Dict[str, str] = {
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
"lm_studio": "lmstudio",
"ollama": "ollama-cloud",
# ollama is now a first-class provider (issue #169)
"vllm": "local",
"llamacpp": "local",
"llama.cpp": "local",