feat: add ollama as first-class provider for local model inference (#169)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Docs Site Checks / docs-site-checks (pull_request) Failing after 4s
Nix / nix (ubuntu-latest) (pull_request) Failing after 1s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 1s
Tests / test (pull_request) Failing after 3s
Tests / e2e (pull_request) Failing after 3s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Docs Site Checks / docs-site-checks (pull_request) Failing after 4s
Nix / nix (ubuntu-latest) (pull_request) Failing after 1s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 1s
Tests / test (pull_request) Failing after 3s
Tests / e2e (pull_request) Failing after 3s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Add 'ollama' as a recognized inference provider so local models (Gemma4, Hermes3, Hermes4) can run through the agent harness without falling back to OpenRouter.

Changes:
- hermes_cli/auth.py: Add ollama to PROVIDER_REGISTRY with base_url=http://localhost:11434/v1, dummy API key fallback (ollama needs no auth), remove 'ollama' -> 'custom' alias
- hermes_cli/main.py: Add 'ollama' to --provider choices
- hermes_cli/models.py: Add ollama model catalog (gemma4, hermes3, hermes4, llama3.1, qwen2.5-coder, etc.), label, and provider order
- hermes_cli/providers.py: Add HermesOverlay for ollama, remove 'ollama' -> 'ollama-cloud' alias

Usage:
  hermes chat -m gemma4 --provider ollama
  hermes --profile gemma4-local chat -q 'hello'

Ollama exposes an OpenAI-compatible API at localhost:11434/v1. No API key is required (a dummy 'ollama' token is used for credential checks). Override with the OLLAMA_BASE_URL or OLLAMA_API_KEY env vars.

Closes #169
This commit is contained in:
@@ -233,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("HF_TOKEN",),
|
||||
base_url_env_var="HF_BASE_URL",
|
||||
),
|
||||
"ollama": ProviderConfig(
|
||||
id="ollama",
|
||||
name="Ollama (Local)",
|
||||
auth_type="api_key",
|
||||
inference_base_url="http://localhost:11434/v1",
|
||||
api_key_env_vars=("OLLAMA_API_KEY",),
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -343,6 +351,15 @@ def _resolve_api_key_provider_secret(
|
||||
pass
|
||||
return "", ""
|
||||
|
||||
# Ollama does not require an API key — check env vars first,
|
||||
# then fall back to a dummy token so credential checks pass.
|
||||
if provider_id == "ollama":
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
val = os.getenv(env_var, "").strip()
|
||||
if has_usable_secret(val):
|
||||
return val, env_var
|
||||
return "ollama", "default"
|
||||
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
val = os.getenv(env_var, "").strip()
|
||||
if has_usable_secret(val):
|
||||
@@ -781,7 +798,7 @@ def resolve_provider(
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
# Local server aliases — route through the generic custom provider
|
||||
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
|
||||
"vllm": "custom", "llamacpp": "custom",
|
||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||
}
|
||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
@@ -4206,7 +4206,7 @@ For more help on a command:
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "ollama"],
|
||||
default=None,
|
||||
help="Inference provider (default: auto)"
|
||||
)
|
||||
|
||||
@@ -263,6 +263,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"XiaomiMiMo/MiMo-V2-Flash",
|
||||
"moonshotai/Kimi-K2-Thinking",
|
||||
],
|
||||
"ollama": [
|
||||
"gemma4",
|
||||
"gemma4:27b",
|
||||
"hermes3",
|
||||
"hermes3:70b",
|
||||
"hermes4",
|
||||
"llama3.1",
|
||||
"llama3.1:70b",
|
||||
"qwen2.5-coder",
|
||||
"qwen2.5-coder:32b",
|
||||
"deepseek-r1",
|
||||
"phi4",
|
||||
"mistral",
|
||||
],
|
||||
}
|
||||
|
||||
_PROVIDER_LABELS = {
|
||||
@@ -284,6 +298,7 @@ _PROVIDER_LABELS = {
|
||||
"kilocode": "Kilo Code",
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"huggingface": "Hugging Face",
|
||||
"ollama": "Ollama (Local)",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
|
||||
@@ -568,7 +583,7 @@ def list_available_providers() -> list[dict[str, str]]:
|
||||
"gemini", "huggingface",
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
|
||||
"opencode-zen", "opencode-go",
|
||||
"ai-gateway", "deepseek", "custom",
|
||||
"ai-gateway", "deepseek", "ollama", "custom",
|
||||
]
|
||||
# Build reverse alias map
|
||||
aliases_for: dict[str, list[str]] = {}
|
||||
|
||||
@@ -122,6 +122,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
is_aggregator=True,
|
||||
base_url_env_var="HF_BASE_URL",
|
||||
),
|
||||
"ollama": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
is_aggregator=False,
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -216,7 +221,7 @@ ALIASES: Dict[str, str] = {
|
||||
"lmstudio": "lmstudio",
|
||||
"lm-studio": "lmstudio",
|
||||
"lm_studio": "lmstudio",
|
||||
"ollama": "ollama-cloud",
|
||||
# ollama is now a first-class provider (issue #169)
|
||||
"vllm": "local",
|
||||
"llamacpp": "local",
|
||||
"llama.cpp": "local",
|
||||
|
||||
Reference in New Issue
Block a user