diff --git a/config/ezra-kimi-primary.yaml b/config/ezra-kimi-primary.yaml
new file mode 100644
index 000000000..602b47bfc
--- /dev/null
+++ b/config/ezra-kimi-primary.yaml
@@ -0,0 +1,44 @@
+# Ezra configuration: Kimi as the primary model
+# Anthropic is not part of this chain at all
+
+# Primary model: Kimi handles every operation
+model: kimi-coding/kimi-for-coding
+
+# Fallback chain: local/offline providers only
+# Anthropic is deliberately left out because of its quota issues
+fallback_providers:
+  - provider: ollama
+    model: qwen2.5:7b
+    base_url: http://localhost:11434
+    timeout: 120
+    reason: "Local fallback when Kimi unavailable"
+
+# Per-provider settings
+providers:
+  kimi-coding:
+    # Uses KIMI_API_KEY from .env
+    timeout: 60
+    max_retries: 3
+
+  ollama:
+    base_url: http://localhost:11434
+    timeout: 120
+    keep_alive: true
+
+# The anthropic provider block has been removed entirely
+# (it was the source of the quota failures)
+
+# Toolsets Ezra requires
+toolsets:
+  - hermes-cli
+  - github
+  - web
+
+# Agent settings. NOTE(review): no fallback_on_errors here, unlike fallback-config.yaml - confirm.
+agent:
+  max_turns: 90
+  tool_use_enforcement: auto
+
+# Display settings
+display:
+  show_provider_switches: true
diff --git a/config/fallback-config.yaml b/config/fallback-config.yaml
new file mode 100644
index 000000000..0622e07c1
--- /dev/null
+++ b/config/fallback-config.yaml
@@ -0,0 +1,53 @@
+# Hermes Agent fallback configuration
+# Deploy to Timmy and Ezra to enable automatic fallback to kimi-coding
+
+model: anthropic/claude-opus-4.6
+
+# Fallback order: Anthropic (primary model) -> Kimi -> local Ollama
+fallback_providers:
+  - provider: kimi-coding
+    model: kimi-for-coding
+    timeout: 60
+    reason: "Primary fallback when Anthropic quota limited"
+
+  - provider: ollama
+    model: qwen2.5:7b
+    base_url: http://localhost:11434
+    timeout: 120
+    reason: "Local fallback for offline operation"
+
+# Per-provider settings
+providers:
+  anthropic:
+    timeout: 30
+    max_retries: 2
+    retry_on_quota: true
+
+  kimi-coding:
+    timeout: 60
+    max_retries: 3
+
+  ollama:
+    timeout: 120
+    keep_alive: true
+
+# Enabled toolsets
+toolsets:
+  - hermes-cli
+  - github
+  - web
+
+# Agent settings, including the error kinds listed under fallback_on_errors
+agent:
+  max_turns: 90
+  tool_use_enforcement: auto
+  fallback_on_errors:
+    - rate_limit_exceeded
+    - quota_exceeded
+    - timeout
+    - service_unavailable
+
+# Display settings
+display:
+  show_provider_switches: true
+  show_fallback_notifications: true