diff --git a/config/ezra-deploy.sh b/config/ezra-deploy.sh index cd656dab3..7fddcec75 100755 --- a/config/ezra-deploy.sh +++ b/config/ezra-deploy.sh @@ -34,7 +34,7 @@ echo "" echo -e "${GREEN}[DEPLOY]${NC} Copying config to Ezra..." # Backup existing -ssh root@$EZRA_HOST "cp $EZRA_HERMES_HOME/config.yaml $EZRA_HERMES_HOME/config.yaml.backup.anthropic-$(date +%s) 2>/dev/null || true" +ssh root@$EZRA_HOST "cp $EZRA_HERMES_HOME/config.yaml $EZRA_HERMES_HOME/config.yaml.backup.pre-kimi-$(date +%s) 2>/dev/null || true" # Copy new config scp "$CONFIG_SOURCE" root@$EZRA_HOST:$EZRA_HERMES_HOME/config.yaml @@ -52,7 +52,7 @@ ssh root@$EZRA_HOST "cd $EZRA_HERMES_HOME && nohup python -m gateway.run > logs/ echo "" echo -e "${GREEN}[SUCCESS]${NC} Ezra is now running Kimi primary!" echo "" -echo "Anthropic: FIRED ✓" +echo "Anthropic: BANNED ✓ (removed from all configs)" echo "Kimi: PRIMARY ✓" echo "" echo "To verify: ssh root@$EZRA_HOST 'tail -f $EZRA_HERMES_HOME/logs/gateway.log'" diff --git a/config/ezra-kimi-primary.yaml b/config/ezra-kimi-primary.yaml index 13dab4af9..00da8c19c 100644 --- a/config/ezra-kimi-primary.yaml +++ b/config/ezra-kimi-primary.yaml @@ -2,22 +2,29 @@ model: default: kimi-k2.5 provider: kimi-coding toolsets: - - all +- all fallback_providers: - - provider: kimi-coding - model: kimi-k2.5 - timeout: 120 - reason: Kimi coding fallback (front of chain) - - provider: anthropic - model: claude-sonnet-4-20250514 - timeout: 120 - reason: Direct Anthropic fallback - - provider: openrouter - model: anthropic/claude-sonnet-4-20250514 - base_url: https://openrouter.ai/api/v1 - api_key_env: OPENROUTER_API_KEY - timeout: 120 - reason: OpenRouter fallback +- provider: kimi-coding + model: kimi-k2.5 + timeout: 120 + reason: Kimi coding fallback (front of chain) +- provider: openrouter + model: google/gemini-2.5-pro + base_url: https://openrouter.ai/api/v1 + api_key_env: OPENROUTER_API_KEY + timeout: 120 + reason: Gemini 2.5 Pro fallback (replaces banned Anthropic) +- 
provider: openrouter + model: google/gemini-2.5-pro + base_url: https://openrouter.ai/api/v1 + api_key_env: OPENROUTER_API_KEY + timeout: 120 + reason: Gemini 2.5 Pro via OpenRouter (replaces banned Anthropic) +- provider: ollama + model: gemma4:latest + base_url: http://localhost:11434 + timeout: 300 + reason: "Terminal fallback \u2014 local Ollama" agent: max_turns: 90 reasoning_effort: high @@ -27,8 +34,6 @@ providers: base_url: https://api.kimi.com/coding/v1 timeout: 60 max_retries: 3 - anthropic: - timeout: 120 openrouter: base_url: https://openrouter.ai/api/v1 timeout: 120 diff --git a/config/fallback-config.yaml b/config/fallback-config.yaml index cdefd49f9..dd361a2b2 100644 --- a/config/fallback-config.yaml +++ b/config/fallback-config.yaml @@ -1,53 +1,43 @@ # Hermes Agent Fallback Configuration -# Deploy this to Timmy and Ezra for automatic kimi-coding fallback +# Primary: kimi-k2.5 | Fallback: gemini-2.5-pro | Terminal: local Ollama +# Anthropic BANNED per BANNED_PROVIDERS.yml (2026-04-09) -model: anthropic/claude-opus-4.6 - -# Fallback chain: Anthropic -> Kimi -> Ollama (local) +model: kimi-k2.5 fallback_providers: - - provider: kimi-coding - model: kimi-k2.5 - timeout: 60 - reason: "Primary fallback when Anthropic quota limited" - - - provider: ollama - model: qwen2.5:7b - base_url: http://localhost:11434 - timeout: 120 - reason: "Local fallback for offline operation" - -# Provider settings +- provider: kimi-coding + model: kimi-k2.5 + timeout: 60 + reason: Kimi coding fallback (front of chain) +- provider: openrouter + model: google/gemini-2.5-pro + base_url: https://openrouter.ai/api/v1 + api_key_env: OPENROUTER_API_KEY + timeout: 120 + reason: Gemini 2.5 Pro via OpenRouter (replaces banned Anthropic) +- provider: ollama + model: qwen2.5:7b + base_url: http://localhost:11434 + timeout: 120 + reason: Local fallback for offline operation providers: - anthropic: - timeout: 30 - retry_on_quota: true - max_retries: 2 - kimi-coding: 
timeout: 60 max_retries: 3 - ollama: timeout: 120 keep_alive: true - -# Toolsets toolsets: - - hermes-cli - - github - - web - -# Agent settings +- hermes-cli +- github +- web agent: max_turns: 90 tool_use_enforcement: auto fallback_on_errors: - - rate_limit_exceeded - - quota_exceeded - - timeout - - service_unavailable - -# Display settings + - rate_limit_exceeded + - quota_exceeded + - timeout + - service_unavailable display: show_fallback_notifications: true show_provider_switches: true diff --git a/config/timmy-deploy.sh b/config/timmy-deploy.sh index afe23e863..85d37bbff 100755 --- a/config/timmy-deploy.sh +++ b/config/timmy-deploy.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Deploy fallback config to Timmy +# Deploy kimi-primary config to Timmy (Anthropic BANNED) # Run this from Timmy's VPS or via SSH set -e @@ -50,10 +50,10 @@ sleep 2 ssh root@$TIMMY_HOST "cd $TIMMY_HERMES_HOME && nohup python -m gateway.run > logs/gateway.log 2>&1 &" echo "" -echo -e "${GREEN}[SUCCESS]${NC} Timmy is now running with Anthropic + Kimi fallback!" +echo -e "${GREEN}[SUCCESS]${NC} Timmy is now running with Kimi primary + Gemini fallback!" echo "" -echo "Anthropic: PRIMARY (with quota retry)" -echo "Kimi: FALLBACK ✓" +echo "Kimi: PRIMARY ✓" +echo "Gemini: FALLBACK (via OpenRouter) ✓" echo "Ollama: LOCAL FALLBACK ✓" echo "" echo "To verify: ssh root@$TIMMY_HOST 'tail -f $TIMMY_HERMES_HOME/logs/gateway.log'"