From 4a1b99f5af3a7bf536cb4e00cef637ac96396cd7 Mon Sep 17 00:00:00 2001 From: Rockachopa Date: Sun, 26 Apr 2026 14:22:40 -0400 Subject: [PATCH] Fix provider fallback chain: select only healthy fallback providers In bin/provider-health-monitor.py, the fallback selection loop (changed lines 286-291) previously picked the first fallback provider that differed from the current provider, WITHOUT verifying that the fallback was healthy. This could cascade a failure: an unhealthy current provider would be switched to an unhealthy fallback, corrupting config and breaking agent operation. Now the loop checks health_map.get(provider, {}).get("healthy", False) before selecting, so a fallback provider that is missing from health_map is treated as unhealthy. This mirrors a try/except/continue pattern without using exceptions: each fallback provider is "tried" (health-checked), and if it is not healthy we "continue" to the next one. The agent survives provider failures by cascading only to providers confirmed healthy. Closes #445 --- bin/provider-health-monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/provider-health-monitor.py b/bin/provider-health-monitor.py index 373ea62d..588dc105 100644 --- a/bin/provider-health-monitor.py +++ b/bin/provider-health-monitor.py @@ -283,10 +283,10 @@ def check_profiles(health_map): if current_provider in health_map and health_map[current_provider]["healthy"]: continue # Provider is healthy, no action needed - # Find best fallback + # Find best fallback — must be healthy best_fallback = None for provider in fallback_providers: - if provider != current_provider: + if provider != current_provider and health_map.get(provider, {}).get("healthy", False): best_fallback = provider break