Compare commits
1 Commits
step35/595
...
step35/428
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fe3fbebd49 |
@@ -7,7 +7,7 @@ Gitea unreachable, etc.), this script diagnoses the failure and applies
|
||||
common-sense fallbacks automatically.
|
||||
|
||||
Fallback chain:
|
||||
1. Primary model (Kimi) down -> switch config to local-llama.cpp
|
||||
1. Primary model (Kimi or Anthropic) down -> switch config to local-llama.cpp
|
||||
2. Gitea unreachable -> cache issues locally, retry on recovery
|
||||
3. VPS agents down -> alert + lazarus protocol
|
||||
4. Local llama.cpp down -> try Ollama, then alert-only mode
|
||||
@@ -89,6 +89,36 @@ def check_kimi():
|
||||
return True, f"HTTP {out}"
|
||||
return False, f"HTTP {out} err={err[:80]}"
|
||||
|
||||
|
||||
def check_anthropic():
    """Check whether the Anthropic API answers a minimal request.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable.
    If that is unset or empty, the first ``ANTHROPIC_API_KEY=`` line found in
    ``HERMES_HOME/.env`` or ``~/.hermes/.env`` (checked in that order) is used,
    with surrounding whitespace and quotes removed.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is True when the curl command exits
        with code 0 and reports HTTP 200 or 429. Otherwise ``ok`` is False and
        ``message`` is either ``"no API key"`` or the HTTP status followed by
        the first 80 characters of the error output.
    """
    import shlex  # local import: only this check needs shell quoting

    key = os.environ.get("ANTHROPIC_API_KEY", "")
    if not key:
        # Check multiple .env locations
        for env_path in (HERMES_HOME / ".env", Path.home() / ".hermes" / ".env"):
            try:
                # The context manager closes the handle even when we break early.
                with open(env_path) as env_file:
                    for line in env_file:
                        line = line.strip()
                        if line.startswith("ANTHROPIC_API_KEY="):
                            key = line.split("=", 1)[1].strip().strip('"').strip("'")
                            break
            except (OSError, UnicodeDecodeError):
                # Missing or unreadable file: a health check should fall
                # through to the next location rather than crash.
                continue
            if key:
                break
    if not key:
        return False, "no API key"

    # Quote the header so quotes, `$` or backticks in the key cannot break or
    # alter the command line.
    # NOTE(review): the key is still passed as a curl argument and is therefore
    # visible in the process list while curl runs -- confirm this is acceptable.
    api_key_header = shlex.quote(f"x-api-key: {key}")
    code, out, err = run(
        f'curl -s -o /dev/null -w "%{{http_code}}" -H {api_key_header} '
        f'-H "anthropic-version: 2023-06-01" '
        f'https://api.anthropic.com/v1/messages -X POST '
        f'-H "content-type: application/json" '
        f'-d \'{{"model":"claude-haiku-4-5-20251001","max_tokens":1,"messages":[{{"role":"user","content":"ping"}}]}}\' ',
        timeout=15
    )
    # 429 is accepted alongside 200 -- presumably because a rate-limit reply
    # still shows the API responded; verify against the intended policy.
    if code == 0 and out in ("200", "429"):
        return True, f"HTTP {out}"
    return False, f"HTTP {out} err={err[:80]}"
|
||||
|
||||
|
||||
def check_local_llama():
|
||||
"""Is local llama.cpp serving?"""
|
||||
code, out, err = run("curl -s http://localhost:8081/v1/models", timeout=5)
|
||||
@@ -179,6 +209,9 @@ def diagnose_and_fallback():
|
||||
kimi_ok, kimi_msg = check_kimi()
|
||||
results["checks"]["kimi-coding"] = {"ok": kimi_ok, "msg": kimi_msg}
|
||||
|
||||
anthropic_ok, anthropic_msg = check_anthropic()
|
||||
results["checks"]["anthropic"] = {"ok": anthropic_ok, "msg": anthropic_msg}
|
||||
|
||||
llama_ok, llama_msg = check_local_llama()
|
||||
results["checks"]["local_llama"] = {"ok": llama_ok, "msg": llama_msg}
|
||||
|
||||
@@ -202,34 +235,59 @@ def diagnose_and_fallback():
|
||||
|
||||
# ─── FALLBACK LOGIC ───
|
||||
|
||||
# Case 1: Primary (Kimi) down, local available
|
||||
if not kimi_ok and current_provider == "kimi-coding":
|
||||
# Determine primary health based on current provider
|
||||
if current_provider == "kimi-coding":
|
||||
primary_ok = kimi_ok
|
||||
elif current_provider == "anthropic":
|
||||
primary_ok = anthropic_ok
|
||||
else:
|
||||
primary_ok = False # unknown or fallback providers are not primary
|
||||
|
||||
# Build dynamic fallback tags for this provider
|
||||
fallback_tag_local = f"{current_provider}->local-llama"
|
||||
fallback_tag_ollama = f"{current_provider}->ollama"
|
||||
|
||||
# Case 1: Primary down (and we are on a known primary), trigger fallback chain
|
||||
if current_provider in ("kimi-coding", "anthropic") and not primary_ok:
|
||||
if llama_ok:
|
||||
msg = fallback_to_local_model(cfg)
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].append("kimi->local-llama")
|
||||
state["active_fallbacks"].append(fallback_tag_local)
|
||||
results["status"] = "degraded_local"
|
||||
elif ollama_ok:
|
||||
msg = fallback_to_ollama(cfg)
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].append("kimi->ollama")
|
||||
state["active_fallbacks"].append(fallback_tag_ollama)
|
||||
results["status"] = "degraded_ollama"
|
||||
else:
|
||||
msg = enter_safe_mode(state)
|
||||
results["actions"].append(msg)
|
||||
results["status"] = "safe_mode"
|
||||
|
||||
# Case 2: Already on fallback, check if primary recovered
|
||||
elif kimi_ok and "kimi->local-llama" in state.get("active_fallbacks", []):
|
||||
msg = restore_config()
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].remove("kimi->local-llama")
|
||||
results["status"] = "recovered"
|
||||
elif kimi_ok and "kimi->ollama" in state.get("active_fallbacks", []):
|
||||
msg = restore_config()
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].remove("kimi->ollama")
|
||||
results["status"] = "recovered"
|
||||
# Case 2: Already on fallback (current provider is not a primary), check if original primary recovered
|
||||
active = state.get("active_fallbacks", [])
|
||||
if current_provider not in ("kimi-coding", "anthropic"):
|
||||
# We're on a fallback provider; check recovery for whichever primary tag we have
|
||||
if kimi_ok and "kimi->local-llama" in active:
|
||||
msg = restore_config()
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].remove("kimi->local-llama")
|
||||
results["status"] = "recovered"
|
||||
elif kimi_ok and "kimi->ollama" in active:
|
||||
msg = restore_config()
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].remove("kimi->ollama")
|
||||
results["status"] = "recovered"
|
||||
elif anthropic_ok and "anthropic->local-llama" in active:
|
||||
msg = restore_config()
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].remove("anthropic->local-llama")
|
||||
results["status"] = "recovered"
|
||||
elif anthropic_ok and "anthropic->ollama" in active:
|
||||
msg = restore_config()
|
||||
results["actions"].append(msg)
|
||||
state["active_fallbacks"].remove("anthropic->ollama")
|
||||
results["status"] = "recovered"
|
||||
|
||||
# Case 3: Gitea down — just flag it, work locally
|
||||
if not gitea_ok:
|
||||
|
||||
Reference in New Issue
Block a user