Some checks failed
PR Checklist / pr-checklist (pull_request) Failing after 1m18s
Golden state: Kimi K2.5 primary → Gemini via OpenRouter → local Ollama.
Anthropic is gone from every active config, fallback chain, and loop script.

Wizard configs (3):
- allegro, bezalel, ezra: removed anthropic from fallback_providers, replaced with gemini + ollama. Removed anthropic provider section.

Playbooks (7):
- All playbooks now use kimi-k2.5 as preferred, google/gemini-2.5-pro as fallback. No claude model references remain.

Fleet scripts (8):
- claude-loop.sh: deprecated (exit 0, original preserved as reference)
- claudemax-watchdog.sh: deprecated (exit 0)
- agent-loop.sh: removed claude dispatch case
- start-loops.sh: removed claude-locks, claude-loop from proc list
- timmy-orchestrator.sh: removed claude worker monitoring
- fleet-status.sh: zeroed claude loop counter
- model-health-check.sh: replaced check_anthropic_model with check_kimi_model
- ops-gitea.sh, ops-helpers.sh, ops-panel.sh: removed claude from agent lists

Infrastructure (5):
- wizard_bootstrap.py: removed anthropic pip package and API key checks
- WIZARD_ENVIRONMENT_CONTRACT.md: replaced ANTHROPIC keys with KIMI
- DEPLOY.md: replaced ANTHROPIC_API_KEY with KIMI_API_KEY
- fallback-portfolios.yaml: replaced anthropic provider with kimi-coding
- fleet-vocabulary.md: updated Ezra and Claude entries to Kimi K2.5

Docs (2):
- sonnet-workforce.md: deprecated with notice
- GoldenRockachopa-checkin.md: updated model references

Preserved (not touched):
- training/ data (changing would corrupt training set)
- evaluations/ (historical benchmarks)
- RELEASE_*.md (changelogs)
- metrics_helpers.py (historical cost calculation)
- hermes-sovereign/githooks/pre-commit.py (secret detection - still useful)
- security/secret-scan.yml (key detection - still useful)
- architecture_linter.py (warns about anthropic usage - desired behavior)
- test_sovereignty_enforcement.py (tests anthropic is blocked - correct)
- son-of-timmy.md philosophical references (Claude as one of many backends)

Refs: Sovereignty directive, zero-cloud vision
126 lines
3.2 KiB
Bash
Executable File
126 lines
3.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# model-health-check.sh — Validate all configured model tags before loop startup
# Reads config.yaml, extracts model tags, tests each against its provider API.
# Exit 1 if primary model is dead. Warnings for auxiliary models.
#
# Environment:
#   HERMES_HOME  directory that holds config.yaml (default: $HOME/.hermes)

set -euo pipefail

# config.yaml follows HERMES_HOME; the log directory is always under $HOME/.hermes.
CONFIG="${HERMES_HOME:-$HOME/.hermes}/config.yaml"
LOG_DIR="$HOME/.hermes/logs"
LOG_FILE="$LOG_DIR/model-health.log"

# The log file is appended to later on, so its directory has to exist first.
mkdir -p "$LOG_DIR"

# Print a timestamped message to stdout and append the same line to $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: the message; all arguments are joined via "$*"
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout and in $LOG_FILE
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$LOG_FILE"
}

# Result tallies: printed in the final "Results" line; any FAIL makes the
# script exit 1.
PASS=0
FAIL=0
WARN=0

#######################################
# Probe the Kimi messages endpoint to see whether a model tag resolves.
# Globals:   KIMI_API_KEY (read), HERMES_HOME (read), HOME (read)
# Arguments: $1 - model tag to test
#            $2 - label used in the log line (e.g. PRIMARY, CRON)
# Outputs:   one SKIP/PASS/WARN/FAIL line through log()
# Returns:   0 - model answered, was rate limited/overloaded, or no API key
#                is available (check skipped)
#            1 - the API reported the model as not found
#            2 - the API could not be reached, or it rejected the key
#######################################
check_kimi_model() {
  local model="$1"
  local label="$2"
  local api_key="${KIMI_API_KEY:-}"
  local env_file="${HERMES_HOME:-$HOME/.hermes}/.env"
  local response
  local curl_rc=0

  if [[ -z "$api_key" ]]; then
    # Fall back to the first KIMI_API_KEY= line of .env, with quotes stripped.
    api_key=$(grep '^KIMI_API_KEY=' "$env_file" 2>/dev/null \
      | head -1 | cut -d= -f2- | tr -d "'\"" || echo "")
  fi

  if [[ -z "$api_key" ]]; then
    log "SKIP [$label] $model -- no KIMI_API_KEY"
    return 0
  fi

  # Deliberately no --fail: with it curl throws away the body of 4xx/5xx
  # replies, and the error-type checks below could never match.
  # The bearer scheme is "Bearer <token>" -- no colon after "Bearer".
  response=$(curl -s --max-time 10 -X POST \
    "https://api.kimi.com/v1/messages" \
    -H "Authorization: Bearer ${api_key}" \
    -H "content-type: application/json" \
    -d "{\"model\":\"${model}\",\"max_tokens\":1,\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}" \
    2>&1) || curl_rc=$?

  # Non-zero curl status means a transport problem (DNS, connect, timeout).
  if (( curl_rc != 0 )); then
    log "WARN [$label] $model -- could not reach API"
    return 2
  fi

  # NOTE(review): the error type names below are assumed to be what this
  # endpoint returns -- confirm against the provider's API reference.
  # First matching pattern wins, so the order matters.
  case "$response" in
    *'"not_found_error"'*)
      log "FAIL [$label] $model -- model not found (404)"
      return 1
      ;;
    *'"rate_limit_error"'* | *'"overloaded_error"'*)
      log "PASS [$label] $model -- rate limited but model exists"
      return 0
      ;;
    *'"authentication_error"'* | *'"permission_error"'*)
      # A rejected key says nothing about the model, so do not report PASS.
      log "WARN [$label] $model -- API key rejected"
      return 2
      ;;
    *'"content"'*)
      log "PASS [$label] $model -- healthy"
      return 0
      ;;
    *)
      log "PASS [$label] $model -- responded (non-404)"
      return 0
      ;;
  esac
}

# Extract models from config

#######################################
# Print one key of the `model` section of $CONFIG.
# Prints an empty string when the file cannot be read or parsed, the key is
# missing, or python3/yaml is unavailable. When `model` is a plain string it
# is returned for the key "default" only.
# Globals:   CONFIG (read)
# Arguments: $1 - key to read ("default" or "provider")
#######################################
config_model_field() {
  # The path and key go through argv, not into the Python source, so a quote
  # in the path cannot break the snippet.
  python3 -c '
import sys
import yaml

with open(sys.argv[1]) as f:
    cfg = yaml.safe_load(f) or {}
model = cfg.get("model", {})
if isinstance(model, dict):
    print(model.get(sys.argv[2], "") or "")
elif sys.argv[2] == "default":
    print(model or "")
' "$CONFIG" "$1" 2>/dev/null || echo ""
}

#######################################
# Add one check result to the tallies.
# Globals:   PASS, FAIL, WARN (written)
# Arguments: $1 - exit status of a model check (0 pass, 1 dead, other warn)
#######################################
record_result() {
  case "$1" in
    0) PASS=$((PASS + 1)) ;;
    1) FAIL=$((FAIL + 1)) ;;
    *) WARN=$((WARN + 1)) ;;
  esac
}

log "=== Model Health Check ==="

# Primary model
primary=$(config_model_field default)
provider=$(config_model_field provider)

if [[ -n "$primary" ]]; then
  case "$provider" in
    # NOTE(review): provider names assumed from the Kimi migration
    # ("kimi-coding"); confirm against the values used in config.yaml.
    kimi | kimi-coding)
      rc=0
      check_kimi_model "$primary" "PRIMARY" || rc=$?
      record_result "$rc"
      if (( rc == 1 )); then
        log "CRITICAL: Primary model $primary is DEAD. Loops will fail."
        log "Known good alternatives: google/gemini-2.5-pro (via OpenRouter), local Ollama"
      fi
      ;;
    *)
      log "SKIP [PRIMARY] $primary -- non-kimi provider ($provider), no validator yet"
      ;;
  esac
else
  log "SKIP [PRIMARY] -- could not read a model from $CONFIG"
fi

# Cron model check. Set CRON_MODEL in the environment to override the tag.
# NOTE(review): kimi-k2.5 is assumed to be the identifier the API expects --
# confirm. A dead cron model is counted as FAIL and blocks startup, although
# the file header describes auxiliary models as warnings only.
CRON_MODEL="${CRON_MODEL:-kimi-k2.5}"
rc=0
check_kimi_model "$CRON_MODEL" "CRON" || rc=$?
record_result "$rc"

log "=== Results: PASS=$PASS FAIL=$FAIL WARN=$WARN ==="

if (( FAIL > 0 )); then
  log "BLOCKING: $FAIL model(s) are dead. Fix config before starting loops."
  exit 1
fi

exit 0