Files
timmy-config/bin/model-health-check.sh
Perplexity Computer aeefe5027d
Some checks failed
Architecture Lint / Linter Tests (push) Successful in 10s
Smoke Test / smoke (push) Failing after 8s
Validate Config / YAML Lint (push) Failing after 6s
Validate Config / JSON Validate (push) Successful in 6s
Validate Config / Python Syntax & Import Check (push) Failing after 7s
Validate Config / Python Test Suite (push) Has been skipped
Validate Config / Shell Script Lint (push) Failing after 14s
Validate Config / Cron Syntax Check (push) Successful in 5s
Validate Config / Deploy Script Dry Run (push) Successful in 4s
Validate Config / Playbook Schema Validation (push) Successful in 8s
Architecture Lint / Lint Repository (push) Failing after 8s
purge: remove Anthropic from timmy-config (14 files) (#502)
2026-04-13 02:02:06 +00:00

126 lines
3.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# model-health-check.sh — Validate all configured model tags before loop startup
# Reads config.yaml, extracts model tags, tests each against its provider API.
# Exit 1 if primary model is dead. Warnings for auxiliary models.
set -euo pipefail
CONFIG="${HERMES_HOME:-$HOME/.hermes}/config.yaml"
LOG_DIR="$HOME/.hermes/logs"
LOG_FILE="$LOG_DIR/model-health.log"
mkdir -p "$LOG_DIR"
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"
}
PASS=0
FAIL=0
WARN=0
check_kimi_model() {
local model="$1"
local label="$2"
local api_key="${KIMI_API_KEY:-}"
if [ -z "$api_key" ]; then
# Try loading from .env
api_key=$(grep '^KIMI_API_KEY=' "${HERMES_HOME:-$HOME/.hermes}/.env" 2>/dev/null | head -1 | cut -d= -f2- | tr -d "'\"" || echo "")
fi
if [ -z "$api_key" ]; then
log "SKIP [$label] $model -- no KIMI_API_KEY"
return 0
fi
response=$(curl -sf --max-time 10 -X POST \
"https://api.kimi.com/coding/v1/chat/completions" \
-H "x-api-key: ${api_key}" \
-H "x-api-provider: kimi-coding" \
-H "content-type: application/json" \
-d "{\"model\":\"${model}\",\"max_tokens\":1,\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}" 2>&1 || echo "ERROR")
if echo "$response" | grep -q '"not_found_error"'; then
log "FAIL [$label] $model -- model not found (404)"
return 1
elif echo "$response" | grep -q '"rate_limit_error"\|"overloaded_error"'; then
log "PASS [$label] $model -- rate limited but model exists"
return 0
elif echo "$response" | grep -q '"content"'; then
log "PASS [$label] $model -- healthy"
return 0
elif echo "$response" | grep -q 'ERROR'; then
log "WARN [$label] $model -- could not reach API"
return 2
else
log "PASS [$label] $model -- responded (non-404)"
return 0
fi
}
# Extract models from config
log "=== Model Health Check ==="
# Primary model
primary=$(python3 -c "
import yaml
with open('$CONFIG') as f:
c = yaml.safe_load(f)
m = c.get('model', {})
if isinstance(m, dict):
print(m.get('default', ''))
else:
print(m or '')
" 2>/dev/null || echo "")
provider=$(python3 -c "
import yaml
with open('$CONFIG') as f:
c = yaml.safe_load(f)
m = c.get('model', {})
if isinstance(m, dict):
print(m.get('provider', ''))
else:
print('')
" 2>/dev/null || echo "")
if [ -n "$primary" ] && [ "$provider" = "kimi-coding" ]; then
if check_kimi_model "$primary" "PRIMARY"; then
PASS=$((PASS + 1))
else
rc=$?
if [ "$rc" -eq 1 ]; then
FAIL=$((FAIL + 1))
log "CRITICAL: Primary model $primary is DEAD. Loops will fail."
log "Known good alternatives: kimi-k2.5, google/gemini-2.5-pro"
else
WARN=$((WARN + 1))
fi
fi
elif [ -n "$primary" ]; then
log "SKIP [PRIMARY] $primary -- non-kimi provider ($provider), no validator yet"
fi
# Cron model check (haiku)
CRON_MODEL="kimi-k2.5"
if check_kimi_model "$CRON_MODEL" "CRON"; then
PASS=$((PASS + 1))
else
rc=$?
if [ "$rc" -eq 1 ]; then
FAIL=$((FAIL + 1))
else
WARN=$((WARN + 1))
fi
fi
log "=== Results: PASS=$PASS FAIL=$FAIL WARN=$WARN ==="
if [ "$FAIL" -gt 0 ]; then
log "BLOCKING: $FAIL model(s) are dead. Fix config before starting loops."
exit 1
fi
exit 0