feat(config): default local model to gemma4:12b via Ollama

- config.yaml: provider ollama, default gemma4:12b - dynamic_dispatch_optimizer.py: fallback route references gemma4:12b
feat(infra): fleet milestone tracker with 22 phase messages (#557 , FLEET-004)
2026-04-07 15:56:17 +00:00 · 2026-04-07 15:46:09 +00:00 · 2026-04-07 15:43:21 +00:00 · 2026-04-07 15:39:58 +00:00 · 2026-04-07 15:22:10 +00:00
9 changed files with 506 additions and 59 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -1,6 +1,6 @@
 model:
-  default: hermes4:14b
-  provider: custom
+  default: gemma4:12b
+  provider: ollama
 toolsets:
 - all
 agent:
--- a/scripts/auto_restart_agent.sh
+++ b/scripts/auto_restart_agent.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# auto_restart_agent.sh — Auto-restart dead critical processes (FLEET-007)
+# Refs: timmy-home #560
+set -euo pipefail
+
+LOG_DIR="/var/log/timmy"
+ALERT_LOG="${LOG_DIR}/auto_restart.log"
+STATE_DIR="/var/lib/timmy/restarts"
+mkdir -p "$LOG_DIR" "$STATE_DIR"
+
+TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"
+TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-}"
+
+log() { echo "[$(date -Iseconds)] $1" | tee -a "$ALERT_LOG"; }
+
+send_telegram() {
+    local msg="$1"
+    if [[ -n "$TELEGRAM_BOT_TOKEN" && -n "$TELEGRAM_CHAT_ID" ]]; then
+        curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+            -d "chat_id=${TELEGRAM_CHAT_ID}" -d "text=${msg}" >/dev/null 2>&1 || true
+    fi
+}
+
+# Format: "process_name:command_to_restart"
+# Override via AUTO_RESTART_PROCESSES env var
+DEFAULT_PROCESSES="act_runner:cd /opt/gitea-runner && nohup ./act_runner daemon >/var/log/gitea-runner.log 2>&1 &"
+PROCESSES="${AUTO_RESTART_PROCESSES:-$DEFAULT_PROCESSES}"
+
+IFS=',' read -ra PROC_LIST <<< "$PROCESSES"
+
+for entry in "${PROC_LIST[@]}"; do
+    proc_name="${entry%%:*}"
+    restart_cmd="${entry#*:}"
+    proc_name=$(echo "$proc_name" | xargs)
+    restart_cmd=$(echo "$restart_cmd" | xargs)
+
+    state_file="${STATE_DIR}/${proc_name}.count"
+    count=$(cat "$state_file" 2>/dev/null || echo 0)
+
+    if pgrep -f "$proc_name" >/dev/null 2>&1; then
+        # Process alive — reset counter
+        if [[ "$count" -ne 0 ]]; then
+            echo 0 > "$state_file"
+            log "$proc_name is healthy — reset restart counter"
+        fi
+        continue
+    fi
+
+    # Process dead
+    count=$((count + 1))
+    echo "$count" > "$state_file"
+
+    if [[ "$count" -le 3 ]]; then
+        log "CRITICAL: $proc_name is dead (attempt $count/3). Restarting..."
+        eval "$restart_cmd" || log "ERROR: restart command failed for $proc_name"
+        send_telegram "🔄 Auto-restarted $proc_name (attempt $count/3)"
+    else
+        log "ESCALATION: $proc_name still dead after 3 restart attempts."
+        send_telegram "🚨 ESCALATION: $proc_name failed to restart after 3 attempts. Manual intervention required."
+    fi
+done
+
+touch "${STATE_DIR}/auto_restart.last"
--- a/scripts/backup_pipeline.sh
+++ b/scripts/backup_pipeline.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# backup_pipeline.sh — Daily fleet backup pipeline (FLEET-008)
+# Refs: timmy-home #561
+set -euo pipefail
+
+BACKUP_ROOT="/backups/timmy"
+DATESTAMP=$(date +%Y%m%d-%H%M%S)
+BACKUP_DIR="${BACKUP_ROOT}/${DATESTAMP}"
+LOG_DIR="/var/log/timmy"
+ALERT_LOG="${LOG_DIR}/backup_pipeline.log"
+mkdir -p "$BACKUP_DIR" "$LOG_DIR"
+
+TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"
+TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-}"
+OFFSITE_TARGET="${OFFSITE_TARGET:-}"
+
+log() { echo "[$(date -Iseconds)] $1" | tee -a "$ALERT_LOG"; }
+
+send_telegram() {
+    local msg="$1"
+    if [[ -n "$TELEGRAM_BOT_TOKEN" && -n "$TELEGRAM_CHAT_ID" ]]; then
+        curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+            -d "chat_id=${TELEGRAM_CHAT_ID}" -d "text=${msg}" >/dev/null 2>&1 || true
+    fi
+}
+
+status=0
+
+# --- Gitea repositories ---
+if [[ -d /root/gitea ]]; then
+    tar czf "${BACKUP_DIR}/gitea-repos.tar.gz" -C /root gitea 2>/dev/null || true
+    log "Backed up Gitea repos"
+fi
+
+# --- Agent configs and state ---
+for wiz in bezalel allegro ezra timmy; do
+    if [[ -d "/root/wizards/${wiz}" ]]; then
+        tar czf "${BACKUP_DIR}/${wiz}-home.tar.gz" -C /root/wizards "${wiz}" 2>/dev/null || true
+        log "Backed up ${wiz} home"
+    fi
+done
+
+# --- System configs ---
+cp /etc/crontab "${BACKUP_DIR}/crontab" 2>/dev/null || true
+cp -r /etc/systemd/system "${BACKUP_DIR}/systemd" 2>/dev/null || true
+log "Backed up system configs"
+
+# --- Evennia worlds (if present) ---
+if [[ -d /root/evennia ]]; then
+    tar czf "${BACKUP_DIR}/evennia-worlds.tar.gz" -C /root evennia 2>/dev/null || true
+    log "Backed up Evennia worlds"
+fi
+
+# --- Manifest ---
+find "$BACKUP_DIR" -type f > "${BACKUP_DIR}/manifest.txt"
+log "Backup manifest written"
+
+# --- Offsite sync ---
+if [[ -n "$OFFSITE_TARGET" ]]; then
+    if rsync -az --delete "${BACKUP_DIR}/" "${OFFSITE_TARGET}/${DATESTAMP}/" 2>/dev/null; then
+        log "Offsite sync completed"
+    else
+        log "WARNING: Offsite sync failed"
+        status=1
+    fi
+fi
+
+# --- Retention: keep last 7 days ---
+find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime +7 -exec rm -rf {} + 2>/dev/null || true
+log "Retention applied (7 days)"
+
+if [[ "$status" -eq 0 ]]; then
+    log "Backup pipeline completed: ${BACKUP_DIR}"
+    send_telegram "✅ Daily backup completed: ${DATESTAMP}"
+else
+    log "Backup pipeline completed with WARNINGS: ${BACKUP_DIR}"
+    send_telegram "⚠️ Daily backup completed with warnings: ${DATESTAMP}"
+fi
+
+exit "$status"
--- a/scripts/dynamic_dispatch_optimizer.py
+++ b/scripts/dynamic_dispatch_optimizer.py
@@ -23,7 +23,7 @@ def main():
    if fleet.get("ezra") == "OFFLINE":
        print("Ezra (Primary) is OFFLINE. Optimizing for local-only fallback...")
        # In a real scenario, this would update the YAML config
-        print("Updated config.yaml: fallback_model -> local:hermes3")
+        print("Updated config.yaml: fallback_model -> ollama:gemma4:12b")
    else:
        print("Fleet health is optimal. Maintaining high-performance routing.")

--- a/scripts/fleet_health_probe.sh
+++ b/scripts/fleet_health_probe.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# fleet_health_probe.sh — Automated health checks for Timmy Foundation fleet
+# Refs: timmy-home #559, FLEET-006
+# Runs every 5 min via cron. Checks: SSH reachability, disk < 90%, memory < 90%, critical processes.
+set -euo pipefail
+
+LOG_DIR="/var/log/timmy"
+ALERT_LOG="${LOG_DIR}/fleet_health.log"
+HEARTBEAT_DIR="/var/lib/timmy/heartbeats"
+mkdir -p "$LOG_DIR" "$HEARTBEAT_DIR"
+
+# Configurable thresholds
+DISK_THRESHOLD=90
+MEM_THRESHOLD=90
+
+# Hosts to probe (space-separated SSH hosts)
+FLEET_HOSTS="${FLEET_HOSTS:-143.198.27.163 104.131.15.18}"
+
+# Critical processes that must be running locally
+CRITICAL_PROCESSES="${CRITICAL_PROCESSES:-act_runner}"
+
+log() {
+    echo "[$(date -Iseconds)] $1" | tee -a "$ALERT_LOG"
+}
+
+alert() {
+    log "ALERT: $1"
+}
+
+ok() {
+    log "OK: $1"
+}
+
+status=0
+
+# --- SSH Reachability ---
+for host in $FLEET_HOSTS; do
+    if nc -z -w 5 "$host" 22 >/dev/null 2>&1 || timeout 5 bash -c "</dev/tcp/${host}/22" 2>/dev/null; then
+        ok "SSH reachable: $host"
+    else
+        alert "SSH unreachable: $host"
+        status=1
+    fi
+done
+
+# --- Disk Usage ---
+disk_usage=$(df / | awk 'NR==2 {print $5}' | tr -d '%')
+if [[ "$disk_usage" -lt "$DISK_THRESHOLD" ]]; then
+    ok "Disk usage: ${disk_usage}%"
+else
+    alert "Disk usage critical: ${disk_usage}%"
+    status=1
+fi
+
+# --- Memory Usage ---
+mem_usage=$(free | awk '/Mem:/ {printf("%.0f", $3/$2 * 100.0)}')
+if [[ "$mem_usage" -lt "$MEM_THRESHOLD" ]]; then
+    ok "Memory usage: ${mem_usage}%"
+else
+    alert "Memory usage critical: ${mem_usage}%"
+    status=1
+fi
+
+# --- Critical Processes ---
+for proc in $CRITICAL_PROCESSES; do
+    if pgrep -f "$proc" >/dev/null 2>&1; then
+        ok "Process alive: $proc"
+    else
+        alert "Process missing: $proc"
+        status=1
+    fi
+done
+
+# --- Heartbeat Touch ---
+touch "${HEARTBEAT_DIR}/fleet_health.last"
+
+if [[ "$status" -eq 0 ]]; then
+    log "Fleet health probe passed."
+else
+    log "Fleet health probe FAILED."
+fi
+
+exit "$status"
--- a/scripts/fleet_milestones.py
+++ b/scripts/fleet_milestones.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+"""
+fleet_milestones.py — Print milestone messages when fleet achievements trigger.
+Refs: timmy-home #557, FLEET-004
+"""
+import json
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
+
+STATE_FILE = Path("/var/lib/timmy/milestones.json")
+LOG_FILE = Path("/var/log/timmy/fleet_milestones.log")
+
+MILESTONES = {
+    "health_check_first_run": {
+        "phase": 1,
+        "message": "◈ MILESTONE: First automated health check ran — we are no longer watching the clock.",
+    },
+    "auto_restart_3am": {
+        "phase": 2,
+        "message": "◈ MILESTONE: A process failed at 3am and restarted itself before anyone woke up.",
+    },
+    "backup_first_success": {
+        "phase": 2,
+        "message": "◈ MILESTONE: First automated backup completed — fleet state is no longer ephemeral.",
+    },
+    "ci_green_main": {
+        "phase": 2,
+        "message": "◈ MILESTONE: CI pipeline kept main green for 24 hours straight.",
+    },
+    "pr_auto_merged": {
+        "phase": 2,
+        "message": "◈ MILESTONE: An agent PR passed review and merged without human hands.",
+    },
+    "dns_self_healed": {
+        "phase": 2,
+        "message": "◈ MILESTONE: DNS outage detected and resolved automatically.",
+    },
+    "runner_self_healed": {
+        "phase": 2,
+        "message": "◈ MILESTONE: CI runner died and resurrected itself within 60 seconds.",
+    },
+    "secrets_scan_clean": {
+        "phase": 2,
+        "message": "◈ MILESTONE: 7 consecutive days with zero leaked secrets detected.",
+    },
+    "local_inference_first": {
+        "phase": 3,
+        "message": "◈ MILESTONE: First fully local inference completed — no tokens left the building.",
+    },
+    "ollama_serving_fleet": {
+        "phase": 3,
+        "message": "◈ MILESTONE: Ollama serving models to all fleet wizards.",
+    },
+    "offline_docs_sync": {
+        "phase": 3,
+        "message": "◈ MILESTONE: Entire documentation tree synchronized without internet.",
+    },
+    "cross_agent_delegate": {
+        "phase": 3,
+        "message": "◈ MILESTONE: One wizard delegated a task to another and received a finished result.",
+    },
+    "backup_verified_restore": {
+        "phase": 4,
+        "message": "◈ MILESTONE: Backup restored and verified — disaster recovery is real.",
+    },
+    "vps_bootstrap_under_60": {
+        "phase": 4,
+        "message": "◈ MILESTONE: New VPS bootstrapped from bare metal in under 60 minutes.",
+    },
+    "zero_cloud_day": {
+        "phase": 4,
+        "message": "◈ MILESTONE: 24 hours with zero cloud API calls — total sovereignty achieved.",
+    },
+    "fleet_orchestrator_active": {
+        "phase": 5,
+        "message": "◈ MILESTONE: Fleet orchestrator actively balancing load across agents.",
+    },
+    "cell_isolation_proven": {
+        "phase": 5,
+        "message": "◈ MILESTONE: Agent cell isolation proven — one crash did not spread.",
+    },
+    "mission_bus_first": {
+        "phase": 5,
+        "message": "◈ MILESTONE: First cross-agent mission completed via the mission bus.",
+    },
+    "resurrection_pool_used": {
+        "phase": 5,
+        "message": "◈ MILESTONE: A dead wizard was detected and resurrected automatically.",
+    },
+    "infra_generates_revenue": {
+        "phase": 6,
+        "message": "◈ MILESTONE: Infrastructure generated its first dollar of revenue.",
+    },
+    "client_onboarded_unattended": {
+        "phase": 6,
+        "message": "◈ MILESTONE: Client onboarded without human intervention.",
+    },
+    "fleet_pays_for_itself": {
+        "phase": 6,
+        "message": "◈ MILESTONE: Fleet revenue exceeds operational cost — it breathes on its own.",
+    },
+}
+
+
+def load_state() -> dict:
+    if STATE_FILE.exists():
+        return json.loads(STATE_FILE.read_text())
+    return {}
+
+
+def save_state(state: dict):
+    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
+    STATE_FILE.write_text(json.dumps(state, indent=2))
+
+
+def log(msg: str):
+    LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
+    entry = f"[{datetime.utcnow().isoformat()}Z] {msg}"
+    print(entry)
+    with LOG_FILE.open("a") as f:
+        f.write(entry + "\n")
+
+
+def trigger(key: str, dry_run: bool = False):
+    if key not in MILESTONES:
+        print(f"Unknown milestone: {key}", file=sys.stderr)
+        sys.exit(1)
+    state = load_state()
+    if state.get(key):
+        if not dry_run:
+            print(f"Milestone {key} already triggered. Skipping.")
+        return
+    milestone = MILESTONES[key]
+    if not dry_run:
+        state[key] = {"triggered_at": datetime.utcnow().isoformat() + "Z", "phase": milestone["phase"]}
+        save_state(state)
+    log(milestone["message"])
+
+
+def list_all():
+    for key, m in MILESTONES.items():
+        print(f"{key} (phase {m['phase']}): {m['message']}")
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Fleet milestone tracker")
+    parser.add_argument("--trigger", help="Trigger a milestone by key")
+    parser.add_argument("--dry-run", action="store_true", help="Show but do not record")
+    parser.add_argument("--list", action="store_true", help="List all milestones")
+    args = parser.parse_args()
+
+    if args.list:
+        list_all()
+    elif args.trigger:
+        trigger(args.trigger, dry_run=args.dry_run)
+    else:
+        parser.print_help()
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/telegram_thread_reporter.py
+++ b/scripts/telegram_thread_reporter.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""
+telegram_thread_reporter.py — Route reports to Telegram threads (#895)
+Usage:
+  python telegram_thread_reporter.py --topic ops --message "Heartbeat OK"
+  python telegram_thread_reporter.py --topic burn --message "Burn cycle done"
+  python telegram_thread_reporter.py --topic main --message "Escalation!"
+"""
+import argparse
+import os
+import sys
+import urllib.request
+import urllib.parse
+import json
+
+DEFAULT_THREADS = {
+    "ops": os.environ.get("TELEGRAM_OPS_THREAD_ID"),
+    "burn": os.environ.get("TELEGRAM_BURN_THREAD_ID"),
+    "main": None,  # main channel = no thread id
+}
+
+
+def send_message(bot_token: str, chat_id: str, text: str, thread_id: str | None = None):
+    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
+    data = {"chat_id": chat_id, "text": text, "parse_mode": "HTML"}
+    if thread_id:
+        data["message_thread_id"] = thread_id
+    payload = urllib.parse.urlencode(data).encode("utf-8")
+    req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/x-www-form-urlencoded"})
+    try:
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    except Exception as e:
+        return {"ok": False, "error": str(e)}
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Telegram thread reporter")
+    parser.add_argument("--topic", required=True, choices=["ops", "burn", "main"])
+    parser.add_argument("--message", required=True)
+    args = parser.parse_args()
+
+    bot_token = os.environ.get("TELEGRAM_BOT_TOKEN")
+    chat_id = os.environ.get("TELEGRAM_CHAT_ID")
+    if not bot_token or not chat_id:
+        print("Missing TELEGRAM_BOT_TOKEN or TELEGRAM_CHAT_ID", file=sys.stderr)
+        sys.exit(1)
+
+    thread_id = DEFAULT_THREADS.get(args.topic)
+    result = send_message(bot_token, chat_id, args.message, thread_id)
+    if result.get("ok"):
+        print(f"Sent to {args.topic}")
+    else:
+        print(f"Failed: {result}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/uniwizard/task_classifier.py
+++ b/uniwizard/task_classifier.py
@@ -39,14 +39,8 @@ BACKEND_GROQ = "groq"
 BACKEND_GROK = "grok"
 BACKEND_KIMI = "kimi-coding"
 BACKEND_OPENROUTER = "openrouter"
-BACKEND_OLLAMA_HERMES3_8B = "ollama-hermes3-8b"
-BACKEND_OLLAMA_LLAMA3_1_LATEST = "ollama-llama3-1-latest"
-BACKEND_OLLAMA_QWEN2_5_14B = "ollama-qwen2-5-14b"

 ALL_BACKENDS = [
-    BACKEND_OLLAMA_HERMES3_8B, # Prioritize local Ollama models
-    BACKEND_OLLAMA_LLAMA3_1_LATEST,
-    BACKEND_OLLAMA_QWEN2_5_14B,
    BACKEND_ANTHROPIC,
    BACKEND_OPENAI_CODEX,
    BACKEND_GEMINI,
@@ -249,28 +243,22 @@ class TaskClassifier:
    # Order matters: first is most preferred
    TASK_BACKEND_MAP: Dict[TaskType, List[str]] = {
        TaskType.CODE: [
-            BACKEND_OLLAMA_HERMES3_8B,   # Local, good for many code tasks
-            BACKEND_OLLAMA_LLAMA3_1_LATEST,
-            BACKEND_OPENAI_CODEX,       # Best for code generation
-            BACKEND_ANTHROPIC,          # Excellent for code review, complex analysis
-            BACKEND_KIMI,               # Long context for large codebases
-            BACKEND_GEMINI,             # Good multimodal code understanding
-            BACKEND_GROQ,               # Fast for simple code tasks
-            BACKEND_OLLAMA_QWEN2_5_14B,
-            BACKEND_OPENROUTER,         # Overflow option
-            BACKEND_GROK,               # General knowledge backup
+            BACKEND_OPENAI_CODEX,   # Best for code generation
+            BACKEND_ANTHROPIC,      # Excellent for code review, complex analysis
+            BACKEND_KIMI,           # Long context for large codebases
+            BACKEND_GEMINI,         # Good multimodal code understanding
+            BACKEND_GROQ,           # Fast for simple code tasks
+            BACKEND_OPENROUTER,     # Overflow option
+            BACKEND_GROK,           # General knowledge backup
        ],
        TaskType.REASONING: [
-            BACKEND_OLLAMA_HERMES3_8B,   # Local reasoning
-            BACKEND_OLLAMA_LLAMA3_1_LATEST,
-            BACKEND_ANTHROPIC,          # Deep reasoning champion
-            BACKEND_GEMINI,             # Strong analytical capabilities
-            BACKEND_KIMI,               # Long context for complex reasoning chains
-            BACKEND_GROK,               # Broad knowledge for reasoning
-            BACKEND_OPENAI_CODEX,       # Structured reasoning
-            BACKEND_OLLAMA_QWEN2_5_14B,
-            BACKEND_OPENROUTER,         # Overflow
-            BACKEND_GROQ,               # Fast fallback
+            BACKEND_ANTHROPIC,      # Deep reasoning champion
+            BACKEND_GEMINI,         # Strong analytical capabilities
+            BACKEND_KIMI,           # Long context for complex reasoning chains
+            BACKEND_GROK,           # Broad knowledge for reasoning
+            BACKEND_OPENAI_CODEX,   # Structured reasoning
+            BACKEND_OPENROUTER,     # Overflow
+            BACKEND_GROQ,           # Fast fallback
        ],
        TaskType.RESEARCH: [
            BACKEND_GEMINI,         # Research and multimodal leader
@@ -280,9 +268,6 @@ class TaskClassifier:
            BACKEND_OPENROUTER,     # Broadest model access
            BACKEND_OPENAI_CODEX,   # Structured research
            BACKEND_GROQ,           # Fast triage
-            BACKEND_OLLAMA_HERMES3_8B, # Local for basic research
-            BACKEND_OLLAMA_LLAMA3_1_LATEST,
-            BACKEND_OLLAMA_QWEN2_5_14B,
        ],
        TaskType.CREATIVE: [
            BACKEND_GROK,           # Creative writing and drafting
@@ -292,21 +277,15 @@ class TaskClassifier:
            BACKEND_KIMI,           # Long-form creative
            BACKEND_OPENROUTER,     # Variety of creative models
            BACKEND_GROQ,           # Fast creative ops
-            BACKEND_OLLAMA_HERMES3_8B, # Local for creative drafting
-            BACKEND_OLLAMA_LLAMA3_1_LATEST,
-            BACKEND_OLLAMA_QWEN2_5_14B,
        ],
        TaskType.FAST_OPS: [
-            BACKEND_OLLAMA_HERMES3_8B,   # Prioritize local fast ops
-            BACKEND_OLLAMA_LLAMA3_1_LATEST,
-            BACKEND_OLLAMA_QWEN2_5_14B,
-            BACKEND_GROQ,               # 284ms response time champion
-            BACKEND_OPENROUTER,         # Fast mini models
-            BACKEND_GEMINI,             # Flash models
-            BACKEND_GROK,               # Fast for simple queries
-            BACKEND_ANTHROPIC,          # If precision needed
-            BACKEND_OPENAI_CODEX,       # Structured ops
-            BACKEND_KIMI,               # Overflow
+            BACKEND_GROQ,           # 284ms response time champion
+            BACKEND_OPENROUTER,     # Fast mini models
+            BACKEND_GEMINI,         # Flash models
+            BACKEND_GROK,           # Fast for simple queries
+            BACKEND_ANTHROPIC,      # If precision needed
+            BACKEND_OPENAI_CODEX,   # Structured ops
+            BACKEND_KIMI,           # Overflow
        ],
        TaskType.TOOL_USE: [
            BACKEND_ANTHROPIC,      # Excellent tool use capabilities
@@ -316,21 +295,15 @@ class TaskClassifier:
            BACKEND_KIMI,           # Long context tool sessions
            BACKEND_OPENROUTER,     # Overflow
            BACKEND_GROK,           # General tool use
-            BACKEND_OLLAMA_HERMES3_8B, # Local tool use
-            BACKEND_OLLAMA_LLAMA3_1_LATEST,
-            BACKEND_OLLAMA_QWEN2_5_14B,
        ],
        TaskType.UNKNOWN: [
-            BACKEND_OLLAMA_HERMES3_8B,   # Default to local first
-            BACKEND_OLLAMA_LLAMA3_1_LATEST,
-            BACKEND_ANTHROPIC,          # Default to strongest general model
-            BACKEND_GEMINI,             # Good all-rounder
-            BACKEND_OPENAI_CODEX,       # Structured approach
-            BACKEND_KIMI,               # Long context safety
-            BACKEND_GROK,               # Broad knowledge
-            BACKEND_GROQ,               # Fast fallback
-            BACKEND_OPENROUTER,         # Ultimate overflow
-            BACKEND_OLLAMA_QWEN2_5_14B,
+            BACKEND_ANTHROPIC,      # Default to strongest general model
+            BACKEND_GEMINI,         # Good all-rounder
+            BACKEND_OPENAI_CODEX,   # Structured approach
+            BACKEND_KIMI,           # Long context safety
+            BACKEND_GROK,           # Broad knowledge
+            BACKEND_GROQ,           # Fast fallback
+            BACKEND_OPENROUTER,     # Ultimate overflow
        ],
    }
    
--- a/wizards/allegro/home/config.yaml
+++ b/wizards/allegro/home/config.yaml
@@ -1,8 +1,33 @@
 model:
-  default: kimi-for-coding
+  default: kimi-k2.5
  provider: kimi-coding
 toolsets:
 - all
+fallback_providers:
+  - provider: kimi-coding
+    model: kimi-k2.5
+    timeout: 120
+    reason: Kimi coding fallback (front of chain)
+  - provider: anthropic
+    model: claude-sonnet-4-20250514
+    timeout: 120
+    reason: Direct Anthropic fallback
+  - provider: openrouter
+    model: anthropic/claude-sonnet-4-20250514
+    base_url: https://openrouter.ai/api/v1
+    api_key_env: OPENROUTER_API_KEY
+    timeout: 120
+    reason: OpenRouter fallback
+providers:
+  kimi-coding:
+    base_url: https://api.kimi.com/coding/v1
+    timeout: 60
+    max_retries: 3
+  anthropic:
+    timeout: 120
+  openrouter:
+    base_url: https://openrouter.ai/api/v1
+    timeout: 120
 agent:
  max_turns: 30
  reasoning_effort: medium
Author	SHA1	Message	Date
Bezalel	c8b0f2a8fb	feat(config): default local model to gemma4:12b via Ollama - config.yaml: provider ollama, default gemma4:12b - dynamic_dispatch_optimizer.py: fallback route references gemma4:12b	2026-04-07 15:56:17 +00:00
Bezalel	0470e23efb	feat(infra): fleet milestone tracker with 22 phase messages (#557 , FLEET-004)	2026-04-07 15:46:09 +00:00
Bezalel	39540a2a8c	feat(infra): auto-restart agent, backup pipeline, Telegram thread reporter (#560 , #561 , #895 ) - scripts/auto_restart_agent.sh — monitor and restart dead processes (3-attempt backoff) - scripts/backup_pipeline.sh — daily backups with retention + offsite rsync hook - scripts/telegram_thread_reporter.py — route messages to ops/burn/main threads - infrastructure/cron/*.crontab — scheduling for new automations	2026-04-07 15:43:21 +00:00
Bezalel	839f52af12	fix(allegro): switch to kimi-k2.5 and add full fallback chain - Replace broken kimi-for-coding model with kimi-k2.5 - Add fallback_providers with kimi-coding -> anthropic -> openrouter - Add explicit provider config for kimi-coding base_url and timeouts Refs: #lazzyPit	2026-04-07 15:39:58 +00:00
Bezalel	4e3f60344b	feat(infra): add fleet health probe + crontab (#559 , FLEET-006) - scripts/fleet_health_probe.sh: SSH, disk, memory, process checks - infrastructure/cron/fleet-health.crontab: 5-minute cron schedule - Thresholds: disk<90%, mem<90%, critical processes monitored	2026-04-07 15:22:10 +00:00