feat(config): default local model to gemma4:12b via Ollama

- config.yaml: provider ollama, default gemma4:12b
- dynamic_dispatch_optimizer.py: fallback route references gemma4:12b
feat(infra): fleet milestone tracker with 22 phase messages (#557, FLEET-004)
2026-04-07 15:56:17 +00:00 · 2026-04-07 15:46:09 +00:00 · 2026-04-07 15:43:21 +00:00 · 2026-04-07 15:39:58 +00:00 · 2026-04-07 15:22:10 +00:00
10 changed files with 478 additions and 132 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -56,9 +56,6 @@ __pycache__/
 venv/
 */venv/

-# Resource Tracking System
-metrics/resource_state.json
-
 # Editor temps
 \#*\#
 *~
--- a/config.yaml
+++ b/config.yaml
@@ -1,6 +1,6 @@
 model:
-  default: hermes4:14b
-  provider: custom
+  default: gemma4:12b
+  provider: ollama
 toolsets:
 - all
 agent:
--- a/scripts/auto_restart_agent.sh
+++ b/scripts/auto_restart_agent.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# auto_restart_agent.sh — Auto-restart dead critical processes (FLEET-007)
+# Refs: timmy-home #560
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+LOG_DIR="/var/log/timmy"
+ALERT_LOG="${LOG_DIR}/auto_restart.log"  # every log() line is appended here
+STATE_DIR="/var/lib/timmy/restarts"      # per-process restart counters and the last-run marker
+mkdir -p "$LOG_DIR" "$STATE_DIR"
+
+TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"  # optional; when empty, send_telegram does nothing
+TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-}"      # optional; when empty, send_telegram does nothing
+
+log() { echo "[$(date -Iseconds)] $1" | tee -a "$ALERT_LOG"; }  # timestamped line to stdout and $ALERT_LOG
+
+# send_telegram MSG: best-effort Telegram alert; a no-op unless both the bot token and chat id are set.
+send_telegram() {
+    [[ -n "$TELEGRAM_BOT_TOKEN" && -n "$TELEGRAM_CHAT_ID" ]] || return 0
+    # --data-urlencode keeps '&', '+' and '%' in the text intact; --max-time bounds a stalled API call.
+    curl -s --max-time 10 -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+        -d "chat_id=${TELEGRAM_CHAT_ID}" --data-urlencode "text=$1" >/dev/null 2>&1 || true
+}
+
+# Format: comma-separated "process_name:command_to_restart" entries; a command cannot contain a comma.
+# Override via AUTO_RESTART_PROCESSES env var (trusted input only: each command is run through eval).
+DEFAULT_PROCESSES="act_runner:cd /opt/gitea-runner && nohup ./act_runner daemon >/var/log/gitea-runner.log 2>&1 &"
+PROCESSES="${AUTO_RESTART_PROCESSES:-$DEFAULT_PROCESSES}"
+
+IFS=',' read -ra PROC_LIST <<< "$PROCESSES"
+
+for entry in "${PROC_LIST[@]}"; do
+    proc_name="${entry%%:*}"
+    restart_cmd="${entry#*:}"
+    # Trim with `read -r`; a pipe through xargs would also eat quotes and backslashes inside the command.
+    read -r proc_name <<< "$proc_name"
+    read -r restart_cmd <<< "$restart_cmd"
+    state_file="${STATE_DIR}/${proc_name}.count"
+    count=$(cat "$state_file" 2>/dev/null || echo 0)
+    [[ "$count" =~ ^[0-9]+$ ]] || count=0  # an empty or corrupted counter file restarts the count
+
+    if pgrep -f "$proc_name" >/dev/null 2>&1; then
+        # Process alive — reset counter
+        if [[ "$count" -ne 0 ]]; then
+            echo 0 > "$state_file"
+            log "$proc_name is healthy — reset restart counter"
+        fi
+        continue
+    fi
+
+    # Process dead: persist the attempt number before acting, so a crash here still counts.
+    count=$((count + 1))
+    echo "$count" > "$state_file"
+    if [[ "$count" -le 3 ]]; then
+        log "CRITICAL: $proc_name is dead (attempt $count/3). Restarting..."
+        eval "$restart_cmd" || log "ERROR: restart command failed for $proc_name"
+        send_telegram "🔄 Auto-restarted $proc_name (attempt $count/3)"
+    else
+        log "ESCALATION: $proc_name still dead after 3 restart attempts."
+        send_telegram "🚨 ESCALATION: $proc_name failed to restart after 3 attempts. Manual intervention required."
+    fi
+done
+
+touch "${STATE_DIR}/auto_restart.last"
--- a/scripts/backup_pipeline.sh
+++ b/scripts/backup_pipeline.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# backup_pipeline.sh — Daily fleet backup pipeline (FLEET-008)
+# Refs: timmy-home #561
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+BACKUP_ROOT="/backups/timmy"
+DATESTAMP=$(date +%Y%m%d-%H%M%S)          # one timestamp per run; names the backup directory
+BACKUP_DIR="${BACKUP_ROOT}/${DATESTAMP}"
+LOG_DIR="/var/log/timmy"
+ALERT_LOG="${LOG_DIR}/backup_pipeline.log"  # every log() line is appended here
+mkdir -p "$BACKUP_DIR" "$LOG_DIR"
+
+TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"  # optional; when empty, send_telegram does nothing
+TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-}"      # optional; when empty, send_telegram does nothing
+OFFSITE_TARGET="${OFFSITE_TARGET:-}"          # optional rsync destination; empty skips the offsite sync
+
+log() { echo "[$(date -Iseconds)] $1" | tee -a "$ALERT_LOG"; }  # timestamped line to stdout and $ALERT_LOG
+
+# send_telegram MSG: best-effort Telegram alert; a no-op unless both the bot token and chat id are set.
+send_telegram() {
+    [[ -n "$TELEGRAM_BOT_TOKEN" && -n "$TELEGRAM_CHAT_ID" ]] || return 0
+    # --data-urlencode keeps '&', '+' and '%' in the text intact; --max-time bounds a stalled API call.
+    curl -s --max-time 10 -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+        -d "chat_id=${TELEGRAM_CHAT_ID}" --data-urlencode "text=$1" >/dev/null 2>&1 || true
+}
+
+status=0
+
+# try_backup LABEL CMD...: run CMD (stderr discarded); on failure log a WARNING and set status=1 rather than claim success.
+try_backup() {
+    if "${@:2}" 2>/dev/null; then
+        log "Backed up $1"
+    else
+        log "WARNING: Failed to back up $1"
+        status=1
+    fi
+}
+
+# --- Gitea repositories ---
+[[ ! -d /root/gitea ]] || try_backup "Gitea repos" tar czf "${BACKUP_DIR}/gitea-repos.tar.gz" -C /root gitea
+
+# --- Agent configs and state ---
+for wiz in bezalel allegro ezra timmy; do
+    [[ ! -d "/root/wizards/${wiz}" ]] || try_backup "${wiz} home" tar czf "${BACKUP_DIR}/${wiz}-home.tar.gz" -C /root/wizards "${wiz}"
+done
+
+# --- System configs ---
+try_backup "crontab" cp /etc/crontab "${BACKUP_DIR}/crontab"
+try_backup "systemd units" cp -r /etc/systemd/system "${BACKUP_DIR}/systemd"
+
+# --- Evennia worlds (if present) ---
+[[ ! -d /root/evennia ]] || try_backup "Evennia worlds" tar czf "${BACKUP_DIR}/evennia-worlds.tar.gz" -C /root evennia
+
+# --- Manifest ---
+find "$BACKUP_DIR" -type f > "${BACKUP_DIR}/manifest.txt"
+log "Backup manifest written"
+
+# --- Offsite sync ---
+if [[ -n "$OFFSITE_TARGET" ]]; then
+    if rsync -az --delete "${BACKUP_DIR}/" "${OFFSITE_TARGET}/${DATESTAMP}/" 2>/dev/null; then
+        log "Offsite sync completed"
+    else
+        log "WARNING: Offsite sync failed"
+        status=1
+    fi
+fi
+
+# --- Retention: keep last 7 days ---
+find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime +7 -exec rm -rf {} + 2>/dev/null || true
+log "Retention applied (7 days)"
+
+if [[ "$status" -eq 0 ]]; then
+    log "Backup pipeline completed: ${BACKUP_DIR}"
+    send_telegram "✅ Daily backup completed: ${DATESTAMP}"
+else
+    log "Backup pipeline completed with WARNINGS: ${BACKUP_DIR}"
+    send_telegram "⚠️ Daily backup completed with warnings: ${DATESTAMP}"
+fi
+
+exit "$status"
--- a/scripts/dynamic_dispatch_optimizer.py
+++ b/scripts/dynamic_dispatch_optimizer.py
@@ -23,7 +23,7 @@ def main():
    if fleet.get("ezra") == "OFFLINE":
        print("Ezra (Primary) is OFFLINE. Optimizing for local-only fallback...")
        # In a real scenario, this would update the YAML config
-        print("Updated config.yaml: fallback_model -> local:hermes3")
+        print("Updated config.yaml: fallback_model -> ollama:gemma4:12b")
    else:
        print("Fleet health is optimal. Maintaining high-performance routing.")

--- a/scripts/fleet_health_probe.sh
+++ b/scripts/fleet_health_probe.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# fleet_health_probe.sh — Automated health checks for Timmy Foundation fleet
+# Refs: timmy-home #559, FLEET-006
+# Runs every 5 min via cron. Checks: SSH reachability, disk < 90%, memory < 90%, critical processes.
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+LOG_DIR="/var/log/timmy"
+ALERT_LOG="${LOG_DIR}/fleet_health.log"  # every log() line is appended here
+HEARTBEAT_DIR="/var/lib/timmy/heartbeats"  # holds the marker file touched at the end of each run
+mkdir -p "$LOG_DIR" "$HEARTBEAT_DIR"
+
+# Thresholds in percent; a reading at or above the threshold raises an alert
+DISK_THRESHOLD=90
+MEM_THRESHOLD=90
+
+# Hosts to probe (space-separated SSH hosts); override via FLEET_HOSTS env var
+FLEET_HOSTS="${FLEET_HOSTS:-143.198.27.163 104.131.15.18}"
+
+# Critical processes that must be running locally (space-separated); override via CRITICAL_PROCESSES env var
+CRITICAL_PROCESSES="${CRITICAL_PROCESSES:-act_runner}"
+
+log() {  # timestamped line to stdout and $ALERT_LOG
+    echo "[$(date -Iseconds)] $1" | tee -a "$ALERT_LOG"
+}
+
+alert() {  # log a failed check with an "ALERT: " prefix
+    log "ALERT: $1"
+}
+
+ok() {  # log a passed check with an "OK: " prefix
+    log "OK: $1"
+}
+
+status=0  # set to 1 by any failed check; becomes the script's exit code
+
+# --- SSH Reachability (TCP connect to port 22: nc first, then bash /dev/tcp; host is passed as $1, never spliced into code) ---
+for host in $FLEET_HOSTS; do
+    if nc -z -w 5 "$host" 22 >/dev/null 2>&1 || timeout 5 bash -c '</dev/tcp/"$1"/22' _ "$host" 2>/dev/null; then
+        ok "SSH reachable: $host"
+    else
+        alert "SSH unreachable: $host"
+        status=1
+    fi
+done
+
+# --- Disk Usage (df -P keeps each filesystem on one line, so NR==2 is always the data row) ---
+disk_usage=$(df -P / | awk 'NR==2 {print $5}' | tr -d '%')
+if [[ "$disk_usage" -lt "$DISK_THRESHOLD" ]]; then
+    ok "Disk usage: ${disk_usage}%"
+else
+    alert "Disk usage critical: ${disk_usage}%"
+    status=1
+fi
+
+# --- Memory Usage (third / second field of the "Mem:" row of free, as a rounded percentage) ---
+mem_usage=$(free | awk '/Mem:/ {printf("%.0f", $3/$2 * 100.0)}')
+if [[ "$mem_usage" -lt "$MEM_THRESHOLD" ]]; then
+    ok "Memory usage: ${mem_usage}%"
+else
+    alert "Memory usage critical: ${mem_usage}%"
+    status=1
+fi
+
+# --- Critical Processes (pgrep -f matches against the full command line) ---
+for proc in $CRITICAL_PROCESSES; do
+    if pgrep -f "$proc" >/dev/null 2>&1; then
+        ok "Process alive: $proc"
+    else
+        alert "Process missing: $proc"
+        status=1
+    fi
+done
+
+# --- Heartbeat Touch (updated on every completed run, pass or fail) ---
+touch "${HEARTBEAT_DIR}/fleet_health.last"
+
+if [[ "$status" -eq 0 ]]; then
+    log "Fleet health probe passed."
+else
+    log "Fleet health probe FAILED."
+fi
+
+exit "$status"
--- a/scripts/fleet_milestones.py
+++ b/scripts/fleet_milestones.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+"""
+fleet_milestones.py — Print milestone messages when fleet achievements trigger.
+Refs: timmy-home #557, FLEET-004
+"""
+import json
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
+
+STATE_FILE = Path("/var/lib/timmy/milestones.json")  # JSON record of milestones already triggered
+LOG_FILE = Path("/var/log/timmy/fleet_milestones.log")  # append-only log of announced milestone messages
+
+MILESTONES = {
+    "health_check_first_run": {
+        "phase": 1,
+        "message": "◈ MILESTONE: First automated health check ran — we are no longer watching the clock.",
+    },
+    "auto_restart_3am": {
+        "phase": 2,
+        "message": "◈ MILESTONE: A process failed at 3am and restarted itself before anyone woke up.",
+    },
+    "backup_first_success": {
+        "phase": 2,
+        "message": "◈ MILESTONE: First automated backup completed — fleet state is no longer ephemeral.",
+    },
+    "ci_green_main": {
+        "phase": 2,
+        "message": "◈ MILESTONE: CI pipeline kept main green for 24 hours straight.",
+    },
+    "pr_auto_merged": {
+        "phase": 2,
+        "message": "◈ MILESTONE: An agent PR passed review and merged without human hands.",
+    },
+    "dns_self_healed": {
+        "phase": 2,
+        "message": "◈ MILESTONE: DNS outage detected and resolved automatically.",
+    },
+    "runner_self_healed": {
+        "phase": 2,
+        "message": "◈ MILESTONE: CI runner died and resurrected itself within 60 seconds.",
+    },
+    "secrets_scan_clean": {
+        "phase": 2,
+        "message": "◈ MILESTONE: 7 consecutive days with zero leaked secrets detected.",
+    },
+    "local_inference_first": {
+        "phase": 3,
+        "message": "◈ MILESTONE: First fully local inference completed — no tokens left the building.",
+    },
+    "ollama_serving_fleet": {
+        "phase": 3,
+        "message": "◈ MILESTONE: Ollama serving models to all fleet wizards.",
+    },
+    "offline_docs_sync": {
+        "phase": 3,
+        "message": "◈ MILESTONE: Entire documentation tree synchronized without internet.",
+    },
+    "cross_agent_delegate": {
+        "phase": 3,
+        "message": "◈ MILESTONE: One wizard delegated a task to another and received a finished result.",
+    },
+    "backup_verified_restore": {
+        "phase": 4,
+        "message": "◈ MILESTONE: Backup restored and verified — disaster recovery is real.",
+    },
+    "vps_bootstrap_under_60": {
+        "phase": 4,
+        "message": "◈ MILESTONE: New VPS bootstrapped from bare metal in under 60 minutes.",
+    },
+    "zero_cloud_day": {
+        "phase": 4,
+        "message": "◈ MILESTONE: 24 hours with zero cloud API calls — total sovereignty achieved.",
+    },
+    "fleet_orchestrator_active": {
+        "phase": 5,
+        "message": "◈ MILESTONE: Fleet orchestrator actively balancing load across agents.",
+    },
+    "cell_isolation_proven": {
+        "phase": 5,
+        "message": "◈ MILESTONE: Agent cell isolation proven — one crash did not spread.",
+    },
+    "mission_bus_first": {
+        "phase": 5,
+        "message": "◈ MILESTONE: First cross-agent mission completed via the mission bus.",
+    },
+    "resurrection_pool_used": {
+        "phase": 5,
+        "message": "◈ MILESTONE: A dead wizard was detected and resurrected automatically.",
+    },
+    "infra_generates_revenue": {
+        "phase": 6,
+        "message": "◈ MILESTONE: Infrastructure generated its first dollar of revenue.",
+    },
+    "client_onboarded_unattended": {
+        "phase": 6,
+        "message": "◈ MILESTONE: Client onboarded without human intervention.",
+    },
+    "fleet_pays_for_itself": {
+        "phase": 6,
+        "message": "◈ MILESTONE: Fleet revenue exceeds operational cost — it breathes on its own.",
+    },
+}
+
+
+def load_state() -> dict:  # recorded milestones parsed from STATE_FILE, or {} when the file does not exist
+    if not STATE_FILE.exists():
+        return {}
+    return json.loads(STATE_FILE.read_text())
+
+
+def save_state(state: dict) -> None:  # write `state` to STATE_FILE as 2-space-indented JSON
+    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)  # create the state directory on first use
+    STATE_FILE.write_text(json.dumps(state, indent=2))
+
+
+def log(msg: str):  # print a UTC-stamped entry and append the same line to LOG_FILE
+    LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
+    stamped = "[{}Z] {}".format(datetime.utcnow().isoformat(), msg)
+    print(stamped)
+    with LOG_FILE.open("a") as sink:
+        sink.write(f"{stamped}\n")
+
+
+def trigger(key: str, dry_run: bool = False):
+    """Announce milestone `key` once (exit 1 if unknown); with dry_run, print it without writing state or log."""
+    if key not in MILESTONES:
+        sys.exit(f"Unknown milestone: {key}")  # message goes to stderr, exit status 1
+    state, milestone = load_state(), MILESTONES[key]
+    if state.get(key):
+        if not dry_run:
+            print(f"Milestone {key} already triggered. Skipping.")
+    elif dry_run:
+        print(f"[{datetime.utcnow().isoformat()}Z] {milestone['message']}")  # same line log() prints, nothing written
+    else:
+        state[key] = {"triggered_at": datetime.utcnow().isoformat() + "Z", "phase": milestone["phase"]}
+        save_state(state)
+        log(milestone["message"])
+
+
+def list_all():  # print "<key> (phase N): <message>" for every known milestone
+    for name in MILESTONES:
+        print("{} (phase {}): {}".format(name, MILESTONES[name]["phase"], MILESTONES[name]["message"]))
+
+
+def main():  # CLI entry point: --list takes precedence over --trigger; with neither, usage is printed
+    import argparse
+    cli = argparse.ArgumentParser(description="Fleet milestone tracker")
+    cli.add_argument("--trigger", help="Trigger a milestone by key")
+    cli.add_argument("--dry-run", action="store_true", help="Show but do not record")
+    cli.add_argument("--list", action="store_true", help="List all milestones")
+    opts = cli.parse_args()
+
+    if not (opts.list or opts.trigger):
+        cli.print_help()
+    elif opts.list:
+        list_all()
+    else:
+        trigger(opts.trigger, dry_run=opts.dry_run)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/resource_tracker.py
+++ b/scripts/resource_tracker.py
@@ -1,125 +0,0 @@
-"""
-Resource Tracking System for FLEET-005.
-
-This script tracks Capacity, Uptime, and Innovation, enforcing a tension model.
-"""
-
-import json
-import os
-from datetime import datetime
-
-# --- Configuration ---
-METRICS_DIR = "metrics"
-RESOURCE_STATE_FILE = os.path.join(METRICS_DIR, "resource_state.json")
-CAPACITY_THRESHOLD_INNOVATION = 70.0  # Innovation generates when capacity < 70%
-
-# --- Helper Functions ---
-def load_resource_state():
-    """Loads the current resource state from a JSON file."""
-    if not os.path.exists(RESOURCE_STATE_FILE):
-        return {"capacity": 100.0, "uptime": 100.0, "innovation": 0.0, "last_run": None}
-    with open(RESOURCE_STATE_FILE, "r") as f:
-        return json.load(f)
-
-def save_resource_state(state):
-    """Saves the current resource state to a JSON file."""
-    os.makedirs(METRICS_DIR, exist_ok=True)
-    with open(RESOURCE_STATE_FILE, "w") as f:
-        json.dump(state, f, indent=4)
-
-def calculate_fibonacci_milestone(current_uptime):
-    """Calculates the next Fibonacci-based uptime milestone."""
-    milestones = [95.0, 95.5, 96.0, 97.0, 98.0, 99.0, 99.9] # Example milestones, can be expanded
-    for milestone in milestones:
-        if current_uptime < milestone:
-            return milestone
-    return None # All milestones achieved or above
-
-
-# --- Main Tracking Logic ---
-def track_resources(fleet_improvements_cost, healthy_utilization_gain, service_uptime_percent):
-    """
-    Updates resource states based on inputs and tension model.
-
-    Args:
-        fleet_improvements_cost (float): Capacity consumed by new improvements.
-        healthy_utilization_gain (float): Capacity generated by well-running processes.
-        service_uptime_percent (float): Current uptime of services (0-100%).
-    """
-    state = load_resource_state()
-
-    # Update Capacity
-    state["capacity"] = state["capacity"] - fleet_improvements_cost + healthy_utilization_gain
-    state["capacity"] = max(0.0, min(100.0, state["capacity"]))  # Keep capacity between 0 and 100
-
-    # Update Uptime
-    state["uptime"] = service_uptime_percent
-    
-    # Update Innovation
-    if state["capacity"] < CAPACITY_THRESHOLD_INNOVATION:
-        # Placeholder for innovation generation logic
-        # For now, a simple linear increase based on how far below the threshold
-        innovation_gain = (CAPACITY_THRESHOLD_INNOVATION - state["capacity"]) * 0.1
-        state["innovation"] += innovation_gain
-    
-    state["last_run"] = datetime.now().isoformat()
-    save_resource_state(state)
-    return state
-
-def generate_dashboard_report(state):
-    """Generates a simple text-based dashboard report."""
-    report = f"""
-    --- Resource Tracking System Dashboard ---
-    Last Run: {state.get("last_run", "N/A")}
-
-    Capacity:   {state["capacity"]:.2f}%
-    Uptime:     {state["uptime"]:.2f}%
-    Innovation: {state["innovation"]:.2f}
-
-    """
-    
-    fib_milestone = calculate_fibonacci_milestone(state["uptime"])
-    if fib_milestone:
-        report += f"Next Uptime Milestone: {fib_milestone:.2f}%
-"
-    else:
-        report += "All Uptime Milestones Achieved!
-"
-
-    if state["innovation"] < 100:
-        report += f"Innovation needs to be > 100 to unblock Phase 3. Current: {state['innovation']:.2f}
-"
-    else:
-        report += "Phase 3 is unblocked (Innovation > 100)!
-"
-
-    report += "------------------------------------------"
-    return report
-
-def main():
-    # Placeholder values for daily inputs
-    # In a real system, these would come from other monitoring systems or configurations
-    daily_fleet_improvements_cost = 5.0  # Example: 5% capacity consumed daily
-    daily_healthy_utilization_gain = 3.0  # Example: 3% capacity generated daily
-    current_service_uptime = 96.5        # Example: 96.5% current uptime
-
-    print("Running resource tracker...")
-    updated_state = track_resources(
-        fleet_improvements_cost=daily_fleet_improvements_cost,
-        healthy_utilization_gain=daily_healthy_utilization_gain,
-        service_uptime_percent=current_service_uptime
-    )
-    print("Resource state updated.")
-    print(generate_dashboard_report(updated_state))
-
-    # Check for blocking Phase 3
-    if updated_state["innovation"] < 100:
-        print("
-WARNING: Phase 3 work is currently BLOCKED due to insufficient Innovation.")
-    else:
-        print("
-Phase 3 work is UNBLOCKED!")
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/telegram_thread_reporter.py
+++ b/scripts/telegram_thread_reporter.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""
+telegram_thread_reporter.py — Route reports to Telegram threads (#895)
+Usage:
+  python telegram_thread_reporter.py --topic ops --message "Heartbeat OK"
+  python telegram_thread_reporter.py --topic burn --message "Burn cycle done"
+  python telegram_thread_reporter.py --topic main --message "Escalation!"
+"""
+import argparse
+import os
+import sys
+import urllib.request
+import urllib.parse
+import json
+
+DEFAULT_THREADS = {  # topic name -> Telegram message_thread_id; the env vars are read once, at import time
+    "ops": os.environ.get("TELEGRAM_OPS_THREAD_ID"),  # None when the variable is unset
+    "burn": os.environ.get("TELEGRAM_BURN_THREAD_ID"),  # None when the variable is unset
+    "main": None,  # main channel = no thread id
+}
+
+
+def send_message(bot_token: str, chat_id: str, text: str, thread_id: str | None = None):
+    form = {"chat_id": chat_id, "text": text, "parse_mode": "HTML"}
+    if thread_id:
+        form["message_thread_id"] = thread_id  # left out entirely when no thread id is given
+    body = urllib.parse.urlencode(form).encode("utf-8")
+    target = f"https://api.telegram.org/bot{bot_token}/sendMessage"
+    request = urllib.request.Request(target, data=body, headers={"Content-Type": "application/x-www-form-urlencoded"})
+    try:
+        with urllib.request.urlopen(request, timeout=15) as reply:
+            return json.loads(reply.read().decode("utf-8"))  # the API's JSON answer, decoded
+    except Exception as exc:  # any failure is reported as a result dict instead of raising
+        return {"ok": False, "error": str(exc)}
+
+
+def main():
+    """CLI entry point: send --message to the thread mapped to --topic; exit 1 on missing credentials or a failed send."""
+    cli = argparse.ArgumentParser(description="Telegram thread reporter")
+    cli.add_argument("--topic", required=True, choices=["ops", "burn", "main"])
+    cli.add_argument("--message", required=True)
+    opts = cli.parse_args()
+
+    token = os.environ.get("TELEGRAM_BOT_TOKEN")
+    chat = os.environ.get("TELEGRAM_CHAT_ID")
+    if not (token and chat):
+        sys.exit("Missing TELEGRAM_BOT_TOKEN or TELEGRAM_CHAT_ID")  # message to stderr, exit status 1
+
+    thread = DEFAULT_THREADS.get(opts.topic)
+    outcome = send_message(token, chat, opts.message, thread)
+    if not outcome.get("ok"):
+        print(f"Failed: {outcome}", file=sys.stderr)
+        sys.exit(1)
+    print(f"Sent to {opts.topic}")
+
+
+if __name__ == "__main__":
+    main()
--- a/wizards/allegro/home/config.yaml
+++ b/wizards/allegro/home/config.yaml
@@ -1,8 +1,33 @@
 model:
-  default: kimi-for-coding
+  default: kimi-k2.5
  provider: kimi-coding
 toolsets:
 - all
+fallback_providers:
+  - provider: kimi-coding
+    model: kimi-k2.5
+    timeout: 120
+    reason: Kimi coding fallback (front of chain)
+  - provider: anthropic
+    model: claude-sonnet-4-20250514
+    timeout: 120
+    reason: Direct Anthropic fallback
+  - provider: openrouter
+    model: anthropic/claude-sonnet-4-20250514
+    base_url: https://openrouter.ai/api/v1
+    api_key_env: OPENROUTER_API_KEY
+    timeout: 120
+    reason: OpenRouter fallback
+providers:
+  kimi-coding:
+    base_url: https://api.kimi.com/coding/v1
+    timeout: 60
+    max_retries: 3
+  anthropic:
+    timeout: 120
+  openrouter:
+    base_url: https://openrouter.ai/api/v1
+    timeout: 120
 agent:
  max_turns: 30
  reasoning_effort: medium
Author	SHA1	Message	Date
Bezalel	c8b0f2a8fb	feat(config): default local model to gemma4:12b via Ollama - config.yaml: provider ollama, default gemma4:12b - dynamic_dispatch_optimizer.py: fallback route references gemma4:12b	2026-04-07 15:56:17 +00:00
Bezalel	0470e23efb	feat(infra): fleet milestone tracker with 22 phase messages (#557 , FLEET-004)	2026-04-07 15:46:09 +00:00
Bezalel	39540a2a8c	feat(infra): auto-restart agent, backup pipeline, Telegram thread reporter (#560 , #561 , #895 ) - scripts/auto_restart_agent.sh — monitor and restart dead processes (3-attempt backoff) - scripts/backup_pipeline.sh — daily backups with retention + offsite rsync hook - scripts/telegram_thread_reporter.py — route messages to ops/burn/main threads - infrastructure/cron/*.crontab — scheduling for new automations	2026-04-07 15:43:21 +00:00
Bezalel	839f52af12	fix(allegro): switch to kimi-k2.5 and add full fallback chain - Replace broken kimi-for-coding model with kimi-k2.5 - Add fallback_providers with kimi-coding -> anthropic -> openrouter - Add explicit provider config for kimi-coding base_url and timeouts Refs: #lazzyPit	2026-04-07 15:39:58 +00:00
Bezalel	4e3f60344b	feat(infra): add fleet health probe + crontab (#559 , FLEET-006) - scripts/fleet_health_probe.sh: SSH, disk, memory, process checks - infrastructure/cron/fleet-health.crontab: 5-minute cron schedule - Thresholds: disk<90%, mem<90%, critical processes monitored	2026-04-07 15:22:10 +00:00