feat: add pre-flight provider check script (#508)

- Checks OpenRouter balance via /api/v1/auth/key
- Tests Nous and Anthropic API keys
- Verifies Ollama is running
- Pre-flight check before session launch
- Returns exit code for automation

Closes #508
feat: add autonomous cron supervisor job (#513)
2026-04-15 03:55:04 +00:00 · 2026-04-15 03:33:43 +00:00
3 changed files with 302 additions and 481 deletions
--- a/bin/preflight-provider-check.py
+++ b/bin/preflight-provider-check.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python3
+"""
+Pre-Flight Provider Check Script
+Issue #508: [Robustness] Credential drain detection — provider health checks
+
+Pre-flight check before session launch: verifies provider credentials and balance.
+
+Usage:
+  python3 preflight-provider-check.py              # Check all providers
+  python3 preflight-provider-check.py --launch      # Check and return exit code
+  python3 preflight-provider-check.py --balance     # Check OpenRouter balance
+"""
+
+import os, sys, json, yaml, urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Configuration
+HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+LOG_DIR = Path.home() / ".local" / "timmy" / "fleet-health"
+LOG_FILE = LOG_DIR / "preflight-check.log"
+
+def log(msg):
+    """Append a UTC-timestamped entry to LOG_FILE, echoing it unless --quiet is set."""
+    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+    entry = "".join(("[", stamp, "] ", msg))
+    # Create the log directory on demand.
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    with open(LOG_FILE, "a") as handle:
+        handle.write(entry + "\n")
+
+    if "--quiet" not in sys.argv:
+        print(entry)
+
+def get_provider_api_key(provider):
+    """Return <PROVIDER>_API_KEY from HERMES_HOME/.env, else os.environ, else None."""
+    env_file = HERMES_HOME / ".env"
+    if env_file.exists():
+        with open(env_file) as f:
+            for line in f:
+                key, _, value = line.strip().partition("=")
+                value = value.strip().strip("'\"")
+                if key == provider.upper() + "_API_KEY" and value:
+                    return value  # an empty .env value falls through to the environment
+    return os.environ.get(provider.upper() + "_API_KEY")
+
+def check_openrouter_balance(api_key):
+    """Check the OpenRouter key's remaining credit via GET /api/v1/auth/key.
+
+    Returns (healthy, message, remaining). `remaining` is limit - usage, None when
+    the response carries no limit, and 0 on any failure.
+    """
+    if not api_key:
+        return False, "No API key", 0
+
+    try:
+        req = urllib.request.Request(
+            "https://openrouter.ai/api/v1/auth/key",
+            headers={"Authorization": "Bearer " + api_key}
+        )
+        with urllib.request.urlopen(req, timeout=10) as resp:  # always close the socket
+            data = json.loads(resp.read())
+
+        info = data.get("data") or {}  # tolerate "data": null
+        limit = info.get("limit")
+        # Only a missing/null limit means "no limit"; an explicit 0 is exhausted.
+        remaining = limit - (info.get("usage") or 0) if limit is not None else None
+        if remaining is None:
+            return True, "Unlimited or unknown balance", None
+        if remaining <= 0:
+            return False, "No credits remaining", 0
+        return True, "Credits available", remaining
+    except urllib.error.HTTPError as e:
+        if e.code == 401:
+            return False, "Invalid API key", 0
+        return False, "HTTP " + str(e.code), 0
+    except Exception as e:
+        return False, str(e)[:100], 0
+
+def check_nous_key(api_key):
+    """Validate a Nous API key by requesting /v1/models; returns (healthy, message)."""
+    if not api_key:
+        return False, "No API key"
+
+    try:
+        req = urllib.request.Request(
+            "https://inference.nousresearch.com/v1/models",
+            headers={"Authorization": "Bearer " + api_key}
+        )
+        # Close the response promptly instead of leaking the socket.
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            status = resp.status
+        if status == 200:
+            return True, "Valid key"
+        return False, "HTTP " + str(status)
+    except urllib.error.HTTPError as e:
+        # urlopen raises for 4xx/5xx responses, so auth failures arrive here.
+        if e.code == 401:
+            return False, "Invalid API key"
+        elif e.code == 403:
+            return False, "Forbidden"
+        else:
+            return False, "HTTP " + str(e.code)
+    except Exception as e:
+        return False, str(e)[:100]
+
+def check_anthropic_key(api_key):
+    """Validate an Anthropic API key by requesting /v1/models; returns (healthy, message)."""
+    if not api_key:
+        return False, "No API key"
+
+    try:
+        req = urllib.request.Request(
+            "https://api.anthropic.com/v1/models",
+            headers={
+                "x-api-key": api_key,
+                "anthropic-version": "2023-06-01"
+            }
+        )
+        # Close the response promptly instead of leaking the socket.
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            status = resp.status
+        if status == 200:
+            return True, "Valid key"
+        return False, "HTTP " + str(status)
+    except urllib.error.HTTPError as e:
+        # urlopen raises for 4xx/5xx responses, so auth failures arrive here.
+        if e.code == 401:
+            return False, "Invalid API key"
+        elif e.code == 403:
+            return False, "Forbidden"
+        else:
+            return False, "HTTP " + str(e.code)
+    except Exception as e:
+        return False, str(e)[:100]
+
+def check_ollama():
+    """Check that a local Ollama server answers /api/tags; returns (healthy, message)."""
+    try:
+        req = urllib.request.Request("http://localhost:11434/api/tags")
+        with urllib.request.urlopen(req, timeout=5) as resp:  # always close the socket
+            if resp.status != 200:
+                return False, "HTTP " + str(resp.status)
+            data = json.loads(resp.read())
+
+        # A null "models" field must not turn a running server into a failure.
+        models = data.get("models") or []
+        return True, str(len(models)) + " models loaded"
+
+    except Exception as e:
+        return False, str(e)[:100]
+
+def get_configured_provider():
+    """Return model.provider from HERMES_HOME/config.yaml, or None if unavailable."""
+    config_file = HERMES_HOME / "config.yaml"
+    if not config_file.exists():
+        return None
+
+    try:
+        with open(config_file) as f:
+            config = yaml.safe_load(f) or {}  # an empty file parses to None
+        model_config = config.get("model", {})
+        if isinstance(model_config, dict):
+            return model_config.get("provider")
+    except Exception:
+        # Best-effort lookup: an unreadable or malformed config means "no provider".
+        # Catching Exception (not everything) lets Ctrl-C and SystemExit propagate.
+        pass
+    return None
+
+def run_preflight_check():
+    """Probe every provider, log a health summary, and return the findings.
+
+    Returns:
+        (results, configured): `results` maps provider name to a dict with
+        "healthy" and "message" (plus "balance" for openrouter); `configured`
+        is whatever get_configured_provider() returned.
+    """
+    log("=== Pre-Flight Provider Check ===")
+
+    results = {}
+
+    # OpenRouter additionally reports a remaining balance.
+    openrouter_key = get_provider_api_key("openrouter")
+    healthy, message, balance = check_openrouter_balance(openrouter_key)
+    results["openrouter"] = {"healthy": healthy, "message": message, "balance": balance}
+
+    # Nous and Anthropic are key-validated and share the (healthy, message) shape.
+    keyed_checks = (("nous", check_nous_key), ("anthropic", check_anthropic_key))
+    for name, checker in keyed_checks:
+        healthy, message = checker(get_provider_api_key(name))
+        results[name] = {"healthy": healthy, "message": message}
+
+    # Ollama is probed on localhost and needs no key.
+    healthy, message = check_ollama()
+    results["ollama"] = {"healthy": healthy, "message": message}
+
+    # Provider selected in config.yaml, if any.
+    configured = get_configured_provider()
+
+    # Summary line followed by one line per provider.
+    healthy_total = len([entry for entry in results.values() if entry["healthy"]])
+    log("Results: " + str(healthy_total) + "/" + str(len(results)) + " providers healthy")
+
+    for name, entry in results.items():
+        state = "HEALTHY" if entry["healthy"] else "UNHEALTHY"
+        suffix = ""
+        if name == "openrouter" and entry.get("balance") is not None:
+            suffix = " (balance: " + str(entry["balance"]) + ")"
+        log("  " + name + ": " + state + " - " + entry["message"] + suffix)
+
+    if configured:
+        log("Configured provider: " + configured)
+        # Call out an unhealthy configured provider explicitly.
+        if configured in results and not results[configured]["healthy"]:
+            log("WARNING: Configured provider " + configured + " is UNHEALTHY!")
+
+    return results, configured
+
+def check_launch_readiness():
+    """Decide whether sessions may launch; returns (ready, reason).
+
+    Launch is blocked when the configured provider was checked and is unhealthy,
+    or when no provider at all is healthy.
+    """
+    results, configured = run_preflight_check()
+    if configured and configured in results and not results[configured]["healthy"]:
+        log("LAUNCH BLOCKED: Configured provider " + configured + " is unhealthy")
+        return False, configured + " is unhealthy"
+
+    healthy_count = sum(1 for entry in results.values() if entry["healthy"])
+    if healthy_count == 0:
+        log("LAUNCH BLOCKED: No healthy providers available")
+        return False, "No healthy providers"
+
+    log("LAUNCH READY: " + str(healthy_count) + " healthy providers available")
+    return True, "Ready"
+
+def show_balance():
+    """Print the OpenRouter credit balance, or the check's status message.
+
+    Prints a notice and returns early when no OpenRouter key is configured.
+    """
+    api_key = get_provider_api_key("openrouter")
+    if not api_key:
+        print("No OpenRouter API key found")
+        return
+    ok, msg, balance = check_openrouter_balance(api_key)
+    # A numeric balance is shown only for a healthy key; every other outcome
+    # (failed check, or unlimited/unknown balance) prints the status message.
+    if ok and balance is not None:
+        print("OpenRouter balance: " + str(balance) + " credits")
+    else:
+        print("OpenRouter: " + msg)
+
+def main():
+    """Dispatch on CLI flags: --balance, --launch (sets the exit code), or a full check."""
+    args = sys.argv
+    if "--balance" in args:
+        show_balance()
+        return
+    if "--launch" not in args:
+        run_preflight_check()
+        return
+    # --launch: exit 0 when ready, 1 when blocked, so automation can gate on it.
+    ready, message = check_launch_readiness()
+    print("READY" if ready else "BLOCKED: " + message)
+    sys.exit(0 if ready else 1)
+
+if __name__ == "__main__":
+    main()
--- a/cron/jobs.json
+++ b/cron/jobs.json
@@ -196,7 +196,37 @@
      "paused_reason": null,
      "skills": [],
      "skill": null
+    },
+    {
+      "id": "tmux-supervisor-513",
+      "name": "Autonomous Cron Supervisor",
+      "prompt": "Load the tmux-supervisor skill and execute the monitoring protocol.\n\nCheck both `dev` and `timmy` tmux sessions for idle panes. Only send Telegram notifications on actionable events (idle, overflow, failure). Be silent when all agents are working.\n\nSteps:\n1. List all tmux sessions (skip 'Alexander')\n2. For each session, list windows and panes\n3. Capture each pane and classify state (idle vs active)\n4. For idle panes: read context, craft context-aware prompt\n5. Send /queue prompts to idle panes\n6. Verify prompts landed\n7. Only notify via Telegram if:\n   - A pane was prompted (idle detected)\n   - A pane shows context overflow (>80%)\n   - A pane is stuck or crashed\n8. If all panes are active: respond with [SILENT]",
+      "schedule": {
+        "kind": "interval",
+        "minutes": 7,
+        "display": "every 7m"
+      },
+      "schedule_display": "every 7m",
+      "repeat": {
+        "times": null,
+        "completed": 0
+      },
+      "enabled": true,
+      "created_at": "2026-04-15T03:00:00.000000+00:00",
+      "next_run_at": null,
+      "last_run_at": null,
+      "last_status": null,
+      "last_error": null,
+      "deliver": "telegram",
+      "origin": null,
+      "state": "scheduled",
+      "paused_at": null,
+      "paused_reason": null,
+      "skills": [
+        "tmux-supervisor"
+      ],
+      "skill": "tmux-supervisor"
    }
  ],
  "updated_at": "2026-04-13T02:00:00+00:00"
-}
+}
--- a/scripts/config_drift_detector.py
+++ b/scripts/config_drift_detector.py
@@ -1,480 +0,0 @@
-#!/usr/bin/env python3
-"""
-config_drift_detector.py — Detect config drift across fleet nodes.
-
-Collects config from all wizard nodes via SSH, compares against
-canonical timmy-config golden state, and reports differences.
-
-Usage:
-  python3 scripts/config_drift_detector.py                    # Report only
-  python3 scripts/config_drift_detector.py --auto-sync        # Auto-fix drift with golden state
-  python3 scripts/config_drift_detector.py --node allegro      # Check single node
-  python3 scripts/config_drift_detector.py --json              # JSON output for automation
-
-Exit codes:
-  0 — no drift detected
-  1 — drift detected
-  2 — error (SSH failure, missing deps, etc.)
-"""
-
-import argparse
-import json
-import os
-import subprocess
-import sys
-import tempfile
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any
-
-import yaml
-
-# ── Constants ─────────────────────────────────────────────────────────────────
-
-SCRIPT_DIR = Path(__file__).resolve().parent
-REPO_ROOT = SCRIPT_DIR.parent
-ANSIBLE_INVENTORY = REPO_ROOT / "ansible" / "inventory" / "hosts.yml"
-GOLDEN_STATE_PLAYBOOK = REPO_ROOT / "ansible" / "playbooks" / "golden_state.yml"
-
-# Config files to check on each node
-CONFIG_PATHS = [
-    ".hermes/config.yaml",
-    "wizards/{name}/config.yaml",
-]
-
-# Keys that define golden state (from ansible inventory vars)
-GOLDEN_KEYS = [
-    "providers",
-    "provider",
-    "model",
-    "base_url",
-    "api_key_env",
-    "banned_providers",
-    "banned_models_patterns",
-]
-
-
-# ── Data Models ───────────────────────────────────────────────────────────────
-
-@dataclass
-class NodeConfig:
-    name: str
-    host: str
-    configs: dict[str, Any] = field(default_factory=dict)
-    errors: list[str] = field(default_factory=list)
-    reachable: bool = True
-
-
-@dataclass
-class DriftResult:
-    node: str
-    file_path: str
-    diff_type: str  # "missing", "value_mismatch", "key_missing", "extra_key"
-    key: str
-    canonical_value: Any = None
-    node_value: Any = None
-    severity: str = "warning"  # "info", "warning", "critical"
-
-
-# ── Inventory Parsing ─────────────────────────────────────────────────────────
-
-def load_inventory() -> dict:
-    """Load Ansible inventory and extract wizard node definitions."""
-    if not ANSIBLE_INVENTORY.exists():
-        print(f"ERROR: Inventory not found at {ANSIBLE_INVENTORY}", file=sys.stderr)
-        sys.exit(2)
-
-    with open(ANSIBLE_INVENTORY) as f:
-        inventory = yaml.safe_load(f)
-
-    wizards = inventory.get("all", {}).get("children", {}).get("wizards", {}).get("hosts", {})
-    global_vars = inventory.get("all", {}).get("vars", {})
-
-    nodes = {}
-    for name, config in wizards.items():
-        nodes[name] = {
-            "host": config.get("ansible_host", "localhost"),
-            "user": config.get("ansible_user", ""),
-            "wizard_name": config.get("wizard_name", name),
-            "hermes_home": config.get("hermes_home", "~/.hermes"),
-            "wizard_home": config.get("wizard_home", f"~/wizards/{name}"),
-            "machine_type": config.get("machine_type", "unknown"),
-        }
-
-    return nodes, global_vars
-
-
-def load_golden_state(inventory_vars: dict) -> dict:
-    """Extract golden state from inventory vars."""
-    golden = {
-        "providers": inventory_vars.get("golden_state_providers", []),
-        "banned_providers": inventory_vars.get("banned_providers", []),
-        "banned_models_patterns": inventory_vars.get("banned_models_patterns", []),
-    }
-    return golden
-
-
-# ── SSH Collection ────────────────────────────────────────────────────────────
-
-def ssh_collect(node_name: str, node_info: dict, timeout: int = 15) -> NodeConfig:
-    """SSH into a node and collect config files."""
-    host = node_info["host"]
-    user = node_info.get("user", "")
-    hermes_home = node_info.get("hermes_home", "~/.hermes")
-    wizard_home = node_info.get("wizard_home", f"~/wizards/{node_name}")
-
-    result = NodeConfig(name=node_name, host=host)
-
-    # Build SSH target
-    if host in ("localhost", "127.0.0.1"):
-        ssh_target = None  # local
-    else:
-        ssh_target = f"{user}@{host}" if user else host
-
-    # Collect each config path
-    for path_template in CONFIG_PATHS:
-        # Resolve path template
-        remote_path = path_template.replace("{name}", node_name)
-        if not remote_path.startswith("/"):
-            # Resolve relative to home
-            if "wizards/" in remote_path:
-                full_path = f"{wizard_home}/config.yaml"
-            else:
-                full_path = f"{hermes_home}/config.yaml" if ".hermes" in remote_path else f"~/{remote_path}"
-        else:
-            full_path = remote_path
-
-        config_content = _remote_cat(ssh_target, full_path, timeout)
-        if config_content is not None:
-            try:
-                parsed = yaml.safe_load(config_content)
-                if parsed:
-                    result.configs[full_path] = parsed
-            except yaml.YAMLError as e:
-                result.errors.append(f"YAML parse error in {full_path}: {e}")
-        # Don't flag missing files as errors — some paths may not exist on all nodes
-
-    # Also collect banned provider scan
-    banned_check = _remote_grep(
-        ssh_target,
-        hermes_home,
-        r"anthropic|claude-sonnet|claude-opus|claude-haiku",
-        timeout
-    )
-    if banned_check:
-        result.configs["__banned_scan__"] = banned_check
-
-    return result
-
-
-def _remote_cat(ssh_target: str | None, path: str, timeout: int) -> str | None:
-    """Cat a file remotely (or locally)."""
-    if ssh_target is None:
-        cmd = ["cat", path]
-    else:
-        cmd = ["ssh", "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no",
-               ssh_target, f"cat {path}"]
-
-    try:
-        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
-        if proc.returncode == 0:
-            return proc.stdout
-    except subprocess.TimeoutExpired:
-        pass
-    except FileNotFoundError:
-        pass
-    return None
-
-
-def _remote_grep(ssh_target: str | None, base_path: str, pattern: str, timeout: int) -> dict:
-    """Grep for banned patterns in config files."""
-    if ssh_target is None:
-        cmd = ["grep", "-rn", "-i", pattern, base_path, "--include=*.yaml", "--include=*.yml"]
-    else:
-        cmd = ["ssh", "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no",
-               ssh_target, f"grep -rn -i '{pattern}' {base_path} --include='*.yaml' --include='*.yml' 2>/dev/null || true"]
-
-    try:
-        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
-        if proc.stdout.strip():
-            lines = proc.stdout.strip().split("\n")
-            return {"matches": lines, "count": len(lines)}
-    except subprocess.TimeoutExpired:
-        pass
-    return {}
-
-
-# ── Drift Detection ───────────────────────────────────────────────────────────
-
-def detect_drift(nodes: list[NodeConfig], golden: dict) -> list[DriftResult]:
-    """Compare each node's config against golden state."""
-    results = []
-
-    for node in nodes:
-        if not node.reachable:
-            continue
-
-        # Check for banned providers
-        banned_scan = node.configs.get("__banned_scan__", {})
-        if banned_scan.get("count", 0) > 0:
-            for match in banned_scan.get("matches", []):
-                results.append(DriftResult(
-                    node=node.name,
-                    file_path="(config files)",
-                    diff_type="banned_provider_found",
-                    key="banned_provider_reference",
-                    node_value=match,
-                    severity="critical"
-                ))
-
-        # Check each config file
-        for path, config in node.configs.items():
-            if path == "__banned_scan__":
-                continue
-
-            # Check provider chain
-            if isinstance(config, dict):
-                node_providers = _extract_provider_chain(config)
-                golden_providers = golden.get("providers", [])
-
-                if node_providers and golden_providers:
-                    # Compare provider names in order
-                    node_names = [p.get("name", "") for p in node_providers]
-                    golden_names = [p.get("name", "") for p in golden_providers]
-
-                    if node_names != golden_names:
-                        results.append(DriftResult(
-                            node=node.name,
-                            file_path=path,
-                            diff_type="value_mismatch",
-                            key="provider_chain",
-                            canonical_value=golden_names,
-                            node_value=node_names,
-                            severity="critical"
-                        ))
-
-                    # Check for banned providers in node config
-                    for banned in golden.get("banned_providers", []):
-                        for provider in node_providers:
-                            prov_name = provider.get("name", "").lower()
-                            prov_model = provider.get("model", "").lower()
-                            if banned in prov_name or banned in prov_model:
-                                results.append(DriftResult(
-                                    node=node.name,
-                                    file_path=path,
-                                    diff_type="banned_provider_found",
-                                    key=f"provider.{provider.get('name', 'unknown')}",
-                                    node_value=provider,
-                                    severity="critical"
-                                ))
-
-                # Check for missing critical keys
-                critical_keys = ["display", "providers", "tools", "delegation"]
-                for key in critical_keys:
-                    if key not in config and key in str(config):
-                        results.append(DriftResult(
-                            node=node.name,
-                            file_path=path,
-                            diff_type="key_missing",
-                            key=key,
-                            canonical_value="(present in golden state)",
-                            severity="warning"
-                        ))
-
-    return results
-
-
-def _extract_provider_chain(config: dict) -> list[dict]:
-    """Extract provider list from a config dict (handles multiple formats)."""
-    # Direct providers key
-    if "providers" in config:
-        providers = config["providers"]
-        if isinstance(providers, list):
-            return providers
-
-    # Nested in display or model config
-    for key in ["model", "inference", "llm"]:
-        if key in config and isinstance(config[key], dict):
-            if "providers" in config[key]:
-                return config[key]["providers"]
-
-    # Single provider format
-    if "provider" in config and "model" in config:
-        return [{"name": config["provider"], "model": config["model"]}]
-
-    return []
-
-
-# ── Auto-Sync ─────────────────────────────────────────────────────────────────
-
-def auto_sync(drifts: list[DriftResult], nodes: list[NodeConfig]) -> list[str]:
-    """Auto-sync drifted nodes using golden state playbook."""
-    actions = []
-
-    drifted_nodes = set(d.node for d in drifts if d.severity == "critical")
-    if not drifted_nodes:
-        actions.append("No critical drift to sync.")
-        return actions
-
-    for node_name in drifted_nodes:
-        node_info = next((n for n in nodes if n.name == node_name), None)
-        if not node_info:
-            continue
-
-        actions.append(f"[{node_name}] Running golden state sync...")
-
-        # Run ansible-playbook for this node
-        cmd = [
-            "ansible-playbook",
-            str(GOLDEN_STATE_PLAYBOOK),
-            "-i", str(ANSIBLE_INVENTORY),
-            "-l", node_name,
-            "--tags", "golden",
-        ]
-
-        try:
-            proc = subprocess.run(
-                cmd, capture_output=True, text=True, timeout=120,
-                cwd=str(REPO_ROOT)
-            )
-            if proc.returncode == 0:
-                actions.append(f"[{node_name}] Sync completed successfully.")
-            else:
-                actions.append(f"[{node_name}] Sync FAILED: {proc.stderr[:200]}")
-        except subprocess.TimeoutExpired:
-            actions.append(f"[{node_name}] Sync timed out after 120s.")
-        except FileNotFoundError:
-            actions.append(f"[{node_name}] ansible-playbook not found. Install Ansible or run manually.")
-
-    return actions
-
-
-# ── Reporting ─────────────────────────────────────────────────────────────────
-
-def print_report(drifts: list[DriftResult], nodes: list[NodeConfig], golden: dict):
-    """Print human-readable drift report."""
-    print("=" * 70)
-    print("CONFIG DRIFT DETECTION REPORT")
-    print("=" * 70)
-    print()
-
-    # Summary
-    reachable = sum(1 for n in nodes if n.reachable)
-    print(f"Nodes checked: {len(nodes)} (reachable: {reachable})")
-    print(f"Golden state providers: {' → '.join(p['name'] for p in golden.get('providers', []))}")
-    print(f"Banned providers: {', '.join(golden.get('banned_providers', []))}")
-    print()
-
-    if not drifts:
-        print("[OK] No config drift detected. All nodes match golden state.")
-        return
-
-    # Group by node
-    by_node: dict[str, list[DriftResult]] = {}
-    for d in drifts:
-        by_node.setdefault(d.node, []).append(d)
-
-    for node_name, node_drifts in sorted(by_node.items()):
-        print(f"--- {node_name} ---")
-        for d in node_drifts:
-            severity_icon = {"critical": "[!!]", "warning": "[!]", "info": "[i]"}.get(d.severity, "[?]")
-            print(f"  {severity_icon} {d.diff_type}: {d.key}")
-            if d.canonical_value is not None:
-                print(f"       canonical: {d.canonical_value}")
-            if d.node_value is not None:
-                print(f"       actual:    {d.node_value}")
-        print()
-
-    # Severity summary
-    critical = sum(1 for d in drifts if d.severity == "critical")
-    warning = sum(1 for d in drifts if d.severity == "warning")
-    print(f"Total: {len(drifts)} drift(s) — {critical} critical, {warning} warning")
-
-
-def print_json_report(drifts: list[DriftResult], nodes: list[NodeConfig], golden: dict):
-    """Print JSON report for automation."""
-    report = {
-        "nodes_checked": len(nodes),
-        "reachable": sum(1 for n in nodes if n.reachable),
-        "golden_providers": [p["name"] for p in golden.get("providers", [])],
-        "drift_count": len(drifts),
-        "critical_count": sum(1 for d in drifts if d.severity == "critical"),
-        "drifts": [
-            {
-                "node": d.node,
-                "file": d.file_path,
-                "type": d.diff_type,
-                "key": d.key,
-                "canonical": d.canonical_value,
-                "actual": d.node_value,
-                "severity": d.severity,
-            }
-            for d in drifts
-        ],
-    }
-    print(json.dumps(report, indent=2, default=str))
-
-
-# ── CLI ───────────────────────────────────────────────────────────────────────
-
-def main():
-    parser = argparse.ArgumentParser(description="Detect config drift across fleet nodes")
-    parser.add_argument("--node", help="Check only this node")
-    parser.add_argument("--auto-sync", action="store_true", help="Auto-fix critical drift with golden state")
-    parser.add_argument("--json", action="store_true", help="JSON output")
-    parser.add_argument("--timeout", type=int, default=15, help="SSH timeout per node (seconds)")
-    args = parser.parse_args()
-
-    # Load inventory
-    print("Loading inventory...", file=sys.stderr)
-    node_defs, global_vars = load_inventory()
-    golden = load_golden_state(global_vars)
-
-    # Filter to single node if requested
-    if args.node:
-        if args.node not in node_defs:
-            print(f"ERROR: Node '{args.node}' not in inventory. Available: {', '.join(node_defs.keys())}")
-            sys.exit(2)
-        node_defs = {args.node: node_defs[args.node]}
-
-    # Collect configs from each node
-    print(f"Collecting configs from {len(node_defs)} node(s)...", file=sys.stderr)
-    nodes = []
-    for name, info in node_defs.items():
-        print(f"  {name} ({info['host']})...", file=sys.stderr, end=" ", flush=True)
-        node_config = ssh_collect(name, info, timeout=args.timeout)
-        if node_config.reachable:
-            print(f"OK ({len(node_config.configs)} files)", file=sys.stderr)
-        else:
-            print("UNREACHABLE", file=sys.stderr)
-        nodes.append(node_config)
-
-    # Detect drift
-    print("\nAnalyzing drift...", file=sys.stderr)
-    drifts = detect_drift(nodes, golden)
-
-    # Output
-    if args.json:
-        print_json_report(drifts, nodes, golden)
-    else:
-        print()
-        print_report(drifts, nodes, golden)
-
-    # Auto-sync if requested
-    if args.auto_sync and drifts:
-        print("\n--- AUTO-SYNC ---")
-        actions = auto_sync(drifts, nodes)
-        for a in actions:
-            print(a)
-
-    # Exit code
-    if any(d.severity == "critical" for d in drifts):
-        sys.exit(1)
-    elif drifts:
-        sys.exit(1)
-    else:
-        sys.exit(0)
-
-
-if __name__ == "__main__":
-    main()
Author	SHA1	Message	Date
Alexander Whitestone	b6e3a647b0	feat: add pre-flight provider check script (#508 ) Some checks failed Architecture Lint / Linter Tests (pull_request) Successful in 29s Details PR Checklist / pr-checklist (pull_request) Failing after 7m23s Details Smoke Test / smoke (pull_request) Failing after 20s Details Validate Config / YAML Lint (pull_request) Failing after 14s Details Validate Config / JSON Validate (pull_request) Successful in 15s Details Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m1s Details Validate Config / Shell Script Lint (pull_request) Failing after 46s Details Validate Config / Cron Syntax Check (pull_request) Successful in 9s Details Validate Config / Deploy Script Dry Run (pull_request) Successful in 10s Details Validate Config / Playbook Schema Validation (pull_request) Successful in 28s Details Architecture Lint / Lint Repository (pull_request) Has been cancelled Details Validate Config / Python Test Suite (pull_request) Has been cancelled Details - Checks OpenRouter balance via /api/v1/auth/key - Tests Nous and Anthropic API keys - Verifies Ollama is running - Pre-flight check before session launch - Returns exit code for automation Closes #508	2026-04-15 03:55:04 +00:00
Alexander Whitestone	26e39d8949	feat: add autonomous cron supervisor job (#513 ) - Runs every 7 minutes - Checks dev and timmy sessions - Loads tmux-supervisor skill - Telegram only on actionable events - Silent when all agents busy	2026-04-15 03:33:43 +00:00