#!/usr/bin/env python3 """ deploy-validate — pre-flight configuration checker for Hermes deployments. Catches common configuration errors BEFORE they cause runtime failures. Safe to run at any time: it only reads files and makes lightweight network checks — it never writes state or sends messages. Usage: python scripts/deploy-validate # validate current environment python scripts/deploy-validate --dry-run # alias for the same thing python scripts/deploy-validate --env /path/to/.env Exit codes: 0 All checks passed (or only warnings). 1 One or more blocking errors found. """ from __future__ import annotations import argparse import os import socket import sys import urllib.error import urllib.request from pathlib import Path from typing import Optional # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- RESET = "\033[0m" RED = "\033[91m" YELLOW = "\033[93m" GREEN = "\033[92m" BOLD = "\033[1m" def _color(text: str, code: str) -> str: if sys.stdout.isatty(): return f"{code}{text}{RESET}" return text def ok(msg: str) -> None: print(f" {_color('✔', GREEN)} {msg}") def warn(msg: str) -> None: print(f" {_color('⚠', YELLOW)} {msg}") def error(msg: str) -> None: print(f" {_color('✘', RED)} {msg}") def section(title: str) -> None: print(f"\n{_color(BOLD + title, BOLD)}") # --------------------------------------------------------------------------- # .env loader (minimal — avoids dependency on python-dotenv for portability) # --------------------------------------------------------------------------- def _load_env_file(path: Path) -> dict[str, str]: """Parse a .env file and return a dict of key→value pairs.""" result: dict[str, str] = {} if not path.exists(): return result for line in path.read_text(encoding="utf-8").splitlines(): line = line.strip() if not line or line.startswith("#") or "=" not in line: continue key, _, value = line.partition("=") key = key.strip() # Strip inline comments and surrounding quotes. value = value.split("#")[0].strip().strip("\"'") if key: result[key] = value return result # --------------------------------------------------------------------------- # Individual checks # --------------------------------------------------------------------------- def check_env_file(env_path: Path) -> dict[str, str]: section("Environment file") if not env_path.exists(): error(f".env not found at {env_path}") error("Copy .env.example → .env and fill in your API keys.") return {} ok(f".env found at {env_path}") raw = _load_env_file(env_path) # Warn if any value looks like a placeholder. placeholder_patterns = ("your_", "xxxx", "changeme", "todo", "replace_me") for key, value in raw.items(): if value and any(p in value.lower() for p in placeholder_patterns): warn(f"{key} looks like a placeholder: {value!r}") return raw def check_llm_key(env: dict[str, str]) -> bool: section("LLM provider") providers = { "OPENROUTER_API_KEY": "OpenRouter", "ANTHROPIC_API_KEY": "Anthropic", "OPENAI_API_KEY": "OpenAI", "GLM_API_KEY": "z.ai / GLM", "KIMI_API_KEY": "Kimi / Moonshot", "MINIMAX_API_KEY": "MiniMax", "NOUS_API_KEY": "Nous Portal", "HF_TOKEN": "Hugging Face", "KILOCODE_API_KEY": "KiloCode", "OPENCODE_ZEN_API_KEY": "OpenCode Zen", } found = [name for key, name in providers.items() if env.get(key, "").strip()] if not found: error("No LLM API key detected. Set at least one (e.g. OPENROUTER_API_KEY).") return False ok(f"LLM provider key present: {', '.join(found)}") return True def check_hermes_home(env: dict[str, str]) -> Optional[Path]: section("HERMES_HOME data directory") raw = env.get("HERMES_HOME") or os.environ.get("HERMES_HOME") or "" if raw: home = Path(raw).expanduser() else: home = Path.home() / ".hermes" if not home.exists(): warn(f"HERMES_HOME does not exist yet: {home} (will be created on first run)") return home ok(f"HERMES_HOME exists: {home}") required_dirs = ["logs", "sessions", "cron", "memories", "skills"] for d in required_dirs: if not (home / d).is_dir(): warn(f"Expected subdirectory missing: {home / d} (created automatically at runtime)") if (home / ".env").exists(): ok(f"Data-directory .env present: {home / '.env'}") else: warn(f"No .env in HERMES_HOME ({home}). " "The Docker entrypoint copies .env.example on first run; " "for bare-metal installs copy it manually.") return home def check_gateway_platforms(env: dict[str, str]) -> None: section("Messaging platform tokens") platforms: dict[str, list[str]] = { "Telegram": ["TELEGRAM_BOT_TOKEN"], "Discord": ["DISCORD_BOT_TOKEN"], "Slack": ["SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"], "WhatsApp": [], # pairing-based, no env key required "Email": ["EMAIL_ADDRESS", "EMAIL_PASSWORD"], } any_found = False for platform, keys in platforms.items(): if not keys: continue # WhatsApp — no key check if all(env.get(k, "").strip() for k in keys): ok(f"{platform}: configured ({', '.join(keys)})") any_found = True if not any_found: warn("No messaging platform tokens found. " "The gateway will start but accept no inbound messages. " "Set at least one platform token (e.g. TELEGRAM_BOT_TOKEN).") def check_api_server_reachable(host: str = "127.0.0.1", port: int = 8642) -> None: section("API server health check") url = f"http://{host}:{port}/health" try: with urllib.request.urlopen(url, timeout=5) as resp: body = resp.read().decode() if '"status"' in body and "ok" in body: ok(f"API server healthy: {url}") else: warn(f"Unexpected /health response from {url}: {body[:200]}") except urllib.error.URLError as exc: # Not a failure — the server may not be running in --dry-run mode. warn(f"API server not reachable at {url}: {exc.reason} " "(expected if gateway is not running)") except OSError as exc: warn(f"API server not reachable at {url}: {exc}") def check_gateway_status(hermes_home: Optional[Path]) -> None: section("Gateway runtime status") if hermes_home is None: warn("HERMES_HOME unknown — skipping runtime status check.") return state_file = hermes_home / "gateway_state.json" pid_file = hermes_home / "gateway.pid" if not state_file.exists() and not pid_file.exists(): warn("Gateway does not appear to be running (no PID or state file). " "This is expected before the first start.") return if state_file.exists(): import json try: state = json.loads(state_file.read_text()) gw_state = state.get("gateway_state", "unknown") updated = state.get("updated_at", "?") if gw_state == "running": ok(f"Gateway state: {gw_state} (updated {updated})") platforms = state.get("platforms", {}) for plat, pdata in platforms.items(): pstate = pdata.get("state", "unknown") if pstate in ("connected", "running", "ok"): ok(f" Platform {plat}: {pstate}") else: warn(f" Platform {plat}: {pstate} — {pdata.get('error_message', '')}") elif gw_state in ("stopped", "startup_failed"): error(f"Gateway state: {gw_state} — {state.get('exit_reason', 'no reason recorded')}") else: warn(f"Gateway state: {gw_state}") except Exception as exc: warn(f"Could not parse {state_file}: {exc}") else: warn("State file missing; only PID file found. Gateway may be starting.") def check_docker_available() -> None: section("Docker / compose availability") for cmd in ("docker", "docker compose"): _check_command(cmd.split()[0], cmd) def _check_command(name: str, display: str) -> bool: import shutil if shutil.which(name): ok(f"{display} found") return True warn(f"{display} not found in PATH (only required for Docker deployments)") return False def check_ports_free(ports: list[int] = None) -> None: section("Port availability") if ports is None: ports = [8642] for port in ports: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.settimeout(1) result = s.connect_ex(("127.0.0.1", port)) if result == 0: warn(f"Port {port} is already in use. " "The API server will fail to bind unless you change its port.") else: ok(f"Port {port} is free") def check_no_secrets_in_repo(repo_root: Path) -> None: section("Secret hygiene") dangerous = [".env", "*.pem", "*.key", "id_rsa", "id_ed25519"] gitignore = repo_root / ".gitignore" if gitignore.exists(): content = gitignore.read_text() for pattern in [".env", "*.pem", "*.key"]: if pattern in content or pattern.lstrip("*. ") in content: ok(f".gitignore covers {pattern}") else: warn(f".gitignore does not mention {pattern}. " "Ensure secrets are never committed.") else: warn("No .gitignore found. Secrets could accidentally be committed.") # Check the env file itself isn't tracked. env_file = repo_root / ".env" if env_file.exists(): import subprocess try: out = subprocess.run( ["git", "ls-files", "--error-unmatch", ".env"], cwd=repo_root, capture_output=True, ) if out.returncode == 0: error(".env IS tracked by git! Remove it immediately: git rm --cached .env") else: ok(".env is not tracked by git") except FileNotFoundError: warn("git not found — cannot verify .env tracking status") # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def main() -> int: parser = argparse.ArgumentParser( description="Pre-flight configuration validator for Hermes deployments.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) parser.add_argument( "--dry-run", action="store_true", help="Alias for the default mode (no state is written regardless).", ) parser.add_argument( "--env", metavar="PATH", help="Path to .env file (default: .env in repo root).", ) parser.add_argument( "--check-ports", action="store_true", help="Also verify that required ports are free (useful before first start).", ) parser.add_argument( "--skip-health", action="store_true", help="Skip the live /health HTTP check (use when gateway is not running).", ) args = parser.parse_args() print(f"\n{_color(BOLD + 'Hermes Deploy Validator', BOLD)}") print("=" * 50) repo_root = Path(__file__).resolve().parent.parent env_path = Path(args.env) if args.env else repo_root / ".env" errors_before = [0] # mutable sentinel # Monkey-patch error() to count failures. _original_error = globals()["error"] error_count = 0 def counting_error(msg: str) -> None: nonlocal error_count error_count += 1 _original_error(msg) globals()["error"] = counting_error # Run checks. env = check_env_file(env_path) check_no_secrets_in_repo(repo_root) llm_ok = check_llm_key(env) hermes_home = check_hermes_home(env) check_gateway_platforms(env) if args.check_ports: check_ports_free() if not args.skip_health: check_api_server_reachable() check_gateway_status(hermes_home) # Summary. print(f"\n{'=' * 50}") if error_count == 0: print(_color(f"All checks passed (0 errors).", GREEN)) return 0 else: print(_color(f"{error_count} error(s) found. Fix them before deploying.", RED)) return 1 if __name__ == "__main__": sys.exit(main())