#!/usr/bin/env python3
"""
[OPS] llama.cpp Fleet Manager
Part of the Gemini Sovereign Infrastructure Suite.

Manages llama-server instances across the Timmy Foundation fleet.
Supports status, restart, and model swapping via SSH.
"""

import argparse
import json
import os
import re
import subprocess
import sys
from typing import Any, Dict, List, Optional

import requests

# --- FLEET DEFINITION ---
FLEET = {
    "mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
    "ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
    "allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
    "bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"},
}

# systemd unit file whose "-m <model>.gguf" argument swap_model rewrites.
_SERVICE_FILE = "/etc/systemd/system/llama-server.service"

# Model names are spliced into a shell command and a sed replacement, so only
# characters that are inert in both contexts are accepted.
_MODEL_NAME_RE = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")


def _succeeded(result: Optional[subprocess.CompletedProcess]) -> bool:
    """Return True when *result* is a completed process that exited with 0."""
    return result is not None and result.returncode == 0


def _failure_detail(result: Optional[subprocess.CompletedProcess]) -> str:
    """Describe why a run_remote() call is considered failed."""
    if result is None:
        return "no result (timed out or the command could not be launched)"
    stderr = (result.stderr or "").strip()
    if stderr:
        return f"exit code {result.returncode}: {stderr}"
    return f"exit code {result.returncode}"


class FleetManager:
    """Status, restart and model-swap operations for the hosts in FLEET."""

    def __init__(self):
        self.results = {}

    def run_remote(self, host: str, command: str,
                   timeout: float = 10) -> Optional[subprocess.CompletedProcess]:
        """Run a shell command on *host* and return the completed process.

        Args:
            host: Key into FLEET. An unknown key raises KeyError.
            command: Shell command line, executed as-is. Callers are
                responsible for making sure it contains no untrusted text.
            timeout: Seconds to wait for the command before giving up.

        Returns:
            The subprocess.CompletedProcess (stdout/stderr captured as text),
            or None when the command timed out or could not be launched.
        """
        ip = FLEET[host]["ip"]
        if host == "mac":
            # For Mac, we might need a different user or local execution
            argv = ["bash", "-c", command]
        else:
            # NOTE(review): StrictHostKeyChecking=no means host keys are not
            # verified; consider "accept-new" once known_hosts is populated.
            argv = [
                "ssh",
                "-o", "StrictHostKeyChecking=no",
                "-o", "ConnectTimeout=5",
                f"root@{ip}",
                command,
            ]

        try:
            return subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
        except subprocess.TimeoutExpired:
            print(f"Error running remote command on {host}: timed out after {timeout}s")
            return None
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            print(f"Error running remote command on {host}: {e}")
            return None

    @staticmethod
    def _ping(ip: str) -> bool:
        """Return True when a single ICMP echo to *ip* is answered."""
        # BSD-derived ping (macOS, FreeBSD) reads -W in milliseconds, Linux
        # iputils reads it in seconds; ask for a one second wait on both.
        wait = "1000" if sys.platform.startswith(("darwin", "freebsd")) else "1"
        try:
            res = subprocess.run(
                ["ping", "-c", "1", "-W", wait, ip],
                capture_output=True,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError):
            # ping missing, not executable, or hung: treat as "no answer".
            return False
        return res.returncode == 0

    def get_status(self, host: str) -> Dict[str, Any]:
        """Probe *host* and report reachability and llama-server health.

        Args:
            host: Key into FLEET. An unknown key raises KeyError.

        Returns:
            A dict with keys "online" (bool), "server_running" (bool),
            "model" (str, "unknown" when not reported) and "tps" (float,
            always 0.0 here; nothing in this method measures it).
        """
        ip = FLEET[host]["ip"]
        port = FLEET[host]["port"]
        status = {"online": False, "server_running": False, "model": "unknown", "tps": 0.0}

        # 1. Check if machine is reachable
        status["online"] = self._ping(ip)

        # 2. Check if llama-server is responding to health check
        try:
            response = requests.get(f"http://{ip}:{port}/health", timeout=2)
        except requests.RequestException:
            return status

        # Any HTTP answer proves the machine is up, even if ICMP is filtered.
        status["online"] = True
        if response.status_code != 200:
            return status
        status["server_running"] = True

        try:
            data = response.json()
        except ValueError:
            # Healthy but non-JSON body: keep the default model name.
            return status

        # llama.cpp health endpoint usually returns slots info
        # We'll try to get model info if available
        if isinstance(data, dict):
            # Coerce to str so the table formatter never sees None or a dict.
            status["model"] = str(data.get("model") or "unknown")
        return status

    def show_fleet_status(self) -> None:
        """Print one table row per FLEET host with its current status."""
        print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
        print("-" * 70)

        for name in FLEET:
            status = self.get_status(name)
            online_str = "✅" if status["online"] else "❌"
            server_str = "🚀" if status["server_running"] else "💤"
            print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} {server_str:<10} {status['model']:<20}")

    def restart_server(self, host: str) -> bool:
        """Restart the llama-server systemd unit on *host*.

        Returns:
            True when `systemctl restart` exited with 0, False otherwise.
        """
        print(f"[*] Restarting llama-server on {host}...")
        res = self.run_remote(host, "systemctl restart llama-server")
        if _succeeded(res):
            print(f"[SUCCESS] Restarted {host}")
            return True
        print(f"[FAILURE] Could not restart {host}")
        print(f"    {_failure_detail(res)}")
        return False

    def swap_model(self, host: str, model_name: str) -> bool:
        """Point llama-server on *host* at /opt/models/<model_name>.gguf.

        Stops the service, rewrites the "-m ....gguf" argument in the unit
        file with sed, then reloads systemd and starts the service again.

        Args:
            host: Key into FLEET. An unknown key raises KeyError.
            model_name: Model file name, with or without a trailing ".gguf".
                Only letters, digits, ".", "_" and "-" are accepted because
                the name is interpolated into a shell command.

        Returns:
            True when every step succeeded, False otherwise.
        """
        print(f"[*] Swapping model on {host} to {model_name}...")

        # Accept "foo.gguf" as well as "foo"; the suffix is re-added below.
        model = model_name[:-len(".gguf")] if model_name.endswith(".gguf") else model_name
        if not _MODEL_NAME_RE.fullmatch(model):
            print(f"[FAILURE] Invalid model name {model_name!r}: "
                  "only letters, digits, '.', '_' and '-' are allowed")
            return False

        # This assumes the provision_wizard.py structure
        # In a real scenario, we'd have a mapping of model names to URLs
        # For now, we'll just update the systemd service or a config file
        start_cmd = "systemctl daemon-reload && systemctl start llama-server"

        # 1. Stop server
        stopped = self.run_remote(host, "systemctl stop llama-server")
        if not _succeeded(stopped):
            print(f"[FAILURE] Could not stop llama-server on {host}")
            print(f"    {_failure_detail(stopped)}")
            return False

        # 2. Update service file (simplified)
        # This is a bit risky to do via one-liner, but for the manager:
        # NOTE(review): sed exits 0 even when the pattern matched nothing, and
        # the greedy ".*" spans up to the last ".gguf" on the line.
        cmd = f"sed -i 's/-m .*\\.gguf/-m \\/opt\\/models\\/{model}.gguf/' {_SERVICE_FILE}"
        updated = self.run_remote(host, cmd)
        if not _succeeded(updated):
            print(f"[FAILURE] Could not update service file on {host}")
            print(f"    {_failure_detail(updated)}")
            # Best effort: do not leave the server down after a failed edit.
            self.run_remote(host, start_cmd)
            return False

        # 3. Start server
        started = self.run_remote(host, start_cmd)
        if not _succeeded(started):
            print(f"[FAILURE] Could not start llama-server on {host}")
            print(f"    {_failure_detail(started)}")
            return False

        print(f"[SUCCESS] Swapped model on {host}")
        return True


def main() -> int:
    """Parse the command line, run the requested action, return an exit code."""
    parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
    subparsers = parser.add_subparsers(dest="command")

    subparsers.add_parser("status", help="Show fleet status")

    restart_parser = subparsers.add_parser("restart", help="Restart a server")
    restart_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to restart")

    swap_parser = subparsers.add_parser("swap", help="Swap model on a host")
    swap_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to swap")
    swap_parser.add_argument("model", help="Model name (GGUF)")

    args = parser.parse_args()
    manager = FleetManager()

    if args.command == "status":
        manager.show_fleet_status()
        return 0
    if args.command == "restart":
        return 0 if manager.restart_server(args.host) else 1
    if args.command == "swap":
        return 0 if manager.swap_model(args.host, args.model) else 1

    parser.print_help()
    return 0


if __name__ == "__main__":
    sys.exit(main())