#!/usr/bin/env python3
"""
[OPS] llama.cpp Fleet Manager
Part of the Gemini Sovereign Infrastructure Suite.

Manages llama-server instances across the Timmy Foundation fleet.
Supports status, restart, and model swapping via SSH.
"""

import argparse
import re
import subprocess
from typing import Any, Dict, Optional

import requests

# --- FLEET DEFINITION ---
# name -> connection info. "role" is informational only; nothing below reads it.
FLEET: Dict[str, Dict[str, Any]] = {
    "mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
    "ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
    "allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
    "bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"},
}

# Model names are interpolated into a remote sed one-liner (see swap_model),
# so restrict them to plain filename characters to rule out shell/sed injection.
_MODEL_NAME_RE = re.compile(r"^[A-Za-z0-9._-]+$")


class FleetManager:
    """Queries and controls llama-server instances on the fleet hosts."""

    def __init__(self) -> None:
        # Scratch space for callers that want to collect per-host results.
        self.results: Dict[str, Any] = {}

    def run_remote(self, host: str, command: str) -> Optional[subprocess.CompletedProcess]:
        """Run *command* on *host* and return the CompletedProcess.

        For "mac" (the local hub) the command runs locally via bash; for all
        other hosts it runs as root over SSH. Returns None on timeout or if
        the subprocess could not be launched.
        """
        if host == "mac":
            # The Mac is the machine this tool runs on: execute directly
            # instead of SSHing to ourselves.
            cmd = ["bash", "-c", command]
        else:
            ip = FLEET[host]["ip"]
            cmd = [
                "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
                f"root@{ip}", command,
            ]

        try:
            # 30 s (up from 10 s): "systemctl daemon-reload && start" can take
            # well over 10 s while llama-server mmaps a large GGUF.
            return subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        except subprocess.TimeoutExpired:
            return None
        except OSError as e:
            # e.g. ssh/bash binary missing; anything else should surface.
            print(f"Error running remote command on {host}: {e}")
            return None

    def get_status(self, host: str) -> Dict[str, Any]:
        """Return {"online", "server_running", "model", "tps"} for *host*."""
        ip = FLEET[host]["ip"]
        port = FLEET[host]["port"]

        status: Dict[str, Any] = {
            "online": False,
            "server_running": False,
            "model": "unknown",
            "tps": 0.0,  # reserved; nothing populates it yet
        }

        # 1. ICMP reachability.
        # NOTE(review): "-W 1" is seconds on Linux ping but milliseconds on
        # macOS/BSD ping — confirm which platform this manager runs on.
        ping_res = subprocess.run(["ping", "-c", "1", "-W", "1", ip], capture_output=True)
        status["online"] = ping_res.returncode == 0

        # 2. llama-server health endpoint.
        try:
            response = requests.get(f"http://{ip}:{port}/health", timeout=2)
            if response.status_code == 200:
                status["server_running"] = True
                # llama.cpp's /health payload varies by version; "model" may
                # be absent, in which case we keep "unknown".
                status["model"] = response.json().get("model", "unknown")
        except (requests.RequestException, ValueError):
            # Connection failure or non-JSON body: keep the defaults.
            pass

        return status

    def show_fleet_status(self) -> None:
        """Print a one-line status row per fleet host."""
        print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
        print("-" * 70)
        for name in FLEET:
            status = self.get_status(name)
            online_str = "✅" if status["online"] else "❌"
            server_str = "🚀" if status["server_running"] else "💤"
            print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} "
                  f"{server_str:<10} {status['model']:<20}")

    def restart_server(self, host: str) -> bool:
        """Restart the llama-server systemd unit on *host*; return success."""
        print(f"[*] Restarting llama-server on {host}...")
        res = self.run_remote(host, "systemctl restart llama-server")
        if res is not None and res.returncode == 0:
            print(f"[SUCCESS] Restarted {host}")
            return True
        print(f"[FAILURE] Could not restart {host}")
        return False

    def swap_model(self, host: str, model_name: str) -> bool:
        """Point *host*'s llama-server unit at /opt/models/<model_name>.gguf.

        Stops the unit, rewrites the -m argument in the systemd service file,
        then reloads and restarts. Returns True only if every step succeeded
        (the original printed SUCCESS unconditionally — a silent-failure bug).
        """
        if not _MODEL_NAME_RE.match(model_name):
            # Reject anything that could escape the sed expression below.
            print(f"[FAILURE] Invalid model name: {model_name!r}")
            return False

        print(f"[*] Swapping model on {host} to {model_name}...")
        steps = [
            "systemctl stop llama-server",
            # Rewrite the "-m <path>.gguf" argument in place in the unit file.
            f"sed -i 's/-m .*\\.gguf/-m \\/opt\\/models\\/{model_name}.gguf/' "
            "/etc/systemd/system/llama-server.service",
            "systemctl daemon-reload && systemctl start llama-server",
        ]
        for step in steps:
            res = self.run_remote(host, step)
            if res is None or res.returncode != 0:
                print(f"[FAILURE] Step failed on {host}: {step}")
                return False

        print(f"[SUCCESS] Swapped model on {host}")
        return True


def main() -> None:
    """CLI entry point: status / restart <host> / swap <host> <model>."""
    parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
    subparsers = parser.add_subparsers(dest="command")

    subparsers.add_parser("status", help="Show fleet status")

    restart_parser = subparsers.add_parser("restart", help="Restart a server")
    restart_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to restart")

    swap_parser = subparsers.add_parser("swap", help="Swap model on a host")
    swap_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to swap")
    swap_parser.add_argument("model", help="Model name (GGUF)")

    args = parser.parse_args()

    manager = FleetManager()

    if args.command == "status":
        manager.show_fleet_status()
    elif args.command == "restart":
        manager.restart_server(args.host)
    elif args.command == "swap":
        manager.swap_model(args.host, args.model)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()