feat: implement fleet_llama.py
This commit is contained in:
scripts/fleet_llama.py — 137 lines (new file)
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
[OPS] llama.cpp Fleet Manager
|
||||
Part of the Gemini Sovereign Infrastructure Suite.
|
||||
|
||||
Manages llama-server instances across the Timmy Foundation fleet.
|
||||
Supports status, restart, and model swapping via SSH.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import subprocess
|
||||
import requests
|
||||
from typing import Dict, List, Any
|
||||
|
||||
# --- FLEET DEFINITION ---
# Static inventory of managed hosts. Each entry maps a short host name to:
#   ip   - address used for ping, SSH, and the llama-server HTTP endpoint
#   port - llama-server HTTP port (health check at http://<ip>:<port>/health)
#   role - informational label only; not read by the code below
# NOTE(review): "mac" is treated as the local machine by FleetManager.run_remote
# (commands run via local bash, not SSH) — confirm this matches deployment.
FLEET = {
    "mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
    "ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
    "allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
    "bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"}
}
|
||||
|
||||
class FleetManager:
    """Manage llama-server instances on the hosts declared in FLEET.

    Capabilities: status polling (ICMP ping + HTTP /health probe),
    systemd service restart over SSH, and in-place model swapping by
    rewriting the systemd unit's ``-m`` flag.
    """

    def __init__(self):
        # Reserved for accumulating per-host results; not yet populated.
        self.results = {}

    def run_remote(self, host: str, command: str):
        """Run *command* on *host*; return the CompletedProcess or None.

        The "mac" entry is the local hub, so its commands execute via a
        local ``bash -c`` instead of SSH. Returns None on timeout or if
        the ssh/bash binary itself cannot be launched.
        """
        ip = FLEET[host]["ip"]
        ssh_cmd = [
            "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
            f"root@{ip}", command
        ]
        # For Mac, we might need a different user or local execution
        if host == "mac":
            ssh_cmd = ["bash", "-c", command]

        try:
            return subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=10)
        except subprocess.TimeoutExpired:
            return None
        except OSError as e:
            # FIX: was `except Exception` — narrowed to OSError (exec failure);
            # anything else is a real bug and should surface.
            print(f"Error running remote command on {host}: {e}")
            return None

    def get_status(self, host: str):
        """Probe *host* and return a status dict.

        Keys: online (ping ok), server_running (health endpoint returned
        200), model (from the health payload if present), tps (currently
        always 0.0 — placeholder).
        """
        ip = FLEET[host]["ip"]
        port = FLEET[host]["port"]

        status = {"online": False, "server_running": False, "model": "unknown", "tps": 0.0}

        # 1. Check if machine is reachable.
        # NOTE(review): `-W 1` is seconds on Linux ping but milliseconds on
        # macOS — confirm the intended wait on the hub.
        ping_res = subprocess.run(["ping", "-c", "1", "-W", "1", ip], capture_output=True)
        if ping_res.returncode == 0:
            status["online"] = True

            # 2. Check if llama-server is responding to its health check.
            try:
                url = f"http://{ip}:{port}/health"
                response = requests.get(url, timeout=2)
                if response.status_code == 200:
                    status["server_running"] = True
                    data = response.json()
                    # llama.cpp health endpoint usually returns slots info;
                    # pick up a model name if the payload carries one.
                    status["model"] = data.get("model", "unknown")
            except (requests.RequestException, ValueError):
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit. ValueError covers a
                # non-JSON health response.
                pass

        return status

    def show_fleet_status(self):
        """Print a one-line status table for every host in FLEET."""
        print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
        print("-" * 70)
        for name in FLEET:
            status = self.get_status(name)
            online_str = "✅" if status["online"] else "❌"
            server_str = "🚀" if status["server_running"] else "💤"
            print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} {server_str:<10} {status['model']:<20}")

    def restart_server(self, host: str):
        """Restart the llama-server systemd unit on *host*."""
        print(f"[*] Restarting llama-server on {host}...")
        res = self.run_remote(host, "systemctl restart llama-server")
        if res and res.returncode == 0:
            print(f"[SUCCESS] Restarted {host}")
        else:
            print(f"[FAILURE] Could not restart {host}")

    def swap_model(self, host: str, model_name: str):
        """Point *host*'s llama-server at /opt/models/<model_name>.gguf.

        Stops the service, rewrites the ``-m`` flag in the systemd unit
        with sed, then reloads and restarts. Reports FAILURE unless
        every step's command returned success.
        """
        print(f"[*] Swapping model on {host} to {model_name}...")
        # This assumes the provision_wizard.py structure.
        # In a real scenario, we'd have a mapping of model names to URLs;
        # for now we just rewrite the systemd service file in place.

        # 1. Stop server
        stop_res = self.run_remote(host, "systemctl stop llama-server")

        # 2. Update service file (simplified one-liner; a templated unit
        # file would be safer, but this matches the current provisioning).
        cmd = f"sed -i 's/-m .*\\.gguf/-m \\/opt\\/models\\/{model_name}.gguf/' /etc/systemd/system/llama-server.service"
        sed_res = self.run_remote(host, cmd)

        # 3. Start server
        start_res = self.run_remote(host, "systemctl daemon-reload && systemctl start llama-server")

        # FIX: original printed [SUCCESS] unconditionally, even when the
        # stop/sed/start commands timed out or failed.
        steps = (stop_res, sed_res, start_res)
        if all(r is not None and r.returncode == 0 for r in steps):
            print(f"[SUCCESS] Swapped model on {host}")
        else:
            print(f"[FAILURE] Could not swap model on {host}")
|
||||
|
||||
def main():
    """Parse CLI arguments and dispatch to the matching FleetManager action."""
    parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
    subparsers = parser.add_subparsers(dest="command")

    subparsers.add_parser("status", help="Show fleet status")

    restart_parser = subparsers.add_parser("restart", help="Restart a server")
    restart_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to restart")

    swap_parser = subparsers.add_parser("swap", help="Swap model on a host")
    swap_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to swap")
    swap_parser.add_argument("model", help="Model name (GGUF)")

    args = parser.parse_args()

    manager = FleetManager()

    # Dispatch table keyed on the chosen sub-command; an unrecognized or
    # missing command (args.command is None) falls through to the help text.
    actions = {
        "status": lambda: manager.show_fleet_status(),
        "restart": lambda: manager.restart_server(args.host),
        "swap": lambda: manager.swap_model(args.host, args.model),
    }
    action = actions.get(args.command)
    if action is not None:
        action()
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user