138 lines
5.0 KiB
Python
138 lines
5.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
[OPS] llama.cpp Fleet Manager
|
|
Part of the Gemini Sovereign Infrastructure Suite.
|
|
|
|
Manages llama-server instances across the Timmy Foundation fleet.
|
|
Supports status, restart, and model swapping via SSH.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import subprocess
|
|
import requests
|
|
from typing import Dict, List, Any
|
|
|
|
# --- FLEET DEFINITION ---
# Static inventory of managed hosts: name -> {ip, llama-server port, role}.
# "mac" is the local hub (executed without SSH in FleetManager.run_remote);
# the others are remote machines reached as root over SSH.
# NOTE(review): every host is assumed to serve on port 8080 — confirm per host.
FLEET: Dict[str, Dict[str, Any]] = {
    "mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
    "ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
    "allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
    "bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"}
}
|
|
|
|
class FleetManager:
    """Queries and controls the llama-server instances defined in FLEET.

    Remote hosts are reached over SSH as root; the "mac" hub executes
    commands locally via ``bash -c`` instead of SSH.
    """

    def __init__(self):
        # Scratch space for callers that want to accumulate per-host results.
        self.results: Dict[str, Any] = {}

    def run_remote(self, host: str, command: str):
        """Execute *command* on *host* and return the CompletedProcess.

        Returns None on timeout or any other launch failure so fleet-wide
        sweeps keep going past a dead host instead of raising.
        """
        ip = FLEET[host]["ip"]
        ssh_cmd = [
            "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
            f"root@{ip}", command
        ]
        # The Mac hub is local (different user / no root SSH): run directly.
        if host == "mac":
            ssh_cmd = ["bash", "-c", command]

        try:
            result = subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=10)
            return result
        except subprocess.TimeoutExpired:
            return None
        except Exception as e:
            print(f"Error running remote command on {host}: {e}")
            return None

    def get_status(self, host: str) -> Dict[str, Any]:
        """Probe *host* and return its status.

        Returns a dict with keys: ``online`` (ping succeeded),
        ``server_running`` (/health returned 200), ``model`` (name reported
        by the server, else "unknown") and ``tps`` (reserved, always 0.0).
        """
        ip = FLEET[host]["ip"]
        port = FLEET[host]["port"]

        status = {"online": False, "server_running": False, "model": "unknown", "tps": 0.0}

        # 1. Check if machine is reachable (single ping, 1 second deadline).
        ping_res = subprocess.run(["ping", "-c", "1", "-W", "1", ip], capture_output=True)
        if ping_res.returncode == 0:
            status["online"] = True

        # 2. Check if llama-server is responding to its health check.
        try:
            url = f"http://{ip}:{port}/health"
            response = requests.get(url, timeout=2)
            if response.status_code == 200:
                status["server_running"] = True
                # llama.cpp health endpoint usually returns slots info;
                # pick up the model name if the server reports one.
                data = response.json()
                status["model"] = data.get("model", "unknown")
        except (requests.RequestException, ValueError):
            # FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. RequestException covers
            # connection/timeout failures; ValueError covers a non-JSON body.
            pass

        return status

    def show_fleet_status(self):
        """Print a one-line status table for every host in FLEET."""
        print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
        print("-" * 70)
        for name in FLEET:
            status = self.get_status(name)
            online_str = "✅" if status["online"] else "❌"
            server_str = "🚀" if status["server_running"] else "💤"
            print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} {server_str:<10} {status['model']:<20}")

    def restart_server(self, host: str):
        """Restart the llama-server systemd unit on *host* and report the outcome."""
        print(f"[*] Restarting llama-server on {host}...")
        res = self.run_remote(host, "systemctl restart llama-server")
        if res and res.returncode == 0:
            print(f"[SUCCESS] Restarted {host}")
        else:
            print(f"[FAILURE] Could not restart {host}")

    def swap_model(self, host: str, model_name: str):
        """Point *host*'s llama-server unit at /opt/models/<model_name>.gguf and restart it.

        Assumes the systemd unit layout created by provision_wizard.py.
        In a real scenario we'd have a mapping of model names to URLs; for
        now the -m flag in the unit file is rewritten in place.
        """
        print(f"[*] Swapping model on {host} to {model_name}...")

        # 1. Stop server (best effort; the unit may already be down).
        self.run_remote(host, "systemctl stop llama-server")

        # 2. Update service file — a sed one-liner is a bit risky, but
        #    acceptable for the manager.
        cmd = f"sed -i 's/-m .*\\.gguf/-m \\/opt\\/models\\/{model_name}.gguf/' /etc/systemd/system/llama-server.service"
        sed_res = self.run_remote(host, cmd)

        # 3. Start server with the rewritten unit.
        start_res = self.run_remote(host, "systemctl daemon-reload && systemctl start llama-server")

        # FIX: previously printed [SUCCESS] unconditionally, even when every
        # remote step had failed or timed out. Report honestly instead.
        if (sed_res and sed_res.returncode == 0
                and start_res and start_res.returncode == 0):
            print(f"[SUCCESS] Swapped model on {host}")
        else:
            print(f"[FAILURE] Model swap on {host} did not complete cleanly")
|
|
|
|
def main():
    """Command-line entry point: dispatches status / restart / swap subcommands."""
    parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
    sub = parser.add_subparsers(dest="command")

    # `status` takes no arguments.
    sub.add_parser("status", help="Show fleet status")

    # `restart` and `swap` both take a host constrained to the known fleet.
    fleet_hosts = list(FLEET.keys())

    p_restart = sub.add_parser("restart", help="Restart a server")
    p_restart.add_argument("host", choices=fleet_hosts, help="Host to restart")

    p_swap = sub.add_parser("swap", help="Swap model on a host")
    p_swap.add_argument("host", choices=fleet_hosts, help="Host to swap")
    p_swap.add_argument("model", help="Model name (GGUF)")

    args = parser.parse_args()
    manager = FleetManager()

    # No subcommand (args.command is None) falls through to the help text.
    if args.command == "status":
        manager.show_fleet_status()
    elif args.command == "restart":
        manager.restart_server(args.host)
    elif args.command == "swap":
        manager.swap_model(args.host, args.model)
    else:
        parser.print_help()
|
|
|
|
# Script entry point: only run the CLI when executed directly, not on import.
if __name__ == "__main__":
    main()
|