feat: implement fleet_llama.py
This commit is contained in:
scripts/fleet_llama.py — 137 lines (new file)
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
[OPS] llama.cpp Fleet Manager
|
||||
Part of the Gemini Sovereign Infrastructure Suite.
|
||||
|
||||
Manages llama-server instances across the Timmy Foundation fleet.
|
||||
Supports status, restart, and model swapping via SSH.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import subprocess
|
||||
import requests
|
||||
from typing import Dict, List, Any
|
||||
|
||||
# --- FLEET DEFINITION ---
# Static inventory of managed hosts. Each entry maps a short host name to:
#   ip   - address used for ping, SSH, and the llama-server HTTP endpoint
#   port - llama-server HTTP port (health check at http://<ip>:<port>/health)
#   role - informational label only; not read by the code below
# NOTE(review): "mac" is treated as the local machine by FleetManager.run_remote
# (commands run via local bash, not SSH) — confirm this matches deployment.
FLEET = {
    "mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
    "ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
    "allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
    "bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"}
}
|
||||
|
||||
class FleetManager:
    """Manage llama-server instances on the hosts declared in FLEET.

    Capabilities: status polling (ICMP ping + HTTP /health probe),
    systemd service restart over SSH, and in-place model swapping by
    rewriting the systemd unit's ``-m`` flag.
    """

    def __init__(self):
        # Reserved for accumulating per-host results; not yet populated.
        self.results = {}

    def run_remote(self, host: str, command: str):
        """Run *command* on *host*; return the CompletedProcess or None.

        The "mac" entry is the local hub, so its commands execute via a
        local ``bash -c`` instead of SSH. Returns None on timeout or if
        the ssh/bash binary itself cannot be launched.
        """
        ip = FLEET[host]["ip"]
        ssh_cmd = [
            "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
            f"root@{ip}", command
        ]
        # For Mac, we might need a different user or local execution
        if host == "mac":
            ssh_cmd = ["bash", "-c", command]

        try:
            return subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=10)
        except subprocess.TimeoutExpired:
            return None
        except OSError as e:
            # FIX: was `except Exception` — narrowed to OSError (exec failure);
            # anything else is a real bug and should surface.
            print(f"Error running remote command on {host}: {e}")
            return None

    def get_status(self, host: str):
        """Probe *host* and return a status dict.

        Keys: online (ping ok), server_running (health endpoint returned
        200), model (from the health payload if present), tps (currently
        always 0.0 — placeholder).
        """
        ip = FLEET[host]["ip"]
        port = FLEET[host]["port"]

        status = {"online": False, "server_running": False, "model": "unknown", "tps": 0.0}

        # 1. Check if machine is reachable.
        # NOTE(review): `-W 1` is seconds on Linux ping but milliseconds on
        # macOS — confirm the intended wait on the hub.
        ping_res = subprocess.run(["ping", "-c", "1", "-W", "1", ip], capture_output=True)
        if ping_res.returncode == 0:
            status["online"] = True

            # 2. Check if llama-server is responding to its health check.
            try:
                url = f"http://{ip}:{port}/health"
                response = requests.get(url, timeout=2)
                if response.status_code == 200:
                    status["server_running"] = True
                    data = response.json()
                    # llama.cpp health endpoint usually returns slots info;
                    # pick up a model name if the payload carries one.
                    status["model"] = data.get("model", "unknown")
            except (requests.RequestException, ValueError):
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit. ValueError covers a
                # non-JSON health response.
                pass

        return status

    def show_fleet_status(self):
        """Print a one-line status table for every host in FLEET."""
        print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
        print("-" * 70)
        for name in FLEET:
            status = self.get_status(name)
            online_str = "✅" if status["online"] else "❌"
            server_str = "🚀" if status["server_running"] else "💤"
            print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} {server_str:<10} {status['model']:<20}")

    def restart_server(self, host: str):
        """Restart the llama-server systemd unit on *host*."""
        print(f"[*] Restarting llama-server on {host}...")
        res = self.run_remote(host, "systemctl restart llama-server")
        if res and res.returncode == 0:
            print(f"[SUCCESS] Restarted {host}")
        else:
            print(f"[FAILURE] Could not restart {host}")

    def swap_model(self, host: str, model_name: str):
        """Point *host*'s llama-server at /opt/models/<model_name>.gguf.

        Stops the service, rewrites the ``-m`` flag in the systemd unit
        with sed, then reloads and restarts. Reports FAILURE unless
        every step's command returned success.
        """
        print(f"[*] Swapping model on {host} to {model_name}...")
        # This assumes the provision_wizard.py structure.
        # In a real scenario, we'd have a mapping of model names to URLs;
        # for now we just rewrite the systemd service file in place.

        # 1. Stop server
        stop_res = self.run_remote(host, "systemctl stop llama-server")

        # 2. Update service file (simplified one-liner; a templated unit
        # file would be safer, but this matches the current provisioning).
        cmd = f"sed -i 's/-m .*\\.gguf/-m \\/opt\\/models\\/{model_name}.gguf/' /etc/systemd/system/llama-server.service"
        sed_res = self.run_remote(host, cmd)

        # 3. Start server
        start_res = self.run_remote(host, "systemctl daemon-reload && systemctl start llama-server")

        # FIX: original printed [SUCCESS] unconditionally, even when the
        # stop/sed/start commands timed out or failed.
        steps = (stop_res, sed_res, start_res)
        if all(r is not None and r.returncode == 0 for r in steps):
            print(f"[SUCCESS] Swapped model on {host}")
        else:
            print(f"[FAILURE] Could not swap model on {host}")
|
||||
|
||||
def main():
    """Parse CLI arguments and dispatch to the matching FleetManager action."""
    parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
    subparsers = parser.add_subparsers(dest="command")

    subparsers.add_parser("status", help="Show fleet status")

    restart_parser = subparsers.add_parser("restart", help="Restart a server")
    restart_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to restart")

    swap_parser = subparsers.add_parser("swap", help="Swap model on a host")
    swap_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to swap")
    swap_parser.add_argument("model", help="Model name (GGUF)")

    args = parser.parse_args()

    manager = FleetManager()

    # Dispatch table keyed on the chosen sub-command; an unrecognized or
    # missing command (args.command is None) falls through to the help text.
    actions = {
        "status": lambda: manager.show_fleet_status(),
        "restart": lambda: manager.restart_server(args.host),
        "swap": lambda: manager.swap_model(args.host, args.model),
    }
    action = actions.get(args.command)
    if action is not None:
        action()
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user