Files
timmy-config/scripts/fleet_llama.py
Alexander Whitestone 19aa0830f4
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 8s
Validate Config / YAML Lint (pull_request) Failing after 5s
Validate Config / JSON Validate (pull_request) Successful in 6s
PR Checklist / pr-checklist (pull_request) Failing after 1m11s
Smoke Test / smoke (pull_request) Failing after 7s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 9s
Validate Config / Shell Script Lint (pull_request) Successful in 15s
Validate Config / Cron Syntax Check (pull_request) Successful in 5s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 5s
Validate Config / Playbook Schema Validation (pull_request) Successful in 7s
Architecture Lint / Lint Repository (pull_request) Failing after 6s
Harden Gemini scripts with verified SSH trust
2026-04-11 15:13:15 -04:00

144 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""
[OPS] llama.cpp Fleet Manager
Part of the Gemini Sovereign Infrastructure Suite.
Manages llama-server instances across the Timmy Foundation fleet.
Supports status, restart, and model swapping via SSH.
"""
import os
import sys
import json
import argparse
import requests
from typing import Dict, List, Any
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
if SCRIPT_DIR not in sys.path:
sys.path.insert(0, SCRIPT_DIR)
from ssh_trust import VerifiedSSHExecutor
# --- FLEET DEFINITION ---
FLEET = {
"mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
"ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
"allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
"bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"}
}
class FleetManager:
def __init__(self, executor=None):
self.results = {}
self.executor = executor or VerifiedSSHExecutor()
def run_remote(self, host: str, command: str):
ip = FLEET[host]["ip"]
ssh_cmd = [
"ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
f"root@{ip}", command
]
# For Mac, we might need a different user or local execution
if host == "mac":
ssh_cmd = ["bash", "-c", command]
try:
result = subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=10)
return result
except subprocess.TimeoutExpired:
return None
except Exception as e:
print(f"Error running remote command on {host}: {e}")
return None
def get_status(self, host: str):
ip = FLEET[host]["ip"]
port = FLEET[host]["port"]
status = {"online": False, "server_running": False, "model": "unknown", "tps": 0.0}
# 1. Check if machine is reachable
ping_res = subprocess.run(["ping", "-c", "1", "-W", "1", ip], capture_output=True)
if ping_res.returncode == 0:
status["online"] = True
# 2. Check if llama-server is responding to health check
try:
url = f"http://{ip}:{port}/health"
response = requests.get(url, timeout=2)
if response.status_code == 200:
status["server_running"] = True
data = response.json()
# llama.cpp health endpoint usually returns slots info
# We'll try to get model info if available
status["model"] = data.get("model", "unknown")
except:
pass
return status
def show_fleet_status(self):
print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
print("-" * 70)
for name in FLEET:
status = self.get_status(name)
online_str = "" if status["online"] else ""
server_str = "🚀" if status["server_running"] else "💤"
print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} {server_str:<10} {status['model']:<20}")
def restart_server(self, host: str):
print(f"[*] Restarting llama-server on {host}...")
res = self.run_remote(host, "systemctl restart llama-server")
if res and res.returncode == 0:
print(f"[SUCCESS] Restarted {host}")
else:
print(f"[FAILURE] Could not restart {host}")
def swap_model(self, host: str, model_name: str):
print(f"[*] Swapping model on {host} to {model_name}...")
# This assumes the provision_wizard.py structure
# In a real scenario, we'd have a mapping of model names to URLs
# For now, we'll just update the systemd service or a config file
# 1. Stop server
self.run_remote(host, "systemctl stop llama-server")
# 2. Update service file (simplified)
# This is a bit risky to do via one-liner, but for the manager:
cmd = f"sed -i 's/-m .*\\.gguf/-m \\/opt\\/models\\/{model_name}.gguf/' /etc/systemd/system/llama-server.service"
self.run_remote(host, cmd)
# 3. Start server
self.run_remote(host, "systemctl daemon-reload && systemctl start llama-server")
print(f"[SUCCESS] Swapped model on {host}")
def main():
parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
subparsers = parser.add_subparsers(dest="command")
subparsers.add_parser("status", help="Show fleet status")
restart_parser = subparsers.add_parser("restart", help="Restart a server")
restart_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to restart")
swap_parser = subparsers.add_parser("swap", help="Swap model on a host")
swap_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to swap")
swap_parser.add_argument("model", help="Model name (GGUF)")
args = parser.parse_args()
manager = FleetManager()
if args.command == "status":
manager.show_fleet_status()
elif args.command == "restart":
manager.restart_server(args.host)
elif args.command == "swap":
manager.swap_model(args.host, args.model)
else:
parser.print_help()
if __name__ == "__main__":
main()