Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 32s
Smoke Test / smoke (pull_request) Failing after 29s
Validate Config / YAML Lint (pull_request) Failing after 19s
Validate Config / JSON Validate (pull_request) Successful in 21s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m9s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 41s
Validate Config / Cron Syntax Check (pull_request) Successful in 13s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 10s
Validate Config / Playbook Schema Validation (pull_request) Successful in 17s
Architecture Lint / Lint Repository (pull_request) Failing after 26s
PR Checklist / pr-checklist (pull_request) Successful in 7m28s
Remove StrictHostKeyChecking=no and bash -c fallback. Use VerifiedSSHExecutor.run_script() for all remote commands. Quote model_name via shlex.quote() in swap_model sed command. Closes #434
141 lines
5.0 KiB
Python
141 lines
5.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
[OPS] llama.cpp Fleet Manager
|
|
Part of the Gemini Sovereign Infrastructure Suite.
|
|
|
|
Manages llama-server instances across the Timmy Foundation fleet.
|
|
Supports status, restart, and model swapping via SSH.
|
|
"""
|
|
|
|
import argparse
import json
import os
import re
import shlex
import subprocess
import sys
from typing import Any, Dict, List

import requests
|
|
|
|
|
|
# Make sibling modules importable when this script is run from another
# working directory (e.g. via cron or an absolute path).
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

if SCRIPT_DIR not in sys.path:
    sys.path.insert(0, SCRIPT_DIR)

# Project-local SSH executor that verifies host keys before running commands.
# NOTE(review): assumes ssh_trust.py lives next to this file — confirm.
from ssh_trust import VerifiedSSHExecutor
|
|
|
|
# --- FLEET DEFINITION ---
# Static inventory of managed hosts: name -> {ip, llama-server port, role}.
# "mac" is the local hub; commands for it run locally instead of over SSH.
FLEET = {
    "mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
    "ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
    "allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
    "bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"}
}
|
|
|
|
class FleetManager:
    """Manage llama-server instances across the fleet defined in FLEET.

    All remote work goes through a VerifiedSSHExecutor so host keys are
    verified before any command runs. The executor is injectable for tests.
    """

    # Allowlist for model names used by swap_model(). Anything outside this
    # set could smuggle sed or shell metacharacters into the systemd unit
    # rewrite, so unsafe names are rejected up front.
    _MODEL_NAME_RE = re.compile(r"^[A-Za-z0-9._+-]+$")

    def __init__(self, executor=None):
        # Scratch space for per-host results (kept for backward compatibility).
        self.results = {}
        # Default to the verified-SSH executor; tests may pass a stub.
        self.executor = executor or VerifiedSSHExecutor()

    def run_remote(self, host: str, command: str):
        """Run *command* on *host* via the verified executor.

        Returns the executor's result object (exposes .returncode), or None
        on any failure. The "mac" host is this machine, so it runs locally.
        """
        ip = FLEET[host]["ip"]
        local_mode = (host == "mac")
        try:
            return self.executor.run_script(
                host=ip,
                script_text=command,
                local=local_mode,
                timeout=10,
            )
        except Exception as e:
            # Best-effort boundary: callers treat None as "command failed".
            print(f"[ERROR] Error running remote command on {host}: {e}")
            return None

    def get_status(self, host: str) -> Dict[str, Any]:
        """Probe one host: ICMP reachability plus the llama-server /health endpoint."""
        ip = FLEET[host]["ip"]
        port = FLEET[host]["port"]

        status = {"online": False, "server_running": False, "model": "unknown", "tps": 0.0}

        # 1. Check if machine is reachable.
        # NOTE(review): `-W 1` is the Linux ping timeout flag (seconds); on
        # macOS -W takes milliseconds — confirm which OS runs this manager.
        ping_res = subprocess.run(["ping", "-c", "1", "-W", "1", ip], capture_output=True)
        if ping_res.returncode == 0:
            status["online"] = True

        # 2. Check if llama-server is responding to its health check.
        try:
            url = f"http://{ip}:{port}/health"
            response = requests.get(url, timeout=2)
            if response.status_code == 200:
                status["server_running"] = True
                data = response.json()
                # llama.cpp's health endpoint may not report the model name,
                # in which case the "unknown" default stands.
                status["model"] = data.get("model", "unknown")
        except (requests.RequestException, ValueError):
            # Narrowed from a bare `except:`: connection trouble or bad JSON
            # just means "server not healthy"; anything else should surface.
            pass

        return status

    def show_fleet_status(self):
        """Print a one-line status table for every host in FLEET."""
        print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
        print("-" * 70)
        for name in FLEET:
            status = self.get_status(name)
            online_str = "✅" if status["online"] else "❌"
            server_str = "🚀" if status["server_running"] else "💤"
            print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} {server_str:<10} {status['model']:<20}")

    def restart_server(self, host: str):
        """Restart the llama-server systemd unit on *host* and report the outcome."""
        print(f"[*] Restarting llama-server on {host}...")
        res = self.run_remote(host, "systemctl restart llama-server")
        if res and res.returncode == 0:
            print(f"[SUCCESS] Restarted {host}")
        else:
            print(f"[FAILURE] Could not restart {host}")

    def swap_model(self, host: str, model_name: str):
        """Swap the active model on *host* by rewriting its systemd unit.

        Stops the server, rewrites the `-m` argument in the unit file to
        /opt/models/<model_name>.gguf, then reloads and restarts.

        Raises:
            ValueError: if *model_name* contains characters outside the
                allowlist (they could break the sed program or the shell).
        """
        if not self._MODEL_NAME_RE.match(model_name):
            raise ValueError(f"Unsafe model name: {model_name!r}")

        print(f"[*] Swapping model on {host} to {model_name}...")

        # 1. Stop server
        self.run_remote(host, "systemctl stop llama-server")

        # 2. Update the unit file. The entire sed program is passed through
        # shlex.quote() so the remote shell sees it verbatim; `|` delimiters
        # avoid having to escape the path slashes. (Quoting only the model
        # name inside an already single-quoted sed string was broken: a
        # quoted result would corrupt the surrounding shell string.)
        sed_program = f"s|-m .*\\.gguf|-m /opt/models/{model_name}.gguf|"
        sed_res = self.run_remote(
            host,
            f"sed -i {shlex.quote(sed_program)} /etc/systemd/system/llama-server.service",
        )

        # 3. Start server — and only claim success if both steps succeeded
        # (the previous version printed [SUCCESS] unconditionally).
        start_res = self.run_remote(host, "systemctl daemon-reload && systemctl start llama-server")
        if sed_res and sed_res.returncode == 0 and start_res and start_res.returncode == 0:
            print(f"[SUCCESS] Swapped model on {host}")
        else:
            print(f"[FAILURE] Model swap on {host} did not complete cleanly")
|
|
|
|
def main():
    """CLI entry point: dispatch the status / restart / swap subcommands."""
    parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
    subparsers = parser.add_subparsers(dest="command")

    subparsers.add_parser("status", help="Show fleet status")

    p_restart = subparsers.add_parser("restart", help="Restart a server")
    p_restart.add_argument("host", choices=list(FLEET.keys()), help="Host to restart")

    p_swap = subparsers.add_parser("swap", help="Swap model on a host")
    p_swap.add_argument("host", choices=list(FLEET.keys()), help="Host to swap")
    p_swap.add_argument("model", help="Model name (GGUF)")

    opts = parser.parse_args()
    fleet = FleetManager()

    # Dispatch table instead of an if/elif chain; the lambdas defer attribute
    # access so e.g. opts.host is only touched for the commands that have it.
    # No/unknown subcommand falls back to printing the help text.
    handlers = {
        "status": lambda: fleet.show_fleet_status(),
        "restart": lambda: fleet.restart_server(opts.host),
        "swap": lambda: fleet.swap_model(opts.host, opts.model),
    }
    handlers.get(opts.command, parser.print_help)()
|
|
|
|
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()
|