Files
timmy-config/scripts/fleet_llama.py
Alexander Payne ab9d1c0fa4
Some checks failed
Smoke Test / smoke (pull_request) Failing after 23s
Architecture Lint / Linter Tests (pull_request) Successful in 26s
Validate Config / YAML Lint (pull_request) Failing after 15s
Validate Config / JSON Validate (pull_request) Successful in 19s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m1s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 1m4s
Validate Config / Cron Syntax Check (pull_request) Successful in 13s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 13s
Validate Config / Playbook Schema Validation (pull_request) Successful in 25s
Architecture Lint / Lint Repository (pull_request) Failing after 22s
PR Checklist / pr-checklist (pull_request) Successful in 5m0s
[GEMINI-HARDEN-01] Replace hard-coded fleet inventory with repo-native config
Add fleet.inventory and fleet.path_contracts to config.yaml:
- Central source of truth for IPs, ports, roles, remote paths
- Introduce get_config_path(), load_fleet_inventory(), get_path_contract()
- Updated fleet_llama.py, self_healing.py, telemetry.py, agent_dispatch.py,
  skill_installer.py to read from config instead of hard-coded dicts/paths
- Documented inventory contract and override mechanism in scripts/README.md

Scripts retain forward-compatible fallback defaults for backwards compatibility.

Closes #433
2026-04-26 22:47:59 -04:00

172 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""
[OPS] llama.cpp Fleet Manager
Part of the Gemini Sovereign Infrastructure Suite.
Manages llama-server instances across the Timmy Foundation fleet.
Supports status, restart, and model swapping via SSH.
"""
import os
import sys
import json
import argparse
import requests
from typing import Dict, List, Any
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
if SCRIPT_DIR not in sys.path:
sys.path.insert(0, SCRIPT_DIR)
from ssh_trust import VerifiedSSHExecutor
import yaml
# --- FLEET DEFINITION ---
def get_config_path():
return os.environ.get('TIMMY_CONFIG') or os.path.join(
os.path.dirname(os.path.abspath(__file__)), '..', 'config.yaml'
)
def load_fleet_inventory():
"""Return dict {{host: {{ip, port, role, remote_root, capabilities}}}} from config.yaml,
or approved fallback defaults."""
try:
with open(get_config_path(), 'r') as f:
cfg = yaml.safe_load(f)
inv = cfg.get('fleet', {}).get('inventory', {})
if inv:
return inv
except Exception:
pass
return {
"mac": {"ip": "10.1.10.77", "port": 8080, "role": "hub"},
"ezra": {"ip": "143.198.27.163", "port": 8080, "role": "forge"},
"allegro": {"ip": "167.99.126.228", "port": 8080, "role": "agent-host"},
"bezalel": {"ip": "159.203.146.185", "port": 8080, "role": "world-host"},
}
def get_path_contract(key, default):
"""Return path contract from config.yaml."""
try:
with open(get_config_path(), 'r') as f:
cfg = yaml.safe_load(f)
return cfg.get('fleet', {}).get('path_contracts', {}).get(key, default)
except Exception:
return default
FLEET = load_fleet_inventory() # dict {host: {{"ip":..,"port":..,"role":..}}}
class FleetManager:
def __init__(self, executor=None):
self.results = {}
self.executor = executor or VerifiedSSHExecutor()
def run_remote(self, host: str, command: str):
ip = FLEET[host]["ip"]
ssh_cmd = [
"ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
f"root@{ip}", command
]
# For Mac, we might need a different user or local execution
if host == "mac":
ssh_cmd = ["bash", "-c", command]
try:
result = subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=10)
return result
except subprocess.TimeoutExpired:
return None
except Exception as e:
print(f"Error running remote command on {host}: {e}")
return None
def get_status(self, host: str):
ip = FLEET[host]["ip"]
port = FLEET[host]["port"]
status = {"online": False, "server_running": False, "model": "unknown", "tps": 0.0}
# 1. Check if machine is reachable
ping_res = subprocess.run(["ping", "-c", "1", "-W", "1", ip], capture_output=True)
if ping_res.returncode == 0:
status["online"] = True
# 2. Check if llama-server is responding to health check
try:
url = f"http://{ip}:{port}/health"
response = requests.get(url, timeout=2)
if response.status_code == 200:
status["server_running"] = True
data = response.json()
# llama.cpp health endpoint usually returns slots info
# We'll try to get model info if available
status["model"] = data.get("model", "unknown")
except:
pass
return status
def show_fleet_status(self):
print(f"{'NAME':<10} {'IP':<15} {'STATUS':<10} {'SERVER':<10} {'MODEL':<20}")
print("-" * 70)
for name in FLEET:
status = self.get_status(name)
online_str = "" if status["online"] else ""
server_str = "🚀" if status["server_running"] else "💤"
print(f"{name:<10} {FLEET[name]['ip']:<15} {online_str:<10} {server_str:<10} {status['model']:<20}")
def restart_server(self, host: str):
print(f"[*] Restarting llama-server on {host}...")
res = self.run_remote(host, "systemctl restart llama-server")
if res and res.returncode == 0:
print(f"[SUCCESS] Restarted {host}")
else:
print(f"[FAILURE] Could not restart {host}")
def swap_model(self, host: str, model_name: str):
print(f"[*] Swapping model on {host} to {model_name}...")
# This assumes the provision_wizard.py structure
# In a real scenario, we'd have a mapping of model names to URLs
# For now, we'll just update the systemd service or a config file
# 1. Stop server
self.run_remote(host, "systemctl stop llama-server")
# 2. Update service file (simplified)
# This is a bit risky to do via one-liner, but for the manager:
cmd = f"sed -i 's/-m .*\\.gguf/-m \\/opt\\/models\\/{model_name}.gguf/' /etc/systemd/system/llama-server.service"
self.run_remote(host, cmd)
# 3. Start server
self.run_remote(host, "systemctl daemon-reload && systemctl start llama-server")
print(f"[SUCCESS] Swapped model on {host}")
def main():
parser = argparse.ArgumentParser(description="Gemini Fleet Manager")
subparsers = parser.add_subparsers(dest="command")
subparsers.add_parser("status", help="Show fleet status")
restart_parser = subparsers.add_parser("restart", help="Restart a server")
restart_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to restart")
swap_parser = subparsers.add_parser("swap", help="Swap model on a host")
swap_parser.add_argument("host", choices=list(FLEET.keys()), help="Host to swap")
swap_parser.add_argument("model", help="Model name (GGUF)")
args = parser.parse_args()
manager = FleetManager()
if args.command == "status":
manager.show_fleet_status()
elif args.command == "restart":
manager.restart_server(args.host)
elif args.command == "swap":
manager.swap_model(args.host, args.model)
else:
parser.print_help()
if __name__ == "__main__":
main()