diff --git a/scripts/provision_wizard.py b/scripts/provision_wizard.py
new file mode 100644
index 00000000..93b17139
--- /dev/null
+++ b/scripts/provision_wizard.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""
+[OPS] Automated VPS Provisioning System (Von Neumann as Code)
+Part of the Gemini Sovereign Infrastructure Suite.
+
+This script automates the creation and configuration of a "Wizard" node
+from zero to serving inference via llama.cpp.
+
+Usage:
+    python3 provision_wizard.py --name fenrir --size s-2vcpu-4gb --model qwen2.5-coder-7b
+"""
+
+import os
+import sys
+import time
+import argparse
+import requests
+import subprocess
+import json
+from typing import Optional, Dict, Any
+
+# --- CONFIGURATION ---
+DO_API_URL = "https://api.digitalocean.com/v2"
+# We expect DIGITALOCEAN_TOKEN to be set in the environment.
+DO_TOKEN = os.environ.get("DIGITALOCEAN_TOKEN")
+
+# Default settings
+DEFAULT_REGION = "nyc3"
+DEFAULT_IMAGE = "ubuntu-22-04-x64"
+LLAMA_CPP_REPO = "https://github.com/ggerganov/llama.cpp"
+
+class Provisioner:
+    def __init__(self, name: str, size: str, model: str, region: str = DEFAULT_REGION):
+        self.name = name
+        self.size = size
+        self.model = model
+        self.region = region
+        self.droplet_id = None
+        self.ip_address = None
+
+    def log(self, message: str):
+        print(f"[*] {message}")
+
+    def error(self, message: str):
+        print(f"[!] ERROR: {message}")
+        sys.exit(1)
+
+    def check_auth(self):
+        if not DO_TOKEN:
+            self.error("DIGITALOCEAN_TOKEN environment variable not set.")
+
+    def create_droplet(self):
+        self.log(f"Creating droplet '{self.name}' ({self.size}) in {self.region}...")
+
+        # Get SSH keys to add to the droplet
+        ssh_keys = self.get_ssh_keys()
+
+        payload = {
+            "name": self.name,
+            "region": self.region,
+            "size": self.size,
+            "image": DEFAULT_IMAGE,
+            "ssh_keys": ssh_keys,
+            "backups": False,
+            "ipv6": True,
+            "monitoring": True,
+            "tags": ["wizard", "gemini-provisioned"]
+        }
+
+        headers = {
+            "Authorization": f"Bearer {DO_TOKEN}",
+            "Content-Type": "application/json"
+        }
+
+        response = requests.post(f"{DO_API_URL}/droplets", json=payload, headers=headers)
+        if response.status_code != 202:
+            self.error(f"Failed to create droplet: {response.text}")
+
+        data = response.json()
+        self.droplet_id = data["droplet"]["id"]
+        self.log(f"Droplet created (ID: {self.droplet_id}). Waiting for IP...")
+
+    def get_ssh_keys(self) -> list:
+        # Fetch existing SSH keys from DO account to ensure we can log in
+        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
+        response = requests.get(f"{DO_API_URL}/account/keys", headers=headers)
+        if response.status_code != 200:
+            self.log("Warning: Could not fetch SSH keys. Droplet might be inaccessible via SSH.")
+            return []
+        return [key["id"] for key in response.json()["ssh_keys"]]
+
+    def wait_for_ip(self):
+        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
+        while not self.ip_address:
+            response = requests.get(f"{DO_API_URL}/droplets/{self.droplet_id}", headers=headers)
+            data = response.json()
+            networks = data["droplet"]["networks"]["v4"]
+            for net in networks:
+                if net["type"] == "public":
+                    self.ip_address = net["ip_address"]
+                    break
+            if not self.ip_address:
+                time.sleep(5)
+        self.log(f"Droplet IP: {self.ip_address}")
+
+    def run_remote(self, command: str):
+        # Using subprocess to call ssh. Assumes local machine has the right private key.
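+        # StrictHostKeyChecking is disabled because the droplet's host key is not yet
+        # in known_hosts on first contact; the multi-line setup scripts are passed to
+        # ssh as a single command string, so embedded newlines reach the remote shell intact.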
+        ssh_cmd = [
+            "ssh", "-o", "StrictHostKeyChecking=no",
+            f"root@{self.ip_address}", command
+        ]
+        result = subprocess.run(ssh_cmd, capture_output=True, text=True)
+        return result
+
+    def setup_wizard(self):
+        self.log("Starting remote setup...")
+
+        # Wait for SSH to be ready
+        retries = 12
+        while retries > 0:
+            res = self.run_remote("echo 'SSH Ready'")
+            if res.returncode == 0:
+                break
+            self.log(f"Waiting for SSH... ({retries} retries left)")
+            time.sleep(10)
+            retries -= 1
+
+        if retries == 0:
+            self.error("SSH timed out.")
+
+        # 1. Update and install dependencies
+        self.log("Installing dependencies...")
+        setup_script = """
+        export DEBIAN_FRONTEND=noninteractive
+        apt-get update && apt-get upgrade -y
+        apt-get install -y build-essential git cmake curl wget python3 python3-pip
+        """
+        self.run_remote(setup_script)
+
+        # 2. Build llama.cpp
+        self.log("Building llama.cpp...")
+        build_script = f"""
+        if [ ! -d "/opt/llama.cpp" ]; then
+            git clone {LLAMA_CPP_REPO} /opt/llama.cpp
+        fi
+        cd /opt/llama.cpp
+        mkdir -p build && cd build
+        cmake ..
+        cmake --build . --config Release
+        """
+        self.run_remote(build_script)
+
+        # 3. Download Model
+        self.log(f"Downloading model: {self.model}...")
+        model_url = self.get_model_url(self.model)
+        download_script = f"""
+        mkdir -p /opt/models
+        if [ ! -f "/opt/models/{self.model}.gguf" ]; then
+            wget -O /opt/models/{self.model}.gguf {model_url}
+        fi
+        """
+        self.run_remote(download_script)
+
+        # 4. Create systemd service
+        self.log("Creating systemd service...")
+        service_content = f"""
+[Unit]
+Description=Llama.cpp Server for {self.name}
+After=network.target
+
+[Service]
+Type=simple
+User=root
+WorkingDirectory=/opt/llama.cpp
+ExecStart=/opt/llama.cpp/build/bin/llama-server -m /opt/models/{self.model}.gguf --host 0.0.0.0 --port 8080 -c 4096
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
+"""
+        # Write the unit file via a quoted heredoc so the multi-line string survives the ssh command
+        self.run_remote(f"cat > /etc/systemd/system/llama-server.service <<'EOF'\n{service_content}\nEOF")
+        self.run_remote("systemctl daemon-reload && systemctl enable llama-server && systemctl start llama-server")
+
+    def get_model_url(self, model_name: str) -> str:
+        # Mapping for common models to GGUF URLs (HuggingFace)
+        mapping = {
+            "qwen2.5-coder-7b": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf",
+            "hermes-3-llama-3.1-8b": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF/resolve/main/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf"
+        }
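+        # Unrecognized model names fall back to the Hermes 8B GGUF below.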
default="qwen2.5-coder-7b", help="Model to serve") + parser.add_argument("--region", default="nyc3", help="DO region") + + args = parser.parse_args() + + provisioner = Provisioner(args.name, args.size, args.model, args.region) + provisioner.provision() + +if __name__ == "__main__": + main()