#!/usr/bin/env python3
"""
[OPS] Automated VPS Provisioning System (Von Neumann as Code)

Part of the Gemini Sovereign Infrastructure Suite. This script automates the
creation and configuration of a "Wizard" node from zero to serving inference
via llama.cpp.

Usage:
    python3 provision_wizard.py --name fenrir --size s-2vcpu-4gb --model qwen2.5-coder-7b
"""

import os
import sys
import time
import argparse
import requests
import subprocess
import json
from typing import Optional, Dict, Any

# --- CONFIGURATION ---
DO_API_URL = "https://api.digitalocean.com/v2"
# We expect DIGITALOCEAN_TOKEN to be set in the environment.
DO_TOKEN = os.environ.get("DIGITALOCEAN_TOKEN")

# Default settings
DEFAULT_REGION = "nyc3"
DEFAULT_IMAGE = "ubuntu-22-04-x64"
LLAMA_CPP_REPO = "https://github.com/ggerganov/llama.cpp"

# Timeout (seconds) applied to every DigitalOcean API call so a network
# stall cannot hang the provisioner indefinitely.
API_TIMEOUT = 30


class Provisioner:
    """Drive a DigitalOcean droplet from creation to a running llama.cpp server.

    Lifecycle (see :meth:`provision`): auth check -> create droplet ->
    wait for public IPv4 -> remote setup over SSH -> HTTP health check.
    """

    def __init__(self, name: str, size: str, model: str, region: str = DEFAULT_REGION):
        self.name = name
        self.size = size
        self.model = model
        self.region = region
        self.droplet_id: Optional[int] = None
        self.ip_address: Optional[str] = None

    def log(self, message: str) -> None:
        """Print a progress line."""
        print(f"[*] {message}")

    def error(self, message: str) -> None:
        """Print an error line and terminate the process with exit code 1."""
        print(f"[!] ERROR: {message}")
        sys.exit(1)

    def check_auth(self) -> None:
        """Abort early if no API token is available in the environment."""
        if not DO_TOKEN:
            self.error("DIGITALOCEAN_TOKEN environment variable not set.")

    def create_droplet(self) -> None:
        """Create the droplet via the DO API and record its ID.

        Exits the process on any non-202 response.
        """
        self.log(f"Creating droplet '{self.name}' ({self.size}) in {self.region}...")

        # Get SSH keys to add to the droplet
        ssh_keys = self.get_ssh_keys()

        payload: Dict[str, Any] = {
            "name": self.name,
            "region": self.region,
            "size": self.size,
            "image": DEFAULT_IMAGE,
            "ssh_keys": ssh_keys,
            "backups": False,
            "ipv6": True,
            "monitoring": True,
            "tags": ["wizard", "gemini-provisioned"],
        }

        headers = {
            "Authorization": f"Bearer {DO_TOKEN}",
            "Content-Type": "application/json",
        }

        response = requests.post(
            f"{DO_API_URL}/droplets", json=payload, headers=headers, timeout=API_TIMEOUT
        )
        # DO returns 202 Accepted for an asynchronous droplet create.
        if response.status_code != 202:
            self.error(f"Failed to create droplet: {response.text}")

        data = response.json()
        self.droplet_id = data["droplet"]["id"]
        self.log(f"Droplet created (ID: {self.droplet_id}). Waiting for IP...")

    def get_ssh_keys(self) -> list:
        """Return the IDs of all SSH keys on the DO account.

        Best-effort: on failure we warn and return an empty list rather than
        aborting, since the droplet can still be created (though it may be
        unreachable over SSH).
        """
        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
        response = requests.get(
            f"{DO_API_URL}/account/keys", headers=headers, timeout=API_TIMEOUT
        )
        if response.status_code != 200:
            self.log("Warning: Could not fetch SSH keys. Droplet might be inaccessible via SSH.")
            return []
        return [key["id"] for key in response.json()["ssh_keys"]]

    def wait_for_ip(self) -> None:
        """Poll the droplet until it has a public IPv4 address, then store it."""
        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
        while not self.ip_address:
            response = requests.get(
                f"{DO_API_URL}/droplets/{self.droplet_id}",
                headers=headers,
                timeout=API_TIMEOUT,
            )
            data = response.json()
            networks = data["droplet"]["networks"]["v4"]
            for net in networks:
                if net["type"] == "public":
                    self.ip_address = net["ip_address"]
                    break
            if not self.ip_address:
                time.sleep(5)
        self.log(f"Droplet IP: {self.ip_address}")

    def run_remote(self, command: str) -> subprocess.CompletedProcess:
        """Run `command` on the droplet as root via ssh; return the result.

        Assumes the local machine holds a private key matching one of the
        account keys installed on the droplet. ConnectTimeout bounds how long
        each attempt can block while the droplet is still booting.
        """
        ssh_cmd = [
            "ssh",
            "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            f"root@{self.ip_address}",
            command,
        ]
        result = subprocess.run(ssh_cmd, capture_output=True, text=True)
        return result

    def setup_wizard(self) -> None:
        """Configure the droplet: deps, llama.cpp build, model, systemd unit.

        Aborts the process if any critical remote step fails.
        """
        self.log("Starting remote setup...")

        # Wait for SSH to be ready (up to ~2 minutes).
        retries = 12
        while retries > 0:
            res = self.run_remote("echo 'SSH Ready'")
            if res.returncode == 0:
                break
            self.log(f"Waiting for SSH... ({retries} retries left)")
            time.sleep(10)
            retries -= 1
        if retries == 0:
            self.error("SSH timed out.")

        # 1. Update and install dependencies
        self.log("Installing dependencies...")
        setup_script = """
export DEBIAN_FRONTEND=noninteractive
apt-get update && apt-get upgrade -y
apt-get install -y build-essential git cmake curl wget python3 python3-pip
"""
        res = self.run_remote(setup_script)
        if res.returncode != 0:
            self.error(f"Dependency installation failed: {res.stderr}")

        # 2. Build llama.cpp
        self.log("Building llama.cpp...")
        build_script = f"""
if [ ! -d "/opt/llama.cpp" ]; then
    git clone {LLAMA_CPP_REPO} /opt/llama.cpp
fi
cd /opt/llama.cpp
mkdir -p build && cd build
cmake ..
cmake --build . --config Release
"""
        res = self.run_remote(build_script)
        if res.returncode != 0:
            self.error(f"llama.cpp build failed: {res.stderr}")

        # 3. Download Model (skipped if already present, so reruns are cheap).
        self.log(f"Downloading model: {self.model}...")
        model_url = self.get_model_url(self.model)
        download_script = f"""
mkdir -p /opt/models
if [ ! -f "/opt/models/{self.model}.gguf" ]; then
    wget -O /opt/models/{self.model}.gguf {model_url}
fi
"""
        res = self.run_remote(download_script)
        if res.returncode != 0:
            self.error(f"Model download failed: {res.stderr}")

        # 4. Create systemd service
        self.log("Creating systemd service...")
        service_content = f"""
[Unit]
Description=Llama.cpp Server for {self.name}
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=/opt/llama.cpp
ExecStart=/opt/llama.cpp/build/bin/llama-server -m /opt/models/{self.model}.gguf --host 0.0.0.0 --port 8080 -c 4096
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
"""
        # BUGFIX: the original used `cat < /path` which READS the file instead
        # of writing it, and never declared the EOF delimiter — the unit file
        # was never created. A quoted heredoc (<<'EOF') writes the content
        # verbatim with no shell expansion.
        self.run_remote(
            "cat <<'EOF' > /etc/systemd/system/llama-server.service\n"
            f"{service_content}\nEOF"
        )
        self.run_remote(
            "systemctl daemon-reload && systemctl enable llama-server && systemctl start llama-server"
        )

    def get_model_url(self, model_name: str) -> str:
        """Map a short model name to its GGUF download URL (HuggingFace).

        Unknown names fall back to the hermes-3 URL (original behavior),
        but we now log a warning so the substitution is visible.
        """
        mapping = {
            "qwen2.5-coder-7b": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf",
            "hermes-3-llama-3.1-8b": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF/resolve/main/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf",
        }
        if model_name not in mapping:
            self.log(f"Warning: unknown model '{model_name}', falling back to hermes-3-llama-3.1-8b.")
        return mapping.get(model_name, mapping["hermes-3-llama-3.1-8b"])

    def health_check(self) -> None:
        """Probe the llama-server /health endpoint and report the result.

        Best-effort: logs success/warning/error but never aborts, since the
        droplet is already provisioned at this point.
        """
        self.log("Performing health check...")
        time.sleep(15)  # Wait for server to start
        try:
            url = f"http://{self.ip_address}:8080/health"
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                self.log(f"[SUCCESS] Wizard {self.name} is healthy and serving inference.")
                self.log(f"Endpoint: {url}")
            else:
                self.log(f"[WARNING] Health check returned status {response.status_code}")
        except Exception as e:
            self.log(f"[ERROR] Health check failed: {e}")

    def provision(self) -> None:
        """Run the full provisioning pipeline end to end."""
        self.check_auth()
        self.create_droplet()
        self.wait_for_ip()
        self.setup_wizard()
        self.health_check()


def main() -> None:
    """CLI entry point: parse arguments and provision one wizard node."""
    parser = argparse.ArgumentParser(description="Gemini Provisioner")
    parser.add_argument("--name", required=True, help="Name of the wizard")
    parser.add_argument("--size", default="s-2vcpu-4gb", help="DO droplet size")
    parser.add_argument("--model", default="qwen2.5-coder-7b", help="Model to serve")
    parser.add_argument("--region", default="nyc3", help="DO region")

    args = parser.parse_args()

    provisioner = Provisioner(args.name, args.size, args.model, args.region)
    provisioner.provision()


if __name__ == "__main__":
    main()