#!/usr/bin/env python3
"""
[OPS] Automated VPS Provisioning System (Von Neumann as Code)
Part of the Gemini Sovereign Infrastructure Suite.

This script automates the creation and configuration of a "Wizard" node
from zero to serving inference via llama.cpp.

Usage:
    python3 provision_wizard.py --name fenrir --size s-2vcpu-4gb --model qwen2.5-coder-7b
"""

import argparse
import os
import subprocess
import sys
import textwrap
import time
from typing import Optional

import requests

# --- CONFIGURATION ---
DO_API_URL = "https://api.digitalocean.com/v2"
# We expect DIGITALOCEAN_TOKEN to be set in the environment.
DO_TOKEN = os.environ.get("DIGITALOCEAN_TOKEN")

# Default settings
DEFAULT_REGION = "nyc3"
DEFAULT_IMAGE = "ubuntu-22-04-x64"
LLAMA_CPP_REPO = "https://github.com/ggerganov/llama.cpp"
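

# A Provisioner instance owns the full lifecycle of one droplet:
# create -> wait for a public IP -> install and build llama.cpp ->
# download a model -> register a systemd service -> health check.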
class Provisioner:
    def __init__(self, name: str, size: str, model: str, region: str = DEFAULT_REGION):
        self.name = name
        self.size = size
        self.model = model
        self.region = region
        self.droplet_id: Optional[int] = None
        self.ip_address: Optional[str] = None

    def log(self, message: str):
        print(f"[*] {message}")

    def error(self, message: str):
        print(f"[!] ERROR: {message}", file=sys.stderr)
        sys.exit(1)

    def check_auth(self):
        if not DO_TOKEN:
            self.error("DIGITALOCEAN_TOKEN environment variable not set.")
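
    # POST /v2/droplets returns 202 Accepted with the droplet's ID; the
    # machine boots asynchronously, so wait_for_ip() polls for the address.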
    def create_droplet(self):
        self.log(f"Creating droplet '{self.name}' ({self.size}) in {self.region}...")

        # Get SSH keys to add to the droplet
        ssh_keys = self.get_ssh_keys()

        payload = {
            "name": self.name,
            "region": self.region,
            "size": self.size,
            "image": DEFAULT_IMAGE,
            "ssh_keys": ssh_keys,
            "backups": False,
            "ipv6": True,
            "monitoring": True,
            "tags": ["wizard", "gemini-provisioned"],
        }
        headers = {
            "Authorization": f"Bearer {DO_TOKEN}",
            "Content-Type": "application/json",
        }

        response = requests.post(f"{DO_API_URL}/droplets", json=payload, headers=headers, timeout=30)
        if response.status_code != 202:
            self.error(f"Failed to create droplet: {response.text}")

        data = response.json()
        self.droplet_id = data["droplet"]["id"]
        self.log(f"Droplet created (ID: {self.droplet_id}). Waiting for IP...")
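
    # Without at least one SSH key attached, a new droplet is only reachable
    # through the web console, so we attach every key on the account.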
    def get_ssh_keys(self) -> list:
        # Fetch existing SSH keys from DO account to ensure we can log in
        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
        response = requests.get(f"{DO_API_URL}/account/keys", headers=headers, timeout=30)
        if response.status_code != 200:
            self.log("Warning: Could not fetch SSH keys. Droplet might be inaccessible via SSH.")
            return []
        return [key["id"] for key in response.json()["ssh_keys"]]
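
    # The droplet JSON lists addresses under networks.v4, each tagged
    # "public" or "private"; we poll until a public entry appears.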
    def wait_for_ip(self, max_attempts: int = 60):
        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
        attempts = 0
        while not self.ip_address:
            if attempts >= max_attempts:
                self.error("Timed out waiting for a public IP.")
            response = requests.get(f"{DO_API_URL}/droplets/{self.droplet_id}", headers=headers, timeout=30)
            data = response.json()
            networks = data["droplet"]["networks"]["v4"]
            for net in networks:
                if net["type"] == "public":
                    self.ip_address = net["ip_address"]
                    break
            if not self.ip_address:
                time.sleep(5)
                attempts += 1
        self.log(f"Droplet IP: {self.ip_address}")
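
    # StrictHostKeyChecking is disabled because a fresh droplet's host key
    # cannot be known in advance. Acceptable for one-shot provisioning, but
    # note it skips host authentication entirely.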
    def run_remote(self, command: str, check: bool = False):
        # Using subprocess to call ssh. Assumes local machine has the right private key.
        ssh_cmd = [
            "ssh", "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            f"root@{self.ip_address}", command,
        ]
        result = subprocess.run(ssh_cmd, capture_output=True, text=True)
        if check and result.returncode != 0:
            self.error(f"Remote command failed ({result.returncode}): {result.stderr.strip()}")
        return result
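
    # End-to-end node configuration: wait for sshd, install the build
    # toolchain, compile llama.cpp, fetch the model, register a systemd unit.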
    def setup_wizard(self):
        self.log("Starting remote setup...")

        # Wait for SSH to be ready
        retries = 12
        while retries > 0:
            res = self.run_remote("echo 'SSH Ready'")
            if res.returncode == 0:
                break
            self.log(f"Waiting for SSH... ({retries} retries left)")
            time.sleep(10)
            retries -= 1

        if retries == 0:
            self.error("SSH timed out.")

        # 1. Update and install dependencies
        self.log("Installing dependencies...")
        setup_script = textwrap.dedent("""\
            export DEBIAN_FRONTEND=noninteractive
            apt-get update && apt-get upgrade -y
            apt-get install -y build-essential git cmake curl wget python3 python3-pip
            """)
        self.run_remote(setup_script, check=True)

        # 2. Build llama.cpp
        self.log("Building llama.cpp...")
        build_script = textwrap.dedent(f"""\
            if [ ! -d "/opt/llama.cpp" ]; then
                git clone {LLAMA_CPP_REPO} /opt/llama.cpp
            fi
            cd /opt/llama.cpp
            mkdir -p build && cd build
            cmake ..
            cmake --build . --config Release
            """)
        self.run_remote(build_script, check=True)

        # 3. Download Model
        self.log(f"Downloading model: {self.model}...")
        model_url = self.get_model_url(self.model)
        download_script = textwrap.dedent(f"""\
            mkdir -p /opt/models
            if [ ! -f "/opt/models/{self.model}.gguf" ]; then
                wget -O "/opt/models/{self.model}.gguf" "{model_url}"
            fi
            """)
        self.run_remote(download_script, check=True)

        # 4. Create systemd service
        self.log("Creating systemd service...")
        service_content = textwrap.dedent(f"""\
            [Unit]
            Description=Llama.cpp Server for {self.name}
            After=network.target

            [Service]
            Type=simple
            User=root
            WorkingDirectory=/opt/llama.cpp
            ExecStart=/opt/llama.cpp/build/bin/llama-server -m /opt/models/{self.model}.gguf --host 0.0.0.0 --port 8080 -c 4096
            Restart=always
            RestartSec=10

            [Install]
            WantedBy=multi-user.target
            """)
        # Quoted heredoc ('EOF') stops the remote shell from expanding anything
        # inside the unit file; cat writes the multi-line string safely.
        self.run_remote(f"cat <<'EOF' > /etc/systemd/system/llama-server.service\n{service_content}EOF")
        self.run_remote("systemctl daemon-reload && systemctl enable llama-server && systemctl start llama-server", check=True)
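
    # The URLs pin specific Q4_K_M GGUF quantizations on Hugging Face;
    # extend this mapping to serve other models or quant levels.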
    def get_model_url(self, model_name: str) -> str:
        # Mapping for common models to GGUF URLs (HuggingFace)
        mapping = {
            "qwen2.5-coder-7b": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf",
            "hermes-3-llama-3.1-8b": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF/resolve/main/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf",
        }
        if model_name not in mapping:
            self.log(f"[WARNING] Unknown model '{model_name}'; falling back to hermes-3-llama-3.1-8b.")
        return mapping.get(model_name, mapping["hermes-3-llama-3.1-8b"])

    def health_check(self):
        self.log("Performing health check...")
        time.sleep(15)  # Give the server a moment to start; large models may need longer
        try:
            url = f"http://{self.ip_address}:8080/health"
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                self.log(f"[SUCCESS] Wizard {self.name} is healthy and serving inference.")
                self.log(f"Endpoint: {url}")
            else:
                self.log(f"[WARNING] Health check returned status {response.status_code}")
        except requests.RequestException as e:
            self.log(f"[ERROR] Health check failed: {e}")
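
    # Full pipeline. Each stage exits the script on fatal errors, so reaching
    # health_check() implies creation and remote setup both succeeded.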
    def provision(self):
        self.check_auth()
        self.create_droplet()
        self.wait_for_ip()
        self.setup_wizard()
        self.health_check()


def main():
    parser = argparse.ArgumentParser(description="Gemini Provisioner")
    parser.add_argument("--name", required=True, help="Name of the wizard")
    parser.add_argument("--size", default="s-2vcpu-4gb", help="DO droplet size")
    parser.add_argument("--model", default="qwen2.5-coder-7b", help="Model to serve")
    parser.add_argument("--region", default=DEFAULT_REGION, help="DO region")

    args = parser.parse_args()

    provisioner = Provisioner(args.name, args.size, args.model, args.region)
    provisioner.provision()


if __name__ == "__main__":
    main()
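
# Example smoke test once a wizard is up (paths assume a reasonably recent
# llama.cpp build, which serves /health and an OpenAI-compatible API):
#
#   curl http://<droplet-ip>:8080/health
#   curl http://<droplet-ip>:8080/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"messages": [{"role": "user", "content": "hello"}]}'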