#!/usr/bin/env python3
"""
[OPS] Automated VPS Provisioning System (Von Neumann as Code)
Part of the Gemini Sovereign Infrastructure Suite.

This script automates the creation and configuration of a "Wizard" node
from zero to serving inference via llama.cpp.

Usage:
    python3 provision_wizard.py --name fenrir --size s-2vcpu-4gb --model qwen2.5-coder-7b
"""

import argparse
import os
import subprocess
import sys
import textwrap
import time
from typing import Optional

import requests

# --- CONFIGURATION ---
DO_API_URL = "https://api.digitalocean.com/v2"
# We expect DIGITALOCEAN_TOKEN to be set in the environment.
DO_TOKEN = os.environ.get("DIGITALOCEAN_TOKEN")

# Default settings
DEFAULT_REGION = "nyc3"
DEFAULT_IMAGE = "ubuntu-22-04-x64"
LLAMA_CPP_REPO = "https://github.com/ggerganov/llama.cpp"
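

# A Provisioner instance owns the full lifecycle of one droplet:
# create -> wait for a public IP -> install and build llama.cpp ->
# download a model -> register a systemd service -> health check.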
class Provisioner:
    def __init__(self, name: str, size: str, model: str, region: str = DEFAULT_REGION):
        self.name = name
        self.size = size
        self.model = model
        self.region = region
        self.droplet_id: Optional[int] = None
        self.ip_address: Optional[str] = None

    def log(self, message: str):
        print(f"[*] {message}")

    def error(self, message: str):
        print(f"[!] ERROR: {message}", file=sys.stderr)
        sys.exit(1)

    def check_auth(self):
        if not DO_TOKEN:
            self.error("DIGITALOCEAN_TOKEN environment variable not set.")
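
    # POST /v2/droplets returns 202 Accepted with the droplet's ID; the
    # machine boots asynchronously, so wait_for_ip() polls for the address.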
    def create_droplet(self):
        self.log(f"Creating droplet '{self.name}' ({self.size}) in {self.region}...")

        # Get SSH keys to add to the droplet
        ssh_keys = self.get_ssh_keys()

        payload = {
            "name": self.name,
            "region": self.region,
            "size": self.size,
            "image": DEFAULT_IMAGE,
            "ssh_keys": ssh_keys,
            "backups": False,
            "ipv6": True,
            "monitoring": True,
            "tags": ["wizard", "gemini-provisioned"],
        }
        headers = {
            "Authorization": f"Bearer {DO_TOKEN}",
            "Content-Type": "application/json",
        }

        response = requests.post(f"{DO_API_URL}/droplets", json=payload, headers=headers, timeout=30)
        if response.status_code != 202:
            self.error(f"Failed to create droplet: {response.text}")

        data = response.json()
        self.droplet_id = data["droplet"]["id"]
        self.log(f"Droplet created (ID: {self.droplet_id}). Waiting for IP...")
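
    # Without at least one SSH key attached, a new droplet is only reachable
    # through the web console, so we attach every key on the account.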
    def get_ssh_keys(self) -> list:
        # Fetch existing SSH keys from DO account to ensure we can log in
        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
        response = requests.get(f"{DO_API_URL}/account/keys", headers=headers, timeout=30)
        if response.status_code != 200:
            self.log("Warning: Could not fetch SSH keys. Droplet might be inaccessible via SSH.")
            return []
        return [key["id"] for key in response.json()["ssh_keys"]]
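
    # The droplet JSON lists addresses under networks.v4, each tagged
    # "public" or "private"; we poll until a public entry appears.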
    def wait_for_ip(self, max_attempts: int = 60):
        headers = {"Authorization": f"Bearer {DO_TOKEN}"}
        attempts = 0
        while not self.ip_address:
            if attempts >= max_attempts:
                self.error("Timed out waiting for a public IP.")
            response = requests.get(f"{DO_API_URL}/droplets/{self.droplet_id}", headers=headers, timeout=30)
            data = response.json()
            networks = data["droplet"]["networks"]["v4"]
            for net in networks:
                if net["type"] == "public":
                    self.ip_address = net["ip_address"]
                    break
            if not self.ip_address:
                time.sleep(5)
                attempts += 1
        self.log(f"Droplet IP: {self.ip_address}")
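
    # StrictHostKeyChecking is disabled because a fresh droplet's host key
    # cannot be known in advance. Acceptable for one-shot provisioning, but
    # note it skips host authentication entirely.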
    def run_remote(self, command: str, check: bool = False):
        # Using subprocess to call ssh. Assumes local machine has the right private key.
        ssh_cmd = [
            "ssh", "-o", "StrictHostKeyChecking=no",
            "-o", "ConnectTimeout=10",
            f"root@{self.ip_address}", command,
        ]
        result = subprocess.run(ssh_cmd, capture_output=True, text=True)
        if check and result.returncode != 0:
            self.error(f"Remote command failed ({result.returncode}): {result.stderr.strip()}")
        return result
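
    # End-to-end node configuration: wait for sshd, install the build
    # toolchain, compile llama.cpp, fetch the model, register a systemd unit.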
    def setup_wizard(self):
        self.log("Starting remote setup...")

        # Wait for SSH to be ready
        retries = 12
        while retries > 0:
            res = self.run_remote("echo 'SSH Ready'")
            if res.returncode == 0:
                break
            self.log(f"Waiting for SSH... ({retries} retries left)")
            time.sleep(10)
            retries -= 1

        if retries == 0:
            self.error("SSH timed out.")

        # 1. Update and install dependencies
        self.log("Installing dependencies...")
        setup_script = textwrap.dedent("""\
            export DEBIAN_FRONTEND=noninteractive
            apt-get update && apt-get upgrade -y
            apt-get install -y build-essential git cmake curl wget python3 python3-pip
            """)
        self.run_remote(setup_script, check=True)

        # 2. Build llama.cpp
        self.log("Building llama.cpp...")
        build_script = textwrap.dedent(f"""\
            if [ ! -d "/opt/llama.cpp" ]; then
                git clone {LLAMA_CPP_REPO} /opt/llama.cpp
            fi
            cd /opt/llama.cpp
            mkdir -p build && cd build
            cmake ..
            cmake --build . --config Release
            """)
        self.run_remote(build_script, check=True)

        # 3. Download Model
        self.log(f"Downloading model: {self.model}...")
        model_url = self.get_model_url(self.model)
        download_script = textwrap.dedent(f"""\
            mkdir -p /opt/models
            if [ ! -f "/opt/models/{self.model}.gguf" ]; then
                wget -O "/opt/models/{self.model}.gguf" "{model_url}"
            fi
            """)
        self.run_remote(download_script, check=True)

        # 4. Create systemd service
        self.log("Creating systemd service...")
        service_content = textwrap.dedent(f"""\
            [Unit]
            Description=Llama.cpp Server for {self.name}
            After=network.target

            [Service]
            Type=simple
            User=root
            WorkingDirectory=/opt/llama.cpp
            ExecStart=/opt/llama.cpp/build/bin/llama-server -m /opt/models/{self.model}.gguf --host 0.0.0.0 --port 8080 -c 4096
            Restart=always
            RestartSec=10

            [Install]
            WantedBy=multi-user.target
            """)
        # Quoted heredoc ('EOF') stops the remote shell from expanding anything
        # inside the unit file; cat writes the multi-line string safely.
        self.run_remote(f"cat <<'EOF' > /etc/systemd/system/llama-server.service\n{service_content}EOF")
        self.run_remote("systemctl daemon-reload && systemctl enable llama-server && systemctl start llama-server", check=True)
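
    # The URLs pin specific Q4_K_M GGUF quantizations on Hugging Face;
    # extend this mapping to serve other models or quant levels.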
    def get_model_url(self, model_name: str) -> str:
        # Mapping for common models to GGUF URLs (HuggingFace)
        mapping = {
            "qwen2.5-coder-7b": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf",
            "hermes-3-llama-3.1-8b": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF/resolve/main/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf",
        }
        if model_name not in mapping:
            self.log(f"[WARNING] Unknown model '{model_name}'; falling back to hermes-3-llama-3.1-8b.")
        return mapping.get(model_name, mapping["hermes-3-llama-3.1-8b"])

    def health_check(self):
        self.log("Performing health check...")
        time.sleep(15)  # Give the server a moment to start; large models may need longer
        try:
            url = f"http://{self.ip_address}:8080/health"
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                self.log(f"[SUCCESS] Wizard {self.name} is healthy and serving inference.")
                self.log(f"Endpoint: {url}")
            else:
                self.log(f"[WARNING] Health check returned status {response.status_code}")
        except requests.RequestException as e:
            self.log(f"[ERROR] Health check failed: {e}")
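
    # Full pipeline. Each stage exits the script on fatal errors, so reaching
    # health_check() implies creation and remote setup both succeeded.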
    def provision(self):
        self.check_auth()
        self.create_droplet()
        self.wait_for_ip()
        self.setup_wizard()
        self.health_check()


def main():
    parser = argparse.ArgumentParser(description="Gemini Provisioner")
    parser.add_argument("--name", required=True, help="Name of the wizard")
    parser.add_argument("--size", default="s-2vcpu-4gb", help="DO droplet size")
    parser.add_argument("--model", default="qwen2.5-coder-7b", help="Model to serve")
    parser.add_argument("--region", default=DEFAULT_REGION, help="DO region")

    args = parser.parse_args()

    provisioner = Provisioner(args.name, args.size, args.model, args.region)
    provisioner.provision()


if __name__ == "__main__":
    main()
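
# Example smoke test once a wizard is up (paths assume a reasonably recent
# llama.cpp build, which serves /health and an OpenAI-compatible API):
#
#   curl http://<droplet-ip>:8080/health
#   curl http://<droplet-ip>:8080/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"messages": [{"role": "user", "content": "hello"}]}'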