#!/usr/bin/env python3
"""Deploy GPU instance on RunPod for Big Brain Gemma 4.

Flat deployment script: tries a list of GPU types on RunPod's GraphQL API,
waits for the pod to reach RUNNING, then reports the Ollama endpoint back to
two Gitea issues (#543 Timmy, #544 Bezalel). On total failure it comments
the fallback options instead.
"""
import json
import os
import time

import requests


def _read_secret(path):
    """Return the stripped contents of the credential file at *path* (~ expanded)."""
    with open(os.path.expanduser(path)) as fh:
        return fh.read().strip()


# Credentials are read eagerly at import time; the script fails fast if a file is missing.
RUNPOD_API_KEY = _read_secret('~/.config/runpod/access_key')
GITEA_TOKEN = _read_secret('~/.hermes/gitea_token_vps')
GITEA_FORGE = 'https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-home'


def log(msg):
    """Print *msg* prefixed with an HH:MM:SS timestamp."""
    print(f"[{time.strftime('%H:%M:%S')}] {msg}")


def comment_issue(issue_num, body):
    """Add a comment to a Gitea issue. Best-effort: failures are ignored.

    Commenting is informational only — a Gitea outage must never abort the
    deployment, so all request errors (including timeouts) are swallowed.
    """
    try:
        requests.post(
            f'{GITEA_FORGE}/issues/{issue_num}/comments',
            headers={
                'Authorization': f'token {GITEA_TOKEN}',
                'Content-Type': 'application/json',
            },
            json={"body": body},
            timeout=10,
        )
    except requests.RequestException:
        pass


def graphql_query(query, variables=None):
    """POST a GraphQL *query* (with optional *variables*) to the RunPod API.

    Returns the decoded JSON response dict. Network/timeout errors propagate
    as ``requests`` exceptions; callers needing best-effort must wrap this.
    """
    payload = {"query": query}
    if variables:
        payload["variables"] = variables
    r = requests.post(
        'https://api.runpod.io/graphql',
        headers={
            'Authorization': f'Bearer {RUNPOD_API_KEY}',
            'Content-Type': 'application/json',
        },
        json=payload,
        timeout=30,
    )
    return r.json()


def deploy_pod(gpu_type, name, cloud_type="COMMUNITY"):
    """Deploy a RunPod on-demand pod running the Ollama image.

    Returns the raw GraphQL response dict on success, or ``{"error": str}``
    if the request itself failed.
    """
    query = """
    mutation($input: PodFindAndDeployOnDemandInput!) {
        podFindAndDeployOnDemand(input: $input) {
            id
            desiredStatus
            machineId
            warning
        }
    }
    """
    variables = {
        "input": {
            "cloudType": cloud_type,
            "gpuCount": 1,
            "gpuTypeId": gpu_type,
            "name": name,
            "containerDiskInGb": 100,
            "imageName": "runpod/ollama:latest",
            "ports": "11434/http",
            "volumeInGb": 50,
            "volumeMountPath": "/workspace",
        }
    }
    try:
        return graphql_query(query, variables)
    except Exception as e:
        return {"error": str(e)}


def check_if_endpoint_exists(name):
    """Return existing RunPod endpoints whose name contains *name* (case-insensitive)."""
    query = "{ endpoints { id name } }"
    result = graphql_query(query)
    endpoints = result.get('data', {}).get('endpoints', [])
    return [e for e in endpoints if name.lower() in e.get('name', '').lower()]


# --- Main deployment logic ---------------------------------------------------

log("Starting Big Brain GPU deployment")
log(f"RunPod API key: {RUNPOD_API_KEY[:20]}...{RUNPOD_API_KEY[-10:]}")

# Step 1: Get available GPU types
log("\n=== Step 1: Getting GPU types ===")
gpu_query = "{ gpuTypes { id displayName memoryInGb secureCloud communityCloud } }"
result = graphql_query(gpu_query)
gpus = result.get('data', {}).get('gpuTypes', [])
log(f"Total GPU types: {len(gpus)}")

# Filter GPUs with 24GB+ VRAM for Gemma 3 27B
suitable_gpus = [gpu for gpu in gpus if gpu.get('memoryInGb', 0) >= 24]
log("\nGPUs with 24GB+ VRAM:")
for gpu in suitable_gpus[:15]:
    log(f"  {gpu.get('id')}: {gpu.get('displayName')} - {gpu.get('memoryInGb')}GB, "
        f"Secure: {gpu.get('secureCloud')}, Community: {gpu.get('communityCloud')}")

# Step 2: Try to find GPU availability.
# The earlier error was "no instances available" - we need to find available ones.
# The GPU ID format matters - try the ones from the list.
pod_name = "big-brain-timmy"

# Try different GPUs in order of preference (cheapest first with enough memory)
gpu_attempts = [
    ("NVIDIA RTX 4090", "COMMUNITY"),     # 24GB, ~$0.44/hr
    ("NVIDIA A40", "COMMUNITY"),          # 48GB
    ("NVIDIA RTX 3090", "COMMUNITY"),     # 24GB
    ("NVIDIA RTX 3090 Ti", "COMMUNITY"),  # 24GB
    ("NVIDIA L40S", "COMMUNITY"),         # 48GB
    ("NVIDIA A6000", "COMMUNITY"),        # 48GB
    # Try secure cloud
    ("NVIDIA RTX 4090", "SECURE"),
    ("NVIDIA A40", "SECURE"),
    ("NVIDIA L40S", "SECURE"),
]

log("\n=== Step 2: Attempting deployment ===")
deployed = False
data = {}
for gpu_type, cloud_type in gpu_attempts:
    log(f"Trying {gpu_type} ({cloud_type})...")
    result = deploy_pod(gpu_type, pod_name, cloud_type)
    errors = result.get('errors', [])
    data = result.get('data', {}).get('podFindAndDeployOnDemand', {})
    if errors:
        for err in errors:
            msg = err.get('message', '')
            if 'no longer any instances' in msg or 'no instances' in msg:
                log("  No instances available")
            elif 'invalid' in msg.lower() or 'not found' in msg.lower():
                log(f"  GPU type not found: {msg[:100]}")
            else:
                log(f"  Error: {msg[:100]}")
    elif data and data.get('id'):
        log(f"  āœ… SUCCESS! Pod ID: {data.get('id')}")
        log(f"  Machine ID: {data.get('machineId')}")
        log(f"  Status: {data.get('desiredStatus')}")
        deployed = True
        break
    else:
        log(f"  Response: {json.dumps(result)[:200]}")

if deployed:
    pod_id = data.get('id')
    # Wait for pod to be running
    log(f"\n=== Step 3: Waiting for pod {pod_id} to start ===")
    pod_status_query = """
    query($podId: String!) {
        pod(id: $podId) {
            id
            desiredStatus
            runtimeStatus
            machineId
            ports
        }
    }
    """
    runtime = 'unknown'
    for attempt in range(30):  # Wait up to 15 minutes
        time.sleep(30)
        result = graphql_query(pod_status_query, {"podId": pod_id})
        pod = result.get('data', {}).get('pod', {})
        runtime = pod.get('runtimeStatus', 'unknown')
        desired = pod.get('desiredStatus', 'unknown')
        log(f"  Attempt {attempt+1}: desired={desired}, runtime={runtime}")
        if runtime == 'RUNNING':
            log("  āœ… Pod is RUNNING!")
            # RunPod exposes HTTP ports through its proxy hostname.
            ip = f"{pod_id}-11434.proxy.runpod.net"
            log(f"  Ollama endpoint: http://{ip}:11434")
            log(f"  Ollama endpoint: http://{pod_id}.proxy.runpod.net:11434")

            # Comment on Gitea tickets
            comment_text = f"""# āœ… SUCCESS: GPU Instance Deployed

## Pod Details
- **Pod ID:** {pod_id}
- **GPU:** {gpu_type} ({cloud_type} cloud)
- **Status:** RUNNING
- **Endpoint:** http://{pod_id}.proxy.runpod.net:11434

## Next Steps
1. **SSH into pod:**
```bash
ssh root@{pod_id}.proxy.runpod.net
```
2. **Pull Gemma 3 27B:**
```bash
ollama pull gemma3:27b-instruct-q4_K_M
```
3. **Verify Ollama is working:**
```bash
curl http://localhost:11434/api/tags
```
4. **Test inference:**
```bash
curl http://localhost:11434/api/chat \\
  -H "Content-Type: application/json" \\
  -d '{{"model": "gemma3:27b-instruct-q4_K_M", "messages": [{{"role": "user", "content": "Hello from Timmy"}}]}}'
```
5. **Wire to Mac Hermes:** Add to `~/.hermes/config.yaml`:
```yaml
providers:
  big_brain:
    base_url: "http://{pod_id}.proxy.runpod.net:11434/v1"
    api_key: ""
    model: "gemma3:27b-instruct-q4_K_M"
```
6. **Test Hermes:**
```bash
hermes chat --model gemma3:27b-instruct-q4_K_M --provider big_brain
```"""
            comment_issue(543, comment_text)
            comment_issue(544, comment_text.replace("Timmy", "Bezalel").replace("Mac Hermes", "Bezalel Hermes"))
            log("\nšŸŽ‰ Big Brain GPU deployed successfully!")
            log(f"Pod: {pod_id}")
            log(f"Endpoint: http://{pod_id}.proxy.runpod.net:11434")
            log("Gitea tickets updated with deployment details")
            break
        elif runtime == 'ERROR' or desired == 'TERMINATED' or desired == 'SUSPENDED':
            log(f"  āŒ Pod failed: runtime={runtime}, desired={desired}")
            break
    if runtime != 'RUNNING':
        log("\nāš ļø Pod is not running after waiting. Check RunPod dashboard.")
else:
    log("\nāŒ No GPU instances available on RunPod")
    log("Try Vertex AI or check back later")
    # Comment on tickets
    comment_text = """# Deployment Status: RunPod Failed

## Issue
No GPU instances available on RunPod. All GPU types returned "no instances available" error.

## Alternatives
1. **Vertex AI** - Google Cloud's managed Gemma endpoints (see ticket for instructions)
2. **Lambda Labs** - Another GPU cloud provider
3. **Vast.ai** - Community GPU marketplace
4. **Wait for RunPod** - Check back in a few hours"""
    comment_issue(543, comment_text)
    comment_issue(544, comment_text)