Files
timmy-home/deploy_big_brain.py

77 lines
2.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import subprocess, json, os, time, requests
# RunPod API key, read once at import time from the user's config directory.
# A context manager closes the file handle promptly instead of leaving it to
# the garbage collector.
with open(os.path.expanduser('~/.config/runpod/access_key')) as _key_file:
    RUNPOD_KEY = _key_file.read().strip()
def gql(query, variables=None):
    """POST a GraphQL document to the RunPod API and return the decoded reply.

    Args:
        query: GraphQL query or mutation text.
        variables: Optional mapping of GraphQL variables; left out of the
            payload when falsy.

    Returns:
        The decoded JSON body as a dict. When the body is not valid JSON, or
        is valid JSON but not an object, a dict of the form
        ``{"errors": [{"message": ...}]}`` is returned instead so callers can
        treat it like any other GraphQL error response.

    Exceptions raised by ``requests.post`` itself (connection failures,
    timeouts) are not caught here and propagate to the caller.
    """
    payload = {"query": query}
    if variables:
        payload["variables"] = variables
    r = requests.post(
        'https://api.runpod.io/graphql',
        headers={
            'Authorization': f'Bearer {RUNPOD_KEY}',
            'Content-Type': 'application/json',
        },
        json=payload,
        timeout=30,
    )
    try:
        body = r.json()
    except ValueError:
        # Every JSON decode error requests can raise is a ValueError subclass.
        # Gateways and proxies may answer with HTML or an empty body.
        return {"errors": [{
            "message": f"HTTP {r.status_code}: non-JSON response: {r.text[:200]}",
        }]}
    if not isinstance(body, dict):
        # Callers use .get() on the result, so a bare list/null would crash them.
        return {"errors": [{
            "message": f"HTTP {r.status_code}: unexpected JSON response: {str(body)[:200]}",
        }]}
    return body
def deploy(gpu_type, name, cloud="COMMUNITY"):
    """Request a single-GPU on-demand pod running the Ollama image.

    Builds a ``podFindAndDeployOnDemand`` mutation and sends it through
    ``gql``. The pod is requested with a 100 GB container disk, a 50 GB
    volume mounted at ``/workspace``, and port 11434 exposed over HTTP.

    Args:
        gpu_type: Value for ``gpuTypeId`` (e.g. ``"NVIDIA RTX 4090"``).
        name: Pod name.
        cloud: Value for ``cloudType``; inserted into the mutation unquoted,
            exactly as given.

    Returns:
        Whatever ``gql`` returns for the mutation.
    """
    # json.dumps produces a quoted literal with quotes, backslashes and
    # control characters escaped, which is also a valid GraphQL string.
    # Building the document in one pass also means a value that happens to
    # contain another placeholder's text can no longer be rewritten.
    gpu_literal = json.dumps(gpu_type, ensure_ascii=False)
    name_literal = json.dumps(name, ensure_ascii=False)
    query = f"""
mutation {{
  podFindAndDeployOnDemand(input: {{
    cloudType: {cloud},
    gpuCount: 1,
    gpuTypeId: {gpu_literal},
    name: {name_literal},
    containerDiskInGb: 100,
    imageName: "runpod/ollama:latest",
    ports: "11434/http",
    volumeInGb: 50,
    volumeMountPath: "/workspace"
  }}) {{ id desiredStatus machineId }}
}}
"""
    return gql(query)
# Script body: try each candidate GPU type in order, stop at the first pod
# that deploys, then record its proxy endpoint on disk.
print("=== Big Brain GPU Deployment ===")
# Show only a short prefix so the credential is identifiable without putting
# a large part of the secret into terminal scrollback or logs.
print(f"Key: {RUNPOD_KEY[:4]}...")
# Try multiple GPU types
gpus_to_try = [
    ("NVIDIA RTX 4090", "COMMUNITY"),
    ("NVIDIA RTX 3090", "COMMUNITY"),
    ("NVIDIA A40", "COMMUNITY"),
    ("NVIDIA L40S", "COMMUNITY"),
]
deployed_pod = None
for gpu, cloud in gpus_to_try:
    print(f"Trying {gpu} ({cloud})...")
    result = deploy(gpu, f"big-brain-timmy-{gpu.replace(' ','-').lower()}", cloud)
    errors = result.get('errors') or []
    # "data" (or the mutation field inside it) can be present but null,
    # typically alongside errors; `or {}` keeps the lookups below safe.
    data = (result.get('data') or {}).get('podFindAndDeployOnDemand') or {}
    if errors:
        msg = errors[0].get('message') or ''
        if 'no longer any instances' in msg or 'no instances' in msg:
            print(f" No instances available")
        else:
            print(f" Error: {msg[:100]}")
    elif data.get('id'):
        print(f" SUCCESS! Pod ID: {data['id']}")
        deployed_pod = data['id']
        break
    else:
        # Neither an error nor a pod id: report it rather than moving on silently.
        print(" Unexpected response: no pod id returned")
if deployed_pod:
    print(f"\nPod {deployed_pod} deployed!")
    # Port 11434 was requested as an HTTP port; the endpoint is built from
    # the pod id and that port.
    endpoint = f"https://{deployed_pod}-11434.proxy.runpod.net"
    print(f"Endpoint: {endpoint}")
    print("Waiting for pod to start (check in 5 min)...")
    # Save endpoint
    path = os.path.expanduser('~/.config/big_brain_endpoint.txt')
    with open(path, 'w') as f:
        f.write(endpoint)
    print(f"Endpoint saved to {path}")
else:
    print("\nNo GPU available on RunPod")
    print("Try Vertex AI instead")