#!/usr/bin/env python3
import json
import os
import subprocess
import time

import requests
# RunPod API key, read once at import time from the user's config directory.
# FIX: the original `open(...).read()` never closed the file handle; use a
# context manager so the descriptor is released deterministically.
# NOTE(review): raises FileNotFoundError if the key file is absent — this is
# intentional fail-fast behavior, same as the original.
with open(os.path.expanduser('~/.config/runpod/access_key')) as _key_file:
    RUNPOD_KEY = _key_file.read().strip()
def gql(query, variables=None):
    """Send a GraphQL document to the RunPod API and return the decoded JSON reply.

    Args:
        query: GraphQL query/mutation text.
        variables: Optional mapping of GraphQL variables; only included in the
            request body when truthy (a non-empty mapping).

    Returns:
        The parsed JSON response body (both ``data`` and ``errors`` keys, if
        present, are passed through untouched to the caller).
    """
    # Attach "variables" only when the caller actually supplied some.
    body = {"query": query, "variables": variables} if variables else {"query": query}
    auth_headers = {
        'Authorization': f'Bearer {RUNPOD_KEY}',
        'Content-Type': 'application/json',
    }
    response = requests.post(
        'https://api.runpod.io/graphql',
        headers=auth_headers,
        json=body,
        timeout=30,
    )
    return response.json()
def deploy(gpu_type, name, cloud="COMMUNITY"):
    """Request an on-demand RunPod pod for the given GPU type.

    Args:
        gpu_type: RunPod GPU type id, e.g. ``"NVIDIA RTX 4090"``.
        name: Display name for the new pod.
        cloud: RunPod cloud tier keyword (default ``"COMMUNITY"``).

    Returns:
        The raw JSON response from :func:`gql` for the
        ``podFindAndDeployOnDemand`` mutation.
    """
    # Placeholder-based template: the tokens below are swapped for the actual
    # arguments via plain string substitution before the request is sent.
    mutation = """
    mutation {
      podFindAndDeployOnDemand(input: {
        cloudType: CLOUD_TYPE,
        gpuCount: 1,
        gpuTypeId: "GPU_TYPE",
        name: "POD_NAME",
        containerDiskInGb: 100,
        imageName: "runpod/ollama:latest",
        ports: "11434/http",
        volumeInGb: 50,
        volumeMountPath: "/workspace"
      }) { id desiredStatus machineId }
    }
    """
    substitutions = {
        "CLOUD_TYPE": cloud,
        "GPU_TYPE": gpu_type,
        "POD_NAME": name,
    }
    for placeholder, value in substitutions.items():
        mutation = mutation.replace(placeholder, value)
    return gql(mutation)
# Banner plus a key fingerprint so the operator can confirm which credential
# is in use without printing the whole secret.
print("=== Big Brain GPU Deployment ===")
print(f"Key: {RUNPOD_KEY[:20]}...")

# Candidate GPU models, attempted in list order; all target the COMMUNITY cloud.
gpus_to_try = [
    (model, "COMMUNITY")
    for model in (
        "NVIDIA RTX 4090",
        "NVIDIA RTX 3090",
        "NVIDIA A40",
        "NVIDIA L40S",
    )
]
# Walk the candidate GPU list until one deployment succeeds.
deployed_pod = None
for gpu, cloud in gpus_to_try:
    print(f"Trying {gpu} ({cloud})...")
    result = deploy(gpu, f"big-brain-timmy-{gpu.replace(' ','-').lower()}", cloud)
    errors = result.get('errors', [])
    # FIX: GraphQL responses carry "data": null alongside "errors", so the old
    # result.get('data', {}).get(...) chain raised AttributeError on None
    # exactly when an error occurred. Coalesce each null level to {} instead.
    data = (result.get('data') or {}).get('podFindAndDeployOnDemand') or {}

    if errors:
        msg = errors[0].get('message', '')
        # RunPod phrases "out of capacity" a couple of ways; match both.
        if 'no longer any instances' in msg or 'no instances' in msg:
            print(" No instances available")
        else:
            print(f" Error: {msg[:100]}")
    elif data.get('id'):
        print(f" SUCCESS! Pod ID: {data['id']}")
        deployed_pod = data['id']
        break
# Report the outcome: persist the proxy endpoint on success, otherwise point
# the operator at the fallback provider.
if not deployed_pod:
    print("\nNo GPU available on RunPod")
    print("Try Vertex AI instead")
else:
    print(f"\nPod {deployed_pod} deployed!")
    # RunPod exposes HTTP ports through its proxy under this hostname scheme.
    endpoint = f"https://{deployed_pod}-11434.proxy.runpod.net"
    print(f"Endpoint: {endpoint}")
    print("Waiting for pod to start (check in 5 min)...")

    # Persist the endpoint so other tools can pick it up later.
    path = os.path.expanduser('~/.config/big_brain_endpoint.txt')
    with open(path, 'w') as f:
        f.write(endpoint)
    print(f"Endpoint saved to {path}")