#!/usr/bin/env python3
"""Add deployment instructions to GPU prove-it tickets."""

import json
import os
import subprocess


def _read_secret(path):
    """Return the stripped contents of a credential file under the user's home.

    Uses a context manager so the file handle is closed promptly (the
    original `open(...).read()` chain leaked the handle).
    """
    with open(os.path.expanduser(path)) as fh:
        return fh.read().strip()


# Gitea personal-access token used to authenticate the comment POSTs.
gitea_tok = _read_secret('~/.hermes/gitea_token_vps')

# Base URL of the target repo's issue API.
forge = 'https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-home'

# RunPod access key; the ticket text points at this file rather than
# embedding the secret itself.
runpod_key = _read_secret('~/.config/runpod/access_key')

# NOTE(review): read but never used anywhere below — the tickets only
# reference the key's *path*. Kept for backward compatibility; consider
# removing the read entirely.
vertex_key = _read_secret('~/.config/vertex/key')
|
|
|
|
def comment(issue_num, body):
    """POST *body* as a markdown comment on Gitea issue *issue_num*.

    Shells out to curl against the forge issue-comments API. Unlike the
    original (which discarded the CompletedProcess, silently swallowing
    every network/HTTP failure), this reports failures and returns a
    success flag. Callers that ignore the return value are unaffected.

    Returns True on success, False on timeout or a non-zero curl exit.
    """
    try:
        result = subprocess.run(
            ['curl', '-sS', '--fail', '-X', 'POST',
             # --fail makes curl exit non-zero on HTTP 4xx/5xx so errors
             # are no longer invisible; -sS keeps output quiet but still
             # prints error messages to stderr.
             f'{forge}/issues/{issue_num}/comments',
             '-H', f'Authorization: token {gitea_tok}',
             '-H', 'Content-Type: application/json',
             '-d', json.dumps({"body": body})],
            capture_output=True, text=True, timeout=10,
        )
    except subprocess.TimeoutExpired:
        # The original let this propagate and kill the script mid-run.
        print(f'WARNING: comment on issue #{issue_num} timed out')
        return False
    if result.returncode != 0:
        print(f'WARNING: comment on issue #{issue_num} failed: '
              f'{result.stderr.strip()}')
        return False
    return True
|
|
|
|
# REST endpoint template for calling Gemma on Vertex AI; {placeholders}
# are filled in by the reader, so this is spliced into the ticket verbatim.
vertex_endpoint_format = "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/gemma-{version}:generateContent"

# TIMMY TICKET
#
# NOTE(review): the original concatenation was syntactically broken
# (unterminated segments and missing `+` joins), and it interpolated the
# live RunPod API key into the ticket body — leaking a credential into
# the issue tracker. The instructions now read the key from its local
# file at use time, and the whole body is one literal (triple backticks
# are legal inside a triple-quoted string).
timmy_deploy = """# Deploying Gemma 4 Big Brain on RunPod + Ollama

## Step 1: Create RunPod Pod
```bash
export RUNPOD_API_KEY=$(cat ~/.config/runpod/access_key)

# Find GPU types
curl -s -X GET https://api.runpod.io/v2/gpus \\
  -H "Authorization: Bearer $RUNPOD_API_KEY"

# Deploy A100 40GB with Ollama
curl -X POST https://api.runpod.io/graphql \\
  -H "Authorization: Bearer $RUNPOD_API_KEY" \\
  -H "Content-Type: application/json" \\
  -d '{
    "query": "mutation { podFindAndDeployOnDemand(input: {
      cloudType: \\"SECURE\\",
      gpuCount: 1,
      gpuTypeId: \\"NVIDIA A100-SXM4-40GB\\",
      name: \\"big-brain-timmy\\",
      containerDiskInGb: 100,
      imageName: \\"runpod/ollama:latest\\",
      ports: \\"11434/http\\",
      volumeInGb: 50,
      volumeMountPath: \\"/workspace\\"
    }) { id desiredStatus machineId } }"
  }'
```

## Step 2: Get Pod IP from RunPod dashboard or API

## Step 3: Deploy Ollama + Gemma
```bash
ssh root@<POD_IP>

# Pull Gemma (largest quantized)
ollama pull gemma3:27b-instruct-q8_0
ollama list
```

## Step 4: Wire to Mac Hermes
```bash
# Add to ~/.hermes/config.yaml
providers:
  big_brain:
    base_url: 'http://<POD_IP>:11434/v1'
    api_key: ''
    model: 'gemma3:27b-instruct-q8_0'

# Test
hermes chat --model gemma3:27b-instruct-q8_0 --provider big_brain
```

## Alternative: Vertex AI
**Vertex AI REST Endpoint Format:**
```
""" + vertex_endpoint_format + """
```
**Auth:** Use the service account key at ~/.config/vertex/key

**Request Format (curl):**
```bash
curl -X POST "https://us-central1-aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/us-central1/publishers/google/models/gemma-3-27b-it:streamGenerateContent?alt=sse" \\
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \\
  -H "Content-Type: application/json" \\
  -d '{"contents":[{"role":"user","parts":[{"text":"Hello"}]}]}'
```

## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4 (or Vertex endpoint configured)
- [ ] Endpoint accessible from Mac
- [ ] Mac Hermes can chat via big_brain provider
"""
|
|
|
|
# BEZALEL TICKET
# Runbook body for Bezalel's copy of the big-brain deployment ticket.
# Written as one triple-quoted literal — fenced code blocks (```) are
# perfectly legal inside a triple-quoted string, so no concatenation
# gymnastics are needed.
bez_deploy = """# Deploying Gemma 4 Big Brain on RunPod for Bezalel

## Step 1: Create RunPod Pod (same as Timmy, name as big-brain-bezalel)

## Step 2: Deploy Ollama + Gemma
```bash
ssh root@<POD_IP>
ollama pull gemma3:27b-instruct-q8_0
```

## Step 3: Wire to Bezalel Hermes
```bash
ssh root@104.131.15.18

# Edit Hermes config
nano /root/wizards/bezalel/home/config.yaml

# Add provider:
providers:
  big_brain:
    base_url: 'http://<POD_IP>:11434/v1'
    model: 'gemma3:27b-instruct-q8_0'
```

## Vertex AI Alternative
Same endpoint format as Timmy's ticket, but ensure the endpoint is accessible from Bezalel VPS (may need public IP or VPC).

## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4
- [ ] Endpoint accessible from Bezalel VPS
- [ ] Bezalel Hermes can use big_brain provider
"""
|
|
|
|
# Post each runbook to its prove-it ticket (543 = Timmy, 544 = Bezalel).
comment(543, timmy_deploy)
comment(544, bez_deploy)
# Fixed typo in the completion message ("deployments instructions").
print("Done: Both tickets updated with deployment instructions")
|