# Source: timmy-home/gpu_deploy_comment.py
# (pasted file-listing metadata: 109 lines, 4.0 KiB, Python)
#!/usr/bin/env python3
"""Add deployment instructions to GPU prove-it tickets."""
import subprocess, json, os


def _read_secret(path):
    """Return the stripped contents of a credential file under the user's home.

    Uses a context manager so the file handle is closed (the original
    open(...).read() pattern leaked the handle).
    """
    with open(os.path.expanduser(path)) as fh:
        return fh.read().strip()


# Gitea token + API base for the timmy-home issue tracker.
gitea_tok = _read_secret('~/.hermes/gitea_token_vps')
forge = 'https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-home'
# Provider credentials interpolated into the ticket bodies below.
runpod_key = _read_secret('~/.config/runpod/access_key')
vertex_key = _read_secret('~/.config/vertex/key')
def comment(issue_num, body):
    """POST *body* as a markdown comment on Gitea issue *issue_num*.

    Shells out to curl with an argument list (no shell string, so the token
    and body are not shell-interpreted).  Posting is best-effort: failures
    are not raised, but the CompletedProcess is returned so callers can
    inspect stdout/stderr/returncode if they choose to.
    """
    # Original body had its indentation stripped (SyntaxError); restored here.
    return subprocess.run(
        ['curl', '-s', '-X', 'POST', forge + '/issues/' + str(issue_num) + '/comments',
         '-H', 'Authorization: token ' + gitea_tok,
         '-H', 'Content-Type: application/json',
         '-d', json.dumps({"body": body})],
        capture_output=True, text=True, timeout=10
    )
# REST endpoint template for calling Gemma on Vertex AI; the reader fills in
# {location}/{project}/{version}.
vertex_endpoint_format = "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/gemma-{version}:generateContent"
# TIMMY TICKET
# Markdown body for Timmy's ticket.  The original concatenation left two
# string literals unterminated and dropped the whole Vertex-AI section into
# dangling expression statements that were never joined onto timmy_deploy;
# rebuilt here as one coherent message.  The GraphQL example now renders
# escaped quotes (\") so the inner JSON the reader copies is valid.
# NOTE(review): this interpolates the RunPod API key into the ticket body --
# confirm the repo/issue is private before running.
timmy_deploy = """# Deploying Gemma 4 Big Brain on RunPod + Ollama
## Step 1: Create RunPod Pod
```bash
export RUNPOD_API_KEY=""" + runpod_key + """
# Find GPU types
curl -s -X GET https://api.runpod.io/v2/gpus \\
  -H "Authorization: Bearer $RUNPOD_API_KEY"
# Deploy A100 40GB with Ollama
curl -X POST https://api.runpod.io/graphql \\
  -H "Authorization: Bearer $RUNPOD_API_KEY" \\
  -H "Content-Type: application/json" \\
  -d '{
  "query": "mutation { podFindAndDeployOnDemand(input: {
    cloudType: \\"SECURE\\",
    gpuCount: 1,
    gpuTypeId: \\"NVIDIA A100-SXM4-40GB\\",
    name: \\"big-brain-timmy\\",
    containerDiskInGb: 100,
    imageName: \\"runpod/ollama:latest\\",
    ports: \\"11434/http\\",
    volumeInGb: 50,
    volumeMountPath: \\"/workspace\\"
  }) { id desiredStatus machineId } }"
}'
```
## Step 2: Get Pod IP from RunPod dashboard or API
## Step 3: Deploy Ollama + Gemma
```bash
ssh root@<POD_IP>

# Pull Gemma (largest quantized)
ollama pull gemma3:27b-instruct-q8_0
ollama list
```
## Step 4: Wire to Mac Hermes
```bash
# Add to ~/.hermes/config.yaml
providers:
  big_brain:
    base_url: 'http://<POD_IP>:11434/v1'
    api_key: ''
    model: 'gemma3:27b-instruct-q8_0'

# Test
hermes chat --model gemma3:27b-instruct-q8_0 --provider big_brain
```
## Alternative: Vertex AI
**Vertex AI REST Endpoint Format:**
```
""" + vertex_endpoint_format + """
```
**Auth:** Use the service account key at ~/.config/vertex/key
**Request Format (curl):**
```bash
curl -X POST "https://us-central1-aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/us-central1/publishers/google/models/gemma-3-27b-it:streamGenerateContent?alt=sse" \\
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \\
  -H "Content-Type: application/json" \\
  -d '{"contents":[{"role":"user","parts":[{"text":"Hello"}]}]}'
```
## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4 (or Vertex endpoint configured)
- [ ] Endpoint accessible from Mac
- [ ] Mac Hermes can chat via big_brain provider
"""
# BEZALEL TICKET
# Markdown body for Bezalel's ticket -- written as a single triple-quoted
# literal (no inline "..." concatenation), content identical to before.
bez_deploy = """# Deploying Gemma 4 Big Brain on RunPod for Bezalel
## Step 1: Create RunPod Pod (same as Timmy, name as big-brain-bezalel)
## Step 2: Deploy Ollama + Gemma
```bash
ssh root@<POD_IP>
ollama pull gemma3:27b-instruct-q8_0
```
## Step 3: Wire to Bezalel Hermes
```bash
ssh root@104.131.15.18

# Edit Hermes config
nano /root/wizards/bezalel/home/config.yaml

# Add provider:
providers:
 big_brain:
 base_url: 'http://<POD_IP>:11434/v1'
 model: 'gemma3:27b-instruct-q8_0'
```
## Vertex AI Alternative
Same endpoint format as Timmy's ticket, but ensure the endpoint is accessible from Bezalel VPS (may need public IP or VPC).
## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4
- [ ] Endpoint accessible from Bezalel VPS
- [ ] Bezalel Hermes can use big_brain provider
"""
# Post the instructions onto their respective prove-it tickets.
comment(543, timmy_deploy)  # Timmy's GPU ticket
comment(544, bez_deploy)    # Bezalel's GPU ticket
# Fixed typo in the status line ("deployments instructions").
print("Done: Both tickets updated with deployment instructions")