#!/usr/bin/env python3
"""Add deployment instructions to GPU prove-it tickets."""

import json
import os
import subprocess


def _read_secret(path):
    """Return the stripped contents of a credential file under the user's home.

    Uses a context manager so the file handle is closed promptly (the
    original `open(...).read()` chain leaked the handle).
    """
    with open(os.path.expanduser(path)) as fh:
        return fh.read().strip()


# Gitea personal-access token used to authenticate the comment POSTs.
gitea_tok = _read_secret('~/.hermes/gitea_token_vps')

# Base URL of the target repo's issue API.
forge = 'https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-home'

# RunPod access key; the ticket text points at this file rather than
# embedding the secret itself.
runpod_key = _read_secret('~/.config/runpod/access_key')

# NOTE(review): read but never used anywhere below — the tickets only
# reference the key's *path*. Kept for backward compatibility; consider
# removing the read entirely.
vertex_key = _read_secret('~/.config/vertex/key')
|
|
|
|
def comment(issue_num, body):
    """POST *body* as a markdown comment on Gitea issue *issue_num*.

    Shells out to curl against the forge issue-comments API. Unlike the
    original (which discarded the CompletedProcess, silently swallowing
    every network/HTTP failure), this reports failures and returns a
    success flag. Callers that ignore the return value are unaffected.

    Returns True on success, False on timeout or a non-zero curl exit.
    """
    try:
        result = subprocess.run(
            ['curl', '-sS', '--fail', '-X', 'POST',
             # --fail makes curl exit non-zero on HTTP 4xx/5xx so errors
             # are no longer invisible; -sS keeps output quiet but still
             # prints error messages to stderr.
             f'{forge}/issues/{issue_num}/comments',
             '-H', f'Authorization: token {gitea_tok}',
             '-H', 'Content-Type: application/json',
             '-d', json.dumps({"body": body})],
            capture_output=True, text=True, timeout=10,
        )
    except subprocess.TimeoutExpired:
        # The original let this propagate and kill the script mid-run.
        print(f'WARNING: comment on issue #{issue_num} timed out')
        return False
    if result.returncode != 0:
        print(f'WARNING: comment on issue #{issue_num} failed: '
              f'{result.stderr.strip()}')
        return False
    return True
|
|
|
|
# REST endpoint template for calling Gemma on Vertex AI; {placeholders}
# are filled in by the reader, so this is spliced into the ticket verbatim.
vertex_endpoint_format = "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/gemma-{version}:generateContent"

# TIMMY TICKET
#
# NOTE(review): the original concatenation was syntactically broken
# (unterminated segments and missing `+` joins), and it interpolated the
# live RunPod API key into the ticket body — leaking a credential into
# the issue tracker. The instructions now read the key from its local
# file at use time, and the whole body is one literal (triple backticks
# are legal inside a triple-quoted string).
timmy_deploy = """# Deploying Gemma 4 Big Brain on RunPod + Ollama

## Step 1: Create RunPod Pod
```bash
export RUNPOD_API_KEY=$(cat ~/.config/runpod/access_key)

# Find GPU types
curl -s -X GET https://api.runpod.io/v2/gpus \\
  -H "Authorization: Bearer $RUNPOD_API_KEY"

# Deploy A100 40GB with Ollama
curl -X POST https://api.runpod.io/graphql \\
  -H "Authorization: Bearer $RUNPOD_API_KEY" \\
  -H "Content-Type: application/json" \\
  -d '{
    "query": "mutation { podFindAndDeployOnDemand(input: {
      cloudType: \\"SECURE\\",
      gpuCount: 1,
      gpuTypeId: \\"NVIDIA A100-SXM4-40GB\\",
      name: \\"big-brain-timmy\\",
      containerDiskInGb: 100,
      imageName: \\"runpod/ollama:latest\\",
      ports: \\"11434/http\\",
      volumeInGb: 50,
      volumeMountPath: \\"/workspace\\"
    }) { id desiredStatus machineId } }"
  }'
```

## Step 2: Get Pod IP from RunPod dashboard or API

## Step 3: Deploy Ollama + Gemma
```bash
ssh root@<POD_IP>

# Pull Gemma (largest quantized)
ollama pull gemma3:27b-instruct-q8_0
ollama list
```

## Step 4: Wire to Mac Hermes
```bash
# Add to ~/.hermes/config.yaml
providers:
  big_brain:
    base_url: 'http://<POD_IP>:11434/v1'
    api_key: ''
    model: 'gemma3:27b-instruct-q8_0'

# Test
hermes chat --model gemma3:27b-instruct-q8_0 --provider big_brain
```

## Alternative: Vertex AI
**Vertex AI REST Endpoint Format:**
```
""" + vertex_endpoint_format + """
```
**Auth:** Use the service account key at ~/.config/vertex/key

**Request Format (curl):**
```bash
curl -X POST "https://us-central1-aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/us-central1/publishers/google/models/gemma-3-27b-it:streamGenerateContent?alt=sse" \\
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \\
  -H "Content-Type: application/json" \\
  -d '{"contents":[{"role":"user","parts":[{"text":"Hello"}]}]}'
```

## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4 (or Vertex endpoint configured)
- [ ] Endpoint accessible from Mac
- [ ] Mac Hermes can chat via big_brain provider
"""
|
|
|
|
# BEZALEL TICKET
# Runbook body for Bezalel's copy of the big-brain deployment ticket.
# Written as one triple-quoted literal — fenced code blocks (```) are
# perfectly legal inside a triple-quoted string, so no concatenation
# gymnastics are needed.
bez_deploy = """# Deploying Gemma 4 Big Brain on RunPod for Bezalel

## Step 1: Create RunPod Pod (same as Timmy, name as big-brain-bezalel)

## Step 2: Deploy Ollama + Gemma
```bash
ssh root@<POD_IP>
ollama pull gemma3:27b-instruct-q8_0
```

## Step 3: Wire to Bezalel Hermes
```bash
ssh root@104.131.15.18

# Edit Hermes config
nano /root/wizards/bezalel/home/config.yaml

# Add provider:
providers:
  big_brain:
    base_url: 'http://<POD_IP>:11434/v1'
    model: 'gemma3:27b-instruct-q8_0'
```

## Vertex AI Alternative
Same endpoint format as Timmy's ticket, but ensure the endpoint is accessible from Bezalel VPS (may need public IP or VPC).

## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4
- [ ] Endpoint accessible from Bezalel VPS
- [ ] Bezalel Hermes can use big_brain provider
"""
|
|
|
|
# Post each runbook to its prove-it ticket (543 = Timmy, 544 = Bezalel).
comment(543, timmy_deploy)
comment(544, bez_deploy)
# Fixed typo in the completion message ("deployments instructions").
print("Done: Both tickets updated with deployment instructions")
|