#!/usr/bin/env python3
"""Add deployment instructions to the GPU prove-it tickets.

Posts one markdown comment each to issues #543 (Timmy) and #544 (Bezalel)
on the Gitea forge. Secrets are read from local credential files for the
API call itself but are NEVER embedded in the posted ticket bodies.
"""
import json
import os
import urllib.request


def _read_secret(path):
    """Return the stripped contents of the credential file at *path*."""
    with open(os.path.expanduser(path)) as fh:
        return fh.read().strip()


# Gitea token is needed locally to authenticate the comment POSTs.
gitea_tok = _read_secret('~/.hermes/gitea_token_vps')
forge = ('https://forge.alexanderwhitestone.com/api/v1/repos/'
         'Timmy_Foundation/timmy-home')


def comment(issue_num, body):
    """POST *body* as a markdown comment on Gitea issue *issue_num*.

    Best effort: failures are reported to stdout but do not abort the
    script (matches the original fire-and-forget behavior).
    """
    req = urllib.request.Request(
        forge + '/issues/' + str(issue_num) + '/comments',
        data=json.dumps({'body': body}).encode('utf-8'),
        headers={
            # Token travels in a header, not on a child-process argv.
            'Authorization': 'token ' + gitea_tok,
            'Content-Type': 'application/json',
        },
        method='POST',
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            resp.read()
    except OSError as exc:
        print('warning: failed to comment on #%s: %s' % (issue_num, exc))


vertex_endpoint_format = (
    'https://{location}-aiplatform.googleapis.com/v1/projects/{project}'
    '/locations/{location}/publishers/google/models/'
    'gemma-{version}:generateContent'
)

# TIMMY TICKET
# NOTE: the RunPod key is deliberately NOT interpolated here; the reader
# loads it from their own credential file. The GraphQL -d payload uses
# \" escapes so the emitted JSON is actually valid.
timmy_deploy = """# Deploying Gemma 4 Big Brain on RunPod + Ollama

## Step 1: Create RunPod Pod

```bash
# Load your RunPod key locally -- do not paste the key into tickets.
export RUNPOD_API_KEY="$(cat ~/.config/runpod/access_key)"

# Find GPU types
curl -s -X GET https://api.runpod.io/v2/gpus \\
  -H "Authorization: Bearer $RUNPOD_API_KEY"

# Deploy A100 40GB with Ollama
curl -X POST https://api.runpod.io/graphql \\
  -H "Authorization: Bearer $RUNPOD_API_KEY" \\
  -H "Content-Type: application/json" \\
  -d '{ "query": "mutation { podFindAndDeployOnDemand(input: { cloudType: \\"SECURE\\", gpuCount: 1, gpuTypeId: \\"NVIDIA A100-SXM4-40GB\\", name: \\"big-brain-timmy\\", containerDiskInGb: 100, imageName: \\"runpod/ollama:latest\\", ports: \\"11434/http\\", volumeInGb: 50, volumeMountPath: \\"/workspace\\" }) { id desiredStatus machineId } }" }'
```

## Step 2: Get Pod IP from RunPod dashboard or API

## Step 3: Deploy Ollama + Gemma

```bash
ssh root@<POD_IP>

# Pull Gemma (largest quantized)
ollama pull gemma3:27b-instruct-q8_0
ollama list
```

## Step 4: Wire to Mac Hermes

```bash
# Add to ~/.hermes/config.yaml
providers:
  big_brain:
    base_url: 'http://<POD_IP>:11434/v1'
    api_key: ''
    model: 'gemma3:27b-instruct-q8_0'

# Test
hermes chat --model gemma3:27b-instruct-q8_0 --provider big_brain
```

## Alternative: Vertex AI

**Vertex AI REST Endpoint Format:**

```
""" + vertex_endpoint_format + """
```

**Auth:** Use the service account key at ~/.config/vertex/key

**Request Format (curl):**

```bash
curl -X POST "https://us-central1-aiplatform.googleapis.com/v1/projects/YOUR_PROJECT/locations/us-central1/publishers/google/models/gemma-3-27b-it:streamGenerateContent?alt=sse" \\
  -H "Authorization: Bearer $(gcloud auth print-access-token)" \\
  -H "Content-Type: application/json" \\
  -d '{"contents":[{"role":"user","parts":[{"text":"Hello"}]}]}'
```

## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4 (or Vertex endpoint configured)
- [ ] Endpoint accessible from Mac
- [ ] Mac Hermes can chat via big_brain provider
"""

# BEZALEL TICKET
bez_deploy = """# Deploying Gemma 4 Big Brain on RunPod for Bezalel

## Step 1: Create RunPod Pod (same as Timmy, name as big-brain-bezalel)

## Step 2: Deploy Ollama + Gemma

```bash
ssh root@<POD_IP>
ollama pull gemma3:27b-instruct-q8_0
```

## Step 3: Wire to Bezalel Hermes

```bash
ssh root@104.131.15.18

# Edit Hermes config
nano /root/wizards/bezalel/home/config.yaml

# Add provider:
providers:
  big_brain:
    base_url: 'http://<POD_IP>:11434/v1'
    model: 'gemma3:27b-instruct-q8_0'
```

## Vertex AI Alternative
Same endpoint format as Timmy's ticket, but ensure the endpoint is accessible from Bezalel VPS (may need public IP or VPC).

## Acceptance Criteria
- [ ] GPU instance provisioned
- [ ] Ollama running with Gemma 4
- [ ] Endpoint accessible from Bezalel VPS
- [ ] Bezalel Hermes can use big_brain provider
"""


def main():
    """Post both deployment-instruction comments and report completion."""
    comment(543, timmy_deploy)
    comment(544, bez_deploy)
    print("Done: Both tickets updated with deployment instructions")


if __name__ == '__main__':
    main()