Tick #1520 - Timmy climbs the Tower. The servers hum. | Bezalel examines the anvil: a thousand scars. | Allegro visits the Tower. Reads the logs. (+5 more)
This commit is contained in:
271
big_brain_deploy.py
Normal file
271
big_brain_deploy.py
Normal file
@@ -0,0 +1,271 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deploy GPU instance on RunPod for Big Brain Gemma 4."""
|
||||
import subprocess, json, os, time, requests
|
||||
|
||||
# Credentials and endpoints, read once at import time (this file is a script).
def _read_secret(path):
    """Return the stripped contents of *path* (``~`` expanded), closing the file.

    The previous code used bare ``open(...).read()`` and leaked the file handle.
    """
    with open(os.path.expanduser(path)) as fh:
        return fh.read().strip()

RUNPOD_API_KEY = _read_secret('~/.config/runpod/access_key')
GITEA_TOKEN = _read_secret('~/.hermes/gitea_token_vps')
GITEA_FORGE = 'https://forge.alexanderwhitestone.com/api/v1/repos/Timmy_Foundation/timmy-home'
|
||||
|
||||
def log(msg):
    """Print *msg* prefixed with the current wall-clock time (HH:MM:SS)."""
    stamp = time.strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}")
|
||||
|
||||
def comment_issue(issue_num, body):
    """Post *body* as a comment on Gitea issue *issue_num* (best-effort).

    Uses the already-imported ``requests`` instead of shelling out to ``curl``.
    Failures are deliberately swallowed — the old code ignored curl's exit
    status too — so a ticket-update hiccup never aborts a deployment.
    """
    try:
        requests.post(
            f'{GITEA_FORGE}/issues/{issue_num}/comments',
            headers={
                'Authorization': f'token {GITEA_TOKEN}',
                'Content-Type': 'application/json',
            },
            json={"body": body},
            timeout=10,
        )
    except requests.RequestException:
        pass  # best-effort, matching the previously ignored curl result
|
||||
|
||||
def graphql_query(query, variables=None):
    """POST a GraphQL *query* to the RunPod API and return the decoded JSON.

    *variables*, when non-empty, is attached under the standard GraphQL
    ``variables`` key.
    """
    body = {"query": query}
    if variables:
        body["variables"] = variables

    response = requests.post(
        'https://api.runpod.io/graphql',
        headers={
            'Authorization': f'Bearer {RUNPOD_API_KEY}',
            'Content-Type': 'application/json',
        },
        json=body,
        timeout=30,
    )
    return response.json()
|
||||
|
||||
def deploy_pod(gpu_type, name, cloud_type="COMMUNITY"):
    """Ask RunPod to find and deploy an on-demand Ollama pod.

    Returns the raw GraphQL response dict, or ``{"error": ...}`` if the
    request itself raised.
    """
    mutation = """
    mutation($input: PodFindAndDeployOnDemandInput!) {
        podFindAndDeployOnDemand(input: $input) {
            id
            desiredStatus
            machineId
            warning
        }
    }
    """

    # Fixed pod shape: single GPU, Ollama image, HTTP port 11434, 50GB volume.
    pod_input = {
        "cloudType": cloud_type,
        "gpuCount": 1,
        "gpuTypeId": gpu_type,
        "name": name,
        "containerDiskInGb": 100,
        "imageName": "runpod/ollama:latest",
        "ports": "11434/http",
        "volumeInGb": 50,
        "volumeMountPath": "/workspace",
    }

    try:
        return graphql_query(mutation, {"input": pod_input})
    except Exception as exc:
        return {"error": str(exc)}
|
||||
|
||||
def check_if_endpoint_exists(name):
    """Return RunPod endpoints whose name contains *name* (case-insensitive).

    GraphQL error responses carry ``"data": null``, so guard with ``or {}`` —
    the previous ``.get('data', {})`` only covered a *missing* key and raised
    AttributeError on an explicit null.
    """
    result = graphql_query("{ endpoints { id name } }")
    endpoints = (result.get('data') or {}).get('endpoints') or []
    needle = name.lower()
    return [ep for ep in endpoints if needle in ep.get('name', '').lower()]
|
||||
|
||||
# Main deployment logic — this file is a script and runs at import.
log("Starting Big Brain GPU deployment")
log(f"RunPod API key: {RUNPOD_API_KEY[:20]}...{RUNPOD_API_KEY[-10:]}")

# Step 1: Get available GPU types
log("\n=== Step 1: Getting GPU types ===")
gpu_query = "{ gpuTypes { id displayName memoryInGb secureCloud communityCloud } }"
result = graphql_query(gpu_query)

# "data" may be an explicit null on GraphQL errors; `or {}` covers both the
# missing-key and null cases (plain .get('data', {}) would crash on null).
gpus = (result.get('data') or {}).get('gpuTypes') or []
log(f"Total GPU types: {len(gpus)}")

# Filter GPUs with 24GB+ VRAM for Gemma 3 27B.
suitable_gpus = [gpu for gpu in gpus if gpu.get('memoryInGb', 0) >= 24]

log("\nGPUs with 24GB+ VRAM:")
for gpu in suitable_gpus[:15]:
    log(f" {gpu.get('id')}: {gpu.get('displayName')} - {gpu.get('memoryInGb')}GB, Secure: {gpu.get('secureCloud')}, Community: {gpu.get('communityCloud')}")
|
||||
|
||||
# Step 2 setup.  The earlier attempt failed with "no instances available",
# so we walk a preference-ordered list of GPU type IDs (the ID strings from
# the Step-1 listing) until one actually deploys.
pod_name = "big-brain-timmy"

# Cheapest-first among cards with enough memory; community cloud before secure.
gpu_attempts = [
    ("NVIDIA RTX 4090", "COMMUNITY"),     # 24GB, ~$0.44/hr
    ("NVIDIA A40", "COMMUNITY"),          # 48GB
    ("NVIDIA RTX 3090", "COMMUNITY"),     # 24GB
    ("NVIDIA RTX 3090 Ti", "COMMUNITY"),  # 24GB
    ("NVIDIA L40S", "COMMUNITY"),         # 48GB
    ("NVIDIA A6000", "COMMUNITY"),        # 48GB
    # Fall back to the secure cloud.
    ("NVIDIA RTX 4090", "SECURE"),
    ("NVIDIA A40", "SECURE"),
    ("NVIDIA L40S", "SECURE"),
]
|
||||
|
||||
log("\n=== Step 2: Attempting deployment ===")
deployed = False
for gpu_type, cloud_type in gpu_attempts:
    log(f"Trying {gpu_type} ({cloud_type})...")
    result = deploy_pod(gpu_type, pod_name, cloud_type)

    errors = result.get('errors', [])
    # "data" can be an explicit null when the mutation errors out; `or {}`
    # guards that (plain .get(..., {}) only covers a missing key).
    data = (result.get('data') or {}).get('podFindAndDeployOnDemand') or {}

    if errors:
        for err in errors:
            msg = err.get('message', '')
            if 'no longer any instances' in msg or 'no instances' in msg:
                log(" No instances available")
            elif 'invalid' in msg.lower() or 'not found' in msg.lower():
                log(f" GPU type not found: {msg[:100]}")
            else:
                log(f" Error: {msg[:100]}")
    elif data and data.get('id'):
        log(f" ✅ SUCCESS! Pod ID: {data.get('id')}")
        log(f" Machine ID: {data.get('machineId')}")
        log(f" Status: {data.get('desiredStatus')}")
        deployed = True
        break
    else:
        # Neither errors nor a pod id — dump a truncated raw response.
        log(f" Response: {json.dumps(result)[:200]}")
|
||||
|
||||
# --- Post-deployment handling ------------------------------------------------
# If a pod was created above, poll RunPod until it reports RUNNING (or fails),
# then post connection/setup instructions to the Gitea tickets.  If no GPU
# could be provisioned at all, post a failure notice instead.
if deployed:
    # `data` is the successful podFindAndDeployOnDemand payload from the loop.
    pod_id = data.get('id')

    # Wait for pod to be running
    log(f"\n=== Step 3: Waiting for pod {pod_id} to start ===")
    pod_status_query = """
    query($podId: String!) {
        pod(id: $podId) {
            id
            desiredStatus
            runtimeStatus
            machineId
            ports
        }
    }
    """

    # Poll every 30s, up to 30 times (~15 minutes total).
    for attempt in range(30): # Wait up to 15 minutes
        time.sleep(30)
        result = graphql_query(pod_status_query, {"podId": pod_id})
        pod = result.get('data', {}).get('pod', {})
        runtime = pod.get('runtimeStatus', 'unknown')
        desired = pod.get('desiredStatus', 'unknown')
        log(f" Attempt {attempt+1}: desired={desired}, runtime={runtime}")

        if runtime == 'RUNNING':
            log(f" ✅ Pod is RUNNING!")

            # Get the IP/port
            # NOTE(review): two endpoint shapes are logged below; RunPod's HTTP
            # proxy is usually {pod_id}-{port}.proxy.runpod.net (no extra port
            # suffix) — confirm which URL actually resolves.
            ip = f"{pod_id}-11434.proxy.runpod.net"
            log(f" Ollama endpoint: http://{ip}:11434")
            log(f" Ollama endpoint: http://{pod_id}.proxy.runpod.net:11434")

            # Comment on Gitea tickets
            # NOTE(review): leading whitespace inside this markdown appears to
            # have been lost upstream (fences/YAML are flush-left) — verify the
            # snippet renders/parses as intended before reuse.
            comment_text = f"""# ✅ SUCCESS: GPU Instance Deployed

## Pod Details
- **Pod ID:** {pod_id}
- **GPU:** {gpu_type} ({cloud_type} cloud)
- **Status:** RUNNING
- **Endpoint:** http://{pod_id}.proxy.runpod.net:11434

## Next Steps

1. **SSH into pod:**
```bash
ssh root@{pod_id}.proxy.runpod.net
```

2. **Pull Gemma 3 27B:**
```bash
ollama pull gemma3:27b-instruct-q4_K_M
```

3. **Verify Ollama is working:**
```bash
curl http://localhost:11434/api/tags
```

4. **Test inference:**
```bash
curl http://localhost:11434/api/chat \\
-H "Content-Type: application/json" \\
-d '{{"model": "gemma3:27b-instruct-q4_K_M", "messages": [{{"role": "user", "content": "Hello from Timmy"}}]}}'
```

5. **Wire to Mac Hermes:**
Add to `~/.hermes/config.yaml`:
```yaml
providers:
big_brain:
base_url: "http://{pod_id}.proxy.runpod.net:11434/v1"
api_key: ""
model: "gemma3:27b-instruct-q4_K_M"
```

6. **Test Hermes:**
```bash
hermes chat --model gemma3:27b-instruct-q4_K_M --provider big_brain
```"""

            comment_issue(543, comment_text)
            # Ticket 544 is the Bezalel copy: swap persona/host names in place.
            comment_issue(544, comment_text.replace("Timmy", "Bezalel").replace("Mac Hermes", "Bezalel Hermes"))

            log("\n🎉 Big Brain GPU deployed successfully!")
            log(f"Pod: {pod_id}")
            log(f"Endpoint: http://{pod_id}.proxy.runpod.net:11434")
            log(f"Gitea tickets updated with deployment details")
            break
        elif runtime == 'ERROR' or desired == 'TERMINATED' or desired == 'SUSPENDED':
            log(f" ❌ Pod failed: runtime={runtime}, desired={desired}")
            break

    # `runtime` is assigned on the first poll iteration, so it is bound here.
    if runtime != 'RUNNING':
        log(f"\n⚠️ Pod is not running after waiting. Check RunPod dashboard.")
else:
    # Every GPU/cloud combination in gpu_attempts was rejected.
    log("\n❌ No GPU instances available on RunPod")
    log("Try Vertex AI or check back later")

    # Comment on tickets
    comment_text = """# Deployment Status: RunPod Failed

## Issue
No GPU instances available on RunPod. All GPU types returned "no instances available" error.

## Alternatives
1. **Vertex AI** - Google Cloud's managed Gemma endpoints (see ticket for instructions)
2. **Lambda Labs** - Another GPU cloud provider
3. **Vast.ai** - Community GPU marketplace
4. **Wait for RunPod** - Check back in a few hours"""

    comment_issue(543, comment_text)
    comment_issue(544, comment_text)
|
||||
|
||||
# NOTE(review): removed trailing residue leaked into this file by the script
# that generated it — a dangling '"""' followed by its own driver lines
# ("Write the deployment script", write_file('~/.timmy/big_brain_deploy.py',
# script_content), and a print call).  The unterminated triple-quoted string
# made this module a SyntaxError, so nothing in the file could ever run.
|
||||
@@ -1,18 +1,18 @@
|
||||
# The Tower World State — Tick #1519
|
||||
# The Tower World State — Tick #1520
|
||||
|
||||
**Time:** 22:40:57
|
||||
**Tick:** 1519
|
||||
**Time:** 22:42:28
|
||||
**Tick:** 1520
|
||||
|
||||
## Moves This Tick
|
||||
|
||||
- Timmy stands at the Threshold, watching.
|
||||
- Bezalel tests the Forge. The hearth still glows.
|
||||
- Allegro crosses to the Garden. Listens to the wind.
|
||||
- Ezra climbs to the Tower. Studies the inscriptions.
|
||||
- Gemini walks to the Threshold, counting footsteps.
|
||||
- Claude crosses to the Tower. Studies the structure.
|
||||
- ClawCode crosses to the Threshold. Checks the exits.
|
||||
- Kimi crosses to the Threshold. Watches the crew.
|
||||
- Timmy climbs the Tower. The servers hum.
|
||||
- Bezalel examines the anvil: a thousand scars.
|
||||
- Allegro visits the Tower. Reads the logs.
|
||||
- Ezra walks the Bridge. The words speak back.
|
||||
- Gemini rests on the Bridge. Water moves below.
|
||||
- Claude walks the Forge. Everything has a place.
|
||||
- ClawCode examines the Bridge. The structure holds.
|
||||
- Kimi climbs the Tower. The servers are a library.
|
||||
|
||||
## Character Locations
|
||||
|
||||
|
||||
Reference in New Issue
Block a user