timmy-home/scripts/verify_big_brain.py

#!/usr/bin/env python3
"""
Big Brain Pod Verification Script
Verifies that the Big Brain pod is live with gemma3:27b model.
Issue #573: [BIG-BRAIN] Verify pod live: gemma3:27b pulled and responding
"""
import requests
import time
import json
import sys
from datetime import datetime

# Pod configuration
# Identifier of the pod under test; interpolated into the proxy hostname below.
POD_ID = "8lfr3j47a5r3gn"
# Base URL that every check in this script talks to.
# NOTE(review): 11434 is presumably the Ollama port exposed through the RunPod
# proxy — confirm against the pod's port settings.
ENDPOINT = f"https://{POD_ID}-11434.proxy.runpod.net"
# Hourly price of the pod. In this script it is only printed and saved with the
# results; it is never used in a calculation.
COST_PER_HOUR = 0.79  # USD

def check_api_tags():
    """Check whether gemma3:27b appears in the pod's /api/tags model list.

    Sends a GET to ``{ENDPOINT}/api/tags`` (10 s timeout) and prints progress
    to stdout. On a 404 it additionally probes ``{ENDPOINT}/`` to report
    whether the server answers at all.

    Returns:
        A ``(ok, elapsed, models)`` tuple. ``ok`` is True only when the
        endpoint answered 200 and a listed model name contains
        ``gemma3:27b`` (case-insensitive). ``elapsed`` is the number of
        seconds spent on the /api/tags request, or the time spent until the
        failure when an exception was raised. ``models`` is the list of model
        names reported, or ``[]`` when none could be read.
    """
    print(f"[{datetime.now().isoformat()}] Checking /api/tags endpoint...")
    # Started outside the try block so the failure path can still report how
    # long the request took before it raised.
    start_time = time.time()
    try:
        response = requests.get(f"{ENDPOINT}/api/tags", timeout=10)
        elapsed = time.time() - start_time

        print(f"  Response status: {response.status_code}")
        print(f"  Response headers: {dict(response.headers)}")

        if response.status_code == 200:
            data = response.json()
            # Tolerate a null "models" list or a null "name" field instead of
            # failing on them.
            models = [
                model.get("name") or ""
                for model in (data.get("models") or [])
            ]
            print(f"  ✓ API responded in {elapsed:.2f}s")
            print(f"  Available models: {models}")

            # Check for gemma3:27b
            has_gemma = any("gemma3:27b" in name.lower() for name in models)
            if has_gemma:
                print("  ✓ gemma3:27b found in model list")
            else:
                print("  ✗ gemma3:27b NOT found in model list")
            return has_gemma, elapsed, models

        if response.status_code == 404:
            print("  ✗ API endpoint not found (404)")
            print("  This might mean Ollama is not running or endpoint is wrong")
            print("  Trying to ping the server...")
            try:
                ping_response = requests.get(f"{ENDPOINT}/", timeout=5)
                print(f"  Ping response: {ping_response.status_code}")
            except requests.RequestException:
                # Only network-level failures mean "unreachable"; a bare
                # except here would also swallow Ctrl-C and SystemExit.
                print("  Ping failed - server unreachable")
            return False, elapsed, []

        print(f"  ✗ API returned status {response.status_code}")
        return False, elapsed, []
    except Exception as e:
        # Boundary of this check: any failure (network error, invalid JSON,
        # unexpected payload shape) is reported as a failed check rather than
        # aborting the whole verification run.
        elapsed = time.time() - start_time
        print(f"  ✗ Error checking API tags: {e}")
        return False, elapsed, []

def test_generate():
    """Send a tiny prompt to /api/generate and check it answers in time.

    Posts a non-streaming request for model ``gemma3:27b`` (at most 10
    predicted tokens) to ``{ENDPOINT}/api/generate`` and prints progress to
    stdout.

    Returns:
        A ``(ok, elapsed, response_text)`` tuple. ``ok`` is True only when
        the endpoint answered 200 and the round trip took under 30 seconds.
        ``elapsed`` is the measured request time in seconds, or the time
        spent until the failure when an exception was raised.
        ``response_text`` is the stripped ``response`` field of the reply,
        or ``""`` on failure.
    """
    print(f"[{datetime.now().isoformat()}] Testing /api/generate endpoint...")
    payload = {
        "model": "gemma3:27b",
        "prompt": "Say hello in one word.",
        "stream": False,
        "options": {
            "num_predict": 10,
        },
    }

    # Started outside the try block so a timeout or connection error still
    # reports the time actually spent instead of 0.
    start_time = time.time()
    try:
        # The requests timeout limits each wait on the socket, not the total
        # request time, so the 30 s budget is also checked explicitly below.
        response = requests.post(
            f"{ENDPOINT}/api/generate",
            json=payload,
            timeout=30,
        )
        elapsed = time.time() - start_time

        if response.status_code != 200:
            print(f"  ✗ Generate returned status {response.status_code}")
            return False, elapsed, ""

        data = response.json()
        # A null "response" field is treated as empty text.
        response_text = (data.get("response") or "").strip()
        print(f"  ✓ Generate responded in {elapsed:.2f}s")
        print(f"  Response: {response_text[:100]}...")

        if elapsed < 30:
            print("  ✓ Response time under 30 seconds")
            return True, elapsed, response_text

        print(f"  ✗ Response time {elapsed:.2f}s exceeds 30s limit")
        return False, elapsed, response_text
    except Exception as e:
        # Boundary of this check: any failure (network error, timeout,
        # invalid JSON) is reported as a failed check rather than aborting
        # the whole verification run.
        elapsed = time.time() - start_time
        print(f"  ✗ Error testing generate: {e}")
        return False, elapsed, ""

def check_uptime():
    """Print and return the timestamp of this verification run.

    No uptime is actually computed here: the function captures the current
    local time, logs it, and hands it back to the caller. Querying the pod's
    real start time would require the RunPod API, which this script does not
    call.

    Returns:
        The naive local ``datetime`` captured when the function ran.
    """
    now = datetime.now()
    stamp = now.isoformat()
    print("[" + stamp + "] Pod verification timestamp")
    return now

def main():
    """Run all pod checks, print a summary, save the results, and exit.

    Runs ``check_uptime``, ``check_api_tags`` and ``test_generate`` in that
    order, prints a pass/fail summary to stdout, and writes the collected
    results to ``big_brain_verification.json`` in the current working
    directory. The process then exits with status 0 when both the tags check
    and the generate test passed, and 1 otherwise.
    """
    print("=" * 60)
    print("Big Brain Pod Verification")
    print(f"Pod ID: {POD_ID}")
    print(f"Endpoint: {ENDPOINT}")
    print(f"Cost: ${COST_PER_HOUR}/hour")
    print("=" * 60)
    print()

    # Check uptime
    check_time = check_uptime()
    print()

    # Check API tags
    tags_ok, tags_time, models = check_api_tags()
    print()

    # Test generate
    generate_ok, generate_time, response = test_generate()
    print()

    # A failed generate test with a time of exactly 0 means no timing was
    # recorded at all; reporting that as "under 30s" would be misleading.
    if not generate_ok and generate_time == 0:
        under_30_label = "– N/A (no response time recorded)"
    elif generate_time < 30:
        under_30_label = "✓ YES"
    else:
        under_30_label = "✗ NO"

    # Summary
    print("=" * 60)
    print("VERIFICATION SUMMARY")
    print("=" * 60)
    print(f"API Tags Check: {'✓ PASS' if tags_ok else '✗ FAIL'}")
    print(f"  Response time: {tags_time:.2f}s")
    print(f"  Models found: {len(models)}")
    print()
    print(f"Generate Test: {'✓ PASS' if generate_ok else '✗ FAIL'}")
    print(f"  Response time: {generate_time:.2f}s")
    print(f"  Under 30s: {under_30_label}")
    print()

    # Overall status
    overall_ok = tags_ok and generate_ok
    print(f"Overall Status: {'✓ POD LIVE' if overall_ok else '✗ POD ISSUES'}")

    # Cost awareness
    print()
    print(f"Cost Awareness: Pod costs ${COST_PER_HOUR}/hour")
    print(f"Verification time: {check_time.strftime('%Y-%m-%d %H:%M:%S')}")

    # Write results to file
    results = {
        "pod_id": POD_ID,
        "endpoint": ENDPOINT,
        "timestamp": check_time.isoformat(),
        "api_tags_ok": tags_ok,
        "api_tags_time": tags_time,
        "models": models,
        "generate_ok": generate_ok,
        "generate_time": generate_time,
        # Keep the stored sample short; the full text is not needed here.
        "generate_response": response[:200] if response else "",
        "overall_ok": overall_ok,
        "cost_per_hour": COST_PER_HOUR,
    }

    # Explicit encoding so the output does not depend on the platform's
    # default locale.
    with open("big_brain_verification.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print()
    print("Results saved to big_brain_verification.json")

    # Exit with appropriate code
    sys.exit(0 if overall_ok else 1)

# Run the verification only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()