Some checks failed
Smoke Test / smoke (push) Has been cancelled
Merge PR #619
177 lines
5.9 KiB
Python
Executable File
177 lines
5.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Big Brain Pod Verification Script
|
|
Verifies that the Big Brain pod is live with gemma3:27b model.
|
|
Issue #573: [BIG-BRAIN] Verify pod live: gemma3:27b pulled and responding
|
|
"""
|
|
import requests
|
|
import time
|
|
import json
|
|
import sys
|
|
from datetime import datetime
|
|
|
|
# Pod configuration
POD_ID = "8lfr3j47a5r3gn"
# Port number embedded in the proxy hostname, kept separate from the pod id.
_PROXY_PORT = 11434
ENDPOINT = f"https://{POD_ID}-{_PROXY_PORT}.proxy.runpod.net"
COST_PER_HOUR = 0.79  # USD
|
|
|
|
def check_api_tags():
    """Check if gemma3:27b is in the model list served at /api/tags.

    Returns:
        A ``(ok, elapsed, models)`` tuple. ``ok`` is True only when the
        endpoint answered 200 and a model name containing "gemma3:27b"
        (case-insensitive) was listed. ``elapsed`` is the request duration in
        seconds, or 0 when the request raised. ``models`` is the list of
        reported model names, empty on any failure.
    """
    print(f"[{datetime.now().isoformat()}] Checking /api/tags endpoint...")
    try:
        # perf_counter is monotonic, so the measurement cannot be skewed by a
        # wall-clock adjustment while the request is in flight.
        start_time = time.perf_counter()
        response = requests.get(f"{ENDPOINT}/api/tags", timeout=10)
        elapsed = time.perf_counter() - start_time

        print(f" Response status: {response.status_code}")
        print(f" Response headers: {dict(response.headers)}")

        if response.status_code == 200:
            data = response.json()
            # Tolerate `"models": null` and entries whose name is null so a
            # sparse payload is not misreported as an API error.
            models = [
                entry.get("name") or "" for entry in data.get("models") or []
            ]
            print(f" ✓ API responded in {elapsed:.2f}s")
            print(f" Available models: {models}")

            # Check for gemma3:27b
            has_gemma = any("gemma3:27b" in name.lower() for name in models)
            if has_gemma:
                print(" ✓ gemma3:27b found in model list")
            else:
                print(" ✗ gemma3:27b NOT found in model list")
            return has_gemma, elapsed, models

        if response.status_code == 404:
            print(" ✗ API endpoint not found (404)")
            print(" This might mean Ollama is not running or endpoint is wrong")
            print(" Trying to ping the server...")
            try:
                ping_response = requests.get(f"{ENDPOINT}/", timeout=5)
                print(f" Ping response: {ping_response.status_code}")
            except requests.RequestException:
                # Only request-level failures mean "unreachable"; a bare
                # except would also swallow KeyboardInterrupt and SystemExit.
                print(" Ping failed - server unreachable")
            return False, elapsed, []

        print(f" ✗ API returned status {response.status_code}")
        return False, elapsed, []
    except Exception as e:
        # Boundary of the check: any failure is reported as a failed check
        # rather than aborting the whole verification run.
        print(f" ✗ Error checking API tags: {e}")
        return False, 0, []
|
|
|
|
def test_generate():
    """Test the /api/generate endpoint with a simple prompt.

    Sends one non-streaming request for gemma3:27b and checks both the HTTP
    status and the time taken.

    Returns:
        A ``(ok, elapsed, response_text)`` tuple. ``ok`` is True only when the
        endpoint answered 200 in under 30 seconds. ``elapsed`` is the request
        duration in seconds, or 0 when the request raised. ``response_text``
        is the stripped "response" field, or "" on failure.
    """
    # Single source for the limit used as request timeout, pass/fail threshold
    # and in the printed messages.
    limit_seconds = 30

    print(f"[{datetime.now().isoformat()}] Testing /api/generate endpoint...")
    try:
        payload = {
            "model": "gemma3:27b",
            "prompt": "Say hello in one word.",
            "stream": False,
            "options": {
                "num_predict": 10,
            },
        }

        # perf_counter is monotonic, so the measurement cannot be skewed by a
        # wall-clock adjustment while the request is in flight.
        start_time = time.perf_counter()
        response = requests.post(
            f"{ENDPOINT}/api/generate",
            json=payload,
            timeout=limit_seconds,
        )
        elapsed = time.perf_counter() - start_time

        if response.status_code != 200:
            print(f" ✗ Generate returned status {response.status_code}")
            return False, elapsed, ""

        data = response.json()
        response_text = data.get("response", "").strip()
        print(f" ✓ Generate responded in {elapsed:.2f}s")

        # Only show an ellipsis when the preview really was cut short.
        preview = response_text[:100]
        ellipsis = "..." if len(response_text) > 100 else ""
        print(f" Response: {preview}{ellipsis}")

        if elapsed < limit_seconds:
            print(f" ✓ Response time under {limit_seconds} seconds")
            return True, elapsed, response_text

        print(f" ✗ Response time {elapsed:.2f}s exceeds {limit_seconds}s limit")
        return False, elapsed, response_text
    except Exception as e:
        # Boundary of the check: any failure (timeout, connection error, bad
        # JSON) is reported as a failed check, with 0 as the duration.
        print(f" ✗ Error testing generate: {e}")
        return False, 0, ""
|
|
|
|
def check_uptime():
    """Log and return the moment at which this verification is performed.

    No pod start time is looked up here, so the returned value is only the
    local time of the check, not an uptime figure.
    """
    # A real implementation would ask the RunPod API for the pod start time;
    # for now only the time of the check is recorded.
    now = datetime.now()
    stamp = now.isoformat()
    print(f"[{stamp}] Pod verification timestamp")
    return now
|
|
|
|
def main():
    """Run every pod check, print a summary, save it as JSON and exit.

    Writes ``big_brain_verification.json`` to the current working directory,
    then terminates the process via ``sys.exit``: status 0 when both the tags
    check and the generate test passed, 1 otherwise.
    """
    print("=" * 60)
    print("Big Brain Pod Verification")
    print(f"Pod ID: {POD_ID}")
    print(f"Endpoint: {ENDPOINT}")
    print(f"Cost: ${COST_PER_HOUR}/hour")
    print("=" * 60)
    print()

    # Check uptime
    check_time = check_uptime()
    print()

    # Check API tags
    tags_ok, tags_time, models = check_api_tags()
    print()

    # Test generate
    generate_ok, generate_time, response = test_generate()
    print()

    # test_generate reports 0 as the duration when the request itself raised,
    # so 0 means "nothing was timed", not "answered instantly". Requiring a
    # positive duration keeps a timed-out request from showing "YES".
    under_limit = 0 < generate_time < 30

    # Summary
    print("=" * 60)
    print("VERIFICATION SUMMARY")
    print("=" * 60)
    print(f"API Tags Check: {'✓ PASS' if tags_ok else '✗ FAIL'}")
    print(f" Response time: {tags_time:.2f}s")
    print(f" Models found: {len(models)}")
    print()
    print(f"Generate Test: {'✓ PASS' if generate_ok else '✗ FAIL'}")
    print(f" Response time: {generate_time:.2f}s")
    print(f" Under 30s: {'✓ YES' if under_limit else '✗ NO'}")
    print()

    # Overall status
    overall_ok = tags_ok and generate_ok
    print(f"Overall Status: {'✓ POD LIVE' if overall_ok else '✗ POD ISSUES'}")

    # Cost awareness
    print()
    print(f"Cost Awareness: Pod costs ${COST_PER_HOUR}/hour")
    print(f"Verification time: {check_time.strftime('%Y-%m-%d %H:%M:%S')}")

    # Write results to file
    results = {
        "pod_id": POD_ID,
        "endpoint": ENDPOINT,
        "timestamp": check_time.isoformat(),
        "api_tags_ok": tags_ok,
        "api_tags_time": tags_time,
        "models": models,
        "generate_ok": generate_ok,
        "generate_time": generate_time,
        # Keep the stored sample short; the full text was already printed.
        "generate_response": response[:200] if response else "",
        "overall_ok": overall_ok,
        "cost_per_hour": COST_PER_HOUR,
    }

    # Explicit encoding so the file does not depend on the platform default.
    with open("big_brain_verification.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print()
    print("Results saved to big_brain_verification.json")

    # Exit with appropriate code
    sys.exit(0 if overall_ok else 1)
|
|
|
|
# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|