Some checks failed
Smoke Test / smoke (pull_request) Failing after 8s
Cost control for L40S pod ($0.79/hr). Idle watchdog (cron every 15 min): - Tracks last inference request timestamp - If idle > 30 min, stops pod via RunPod GraphQL API - Logs stop/start events with timestamps to cost log Auto-resume manager: - Import before inference to ensure pod is RUNNING - If stopped, resumes and polls until Ollama responds - Updates timestamp on each request Components: - big_brain_idle_watchdog.py: idle check + pod stop - big_brain_manager.py: auto-resume + status - 11 tests covering all states and edge cases Closes #577
195 lines
7.0 KiB
Python
195 lines
7.0 KiB
Python
#!/usr/bin/env python3
"""
Big Brain Manager — Auto-resume pod before inference.

Import this module or run standalone to ensure the Big Brain pod is
RUNNING before sending inference requests. If the pod is stopped,
resumes it and polls until ready.

Usage:
    python3 scripts/big_brain_manager.py --ensure-running  # resume if stopped
    python3 scripts/big_brain_manager.py --status          # just check status
    python3 scripts/big_brain_manager.py --stop            # manually stop
    python3 scripts/big_brain_manager.py --resume          # manually resume
    python3 scripts/big_brain_manager.py --json            # machine-readable

As a library:
    from big_brain_manager import ensure_running, touch_request
    ensure_running()   # blocks until pod is RUNNING
    touch_request()    # update idle timestamp
"""
|
|
|
|
import json
import os
import sys
import time
import urllib.request
import urllib.error
from datetime import datetime, timezone

# Reuse watchdog internals — the watchdog module lives in the same
# scripts/ directory as this file, hence the sys.path insertion.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from big_brain_idle_watchdog import (
    get_pod_status, stop_pod, resume_pod, get_runpod_key,
    touch_last_request, get_last_request_time, log_event,
    DEFAULT_POD_ID, COST_PER_HOUR, TIMESTAMP_FILE,
)

# Ollama is exposed through RunPod's HTTP proxy on port 11434.
OLLAMA_ENDPOINT = "https://{pod_id}-11434.proxy.runpod.net"
POLL_INTERVAL_SEC = 10
MAX_POLL_ATTEMPTS = 30  # 5 minutes max wait
# NOTE(review): MAX_POLL_ATTEMPTS is not referenced in this file —
# ensure_running() derives its poll budget from timeout_sec instead.
# Confirm whether another module imports it before removing.
|
def is_ollama_ready(pod_id: str) -> bool:
    """Return True if Ollama answers HTTP 200 on the pod's proxy endpoint.

    Probes the /api/tags route; any network failure, timeout, or non-200
    response is treated as "not ready" rather than raised to the caller.
    """
    probe_url = "{}/api/tags".format(OLLAMA_ENDPOINT.format(pod_id=pod_id))
    try:
        request = urllib.request.Request(probe_url, method="GET")
        with urllib.request.urlopen(request, timeout=10) as response:
            ok = response.status == 200
    except Exception:
        # Best-effort probe: DNS errors, timeouts, proxy 5xx — all mean "not ready".
        return False
    return ok
|
|
|
|
|
|
def ensure_running(pod_id: str = DEFAULT_POD_ID, timeout_sec: int = 300) -> dict:
    """Ensure the Big Brain pod is running; resume it if stopped.

    Queries pod status, resumes a stopped pod, then polls the Ollama
    endpoint every POLL_INTERVAL_SEC seconds up to timeout_sec.

    Returns a dict describing the action taken ("already_running",
    "resumed", "waiting_for_ollama", "resume_failed", "error",
    "unexpected_status", or "timeout") and the final pod state.
    """
    pod = get_pod_status(pod_id)
    current = pod.get("desiredStatus", "UNKNOWN")
    result = {"pod_id": pod_id, "initial_status": current, "action": "none"}

    # Guard: the status query itself failed — nothing else to do.
    if pod.get("error"):
        result["action"] = "error"
        result["error"] = pod["error"]
        return result

    if current == "RUNNING":
        if is_ollama_ready(pod_id):
            # Pod is up and serving; refresh the idle timestamp and bail out.
            result["action"] = "already_running"
            result["final_status"] = "RUNNING"
            touch_last_request()
            return result
        # Pod container is up but Ollama hasn't bound yet — fall through to polling.
        result["action"] = "waiting_for_ollama"
    elif current in ("EXITED", "STOPPED"):
        reply = resume_pod(pod_id)
        if "errors" in reply:
            result["action"] = "resume_failed"
            result["error"] = reply["errors"][0].get("message", "unknown")
            return result
        result["action"] = "resumed"
        log_event("resume", pod_id, "auto-resume before inference")
    else:
        # Transitional or unknown state — report it rather than guess.
        result["action"] = "unexpected_status"
        result["final_status"] = current
        return result

    # Poll the Ollama endpoint until it responds or the budget runs out.
    polls = 0
    while polls * POLL_INTERVAL_SEC < timeout_sec:
        time.sleep(POLL_INTERVAL_SEC)
        polls += 1
        if is_ollama_ready(pod_id):
            result["final_status"] = "RUNNING"
            result["wait_seconds"] = polls * POLL_INTERVAL_SEC
            touch_last_request()
            return result

    result["action"] = "timeout"
    result["final_status"] = get_pod_status(pod_id).get("desiredStatus", "UNKNOWN")
    return result
|
|
|
|
|
|
def touch_request() -> None:
    """Public wrapper for updating the last request timestamp.

    Delegates to the watchdog's touch_last_request() so callers only
    need to import this module; the watchdog's idle check reads the
    same timestamp to decide when to stop the pod.
    """
    touch_last_request()
|
|
|
|
|
|
def main():
    """CLI entry point: --touch / --ensure-running / --stop / --resume / status.

    Exits non-zero when an operation fails (resume failure, timeout,
    status-query error, or an unexpected pod state).
    """
    import argparse
    parser = argparse.ArgumentParser(description="Big Brain pod manager — auto-resume for inference")
    parser.add_argument("--pod-id", default=DEFAULT_POD_ID, help="RunPod pod ID")
    parser.add_argument("--ensure-running", action="store_true", help="Resume if stopped, wait for ready")
    parser.add_argument("--status", action="store_true", help="Show pod status")
    parser.add_argument("--stop", action="store_true", help="Stop the pod")
    parser.add_argument("--resume", action="store_true", help="Resume the pod")
    parser.add_argument("--touch", action="store_true", help="Update last request timestamp")
    parser.add_argument("--json", dest="as_json", action="store_true", help="JSON output")

    args = parser.parse_args()

    if args.touch:
        touch_request()
        print(f"Timestamp updated: {TIMESTAMP_FILE}")
        return

    if args.ensure_running:
        result = ensure_running(args.pod_id)
        if args.as_json:
            print(json.dumps(result, indent=2))
        else:
            action = result["action"]
            if action == "already_running":
                print("[Big Brain] Already RUNNING and Ollama ready")
            elif action == "resumed":
                # "resumed" survives polling only on success (timeout overwrites it).
                wait = result.get("wait_seconds", 0)
                print(f"[Big Brain] Resumed pod, Ollama ready in {wait}s")
            elif action == "waiting_for_ollama":
                # Pod was already RUNNING but Ollama needed time to come up.
                wait = result.get("wait_seconds", 0)
                print(f"[Big Brain] Ollama ready in {wait}s")
            elif action == "resume_failed":
                print(f"[Big Brain] RESUME FAILED: {result.get('error', 'unknown')}", file=sys.stderr)
                sys.exit(1)
            elif action == "timeout":
                print("[Big Brain] Timed out waiting for Ollama", file=sys.stderr)
                sys.exit(1)
            elif action == "error":
                # Fix: previously this case printed nothing and exited 0,
                # so scripted callers saw success when the status query failed.
                print(f"[Big Brain] STATUS ERROR: {result.get('error', 'unknown')}", file=sys.stderr)
                sys.exit(1)
            elif action == "unexpected_status":
                print(f"[Big Brain] Unexpected pod status: {result.get('final_status', 'UNKNOWN')}", file=sys.stderr)
                sys.exit(1)
        return

    if args.stop:
        result = stop_pod(args.pod_id)
        if "errors" in result:
            print(f"Stop failed: {result['errors'][0].get('message')}", file=sys.stderr)
            sys.exit(1)
        log_event("stop", args.pod_id, "manual stop")
        print(f"Pod {args.pod_id} stopped")
        return

    if args.resume:
        result = resume_pod(args.pod_id)
        if "errors" in result:
            print(f"Resume failed: {result['errors'][0].get('message')}", file=sys.stderr)
            sys.exit(1)
        log_event("resume", args.pod_id, "manual resume")
        print(f"Pod {args.pod_id} resuming...")
        return

    # Default: show status. Only probe Ollama when the pod claims to be RUNNING.
    pod = get_pod_status(args.pod_id)
    last_req = get_last_request_time()
    ollama_ok = is_ollama_ready(args.pod_id) if pod.get("desiredStatus") == "RUNNING" else False

    if args.as_json:
        print(json.dumps({
            "pod": pod,
            "ollama_ready": ollama_ok,
            "last_request": last_req.isoformat() if last_req else None,
        }, indent=2))
    else:
        print(f"Pod: {args.pod_id}")
        print(f"Status: {pod.get('desiredStatus', 'UNKNOWN')}")
        print(f"Ollama: {'ready' if ollama_ok else 'not responding'}")
        print(f"Last request: {last_req.isoformat() if last_req else 'never'}")
        print(f"Cost/hr: ${COST_PER_HOUR}")
        if pod.get("uptimeSeconds"):
            hrs = pod["uptimeSeconds"] / 3600
            print(f"Uptime: {hrs:.1f} hrs (${hrs * COST_PER_HOUR:.2f})")


if __name__ == "__main__":
    main()
|