timmy-home/scripts/big_brain_manager.py
Timmy 4093c74c19
feat(big-brain): auto-stop pod when idle (#577)
Cost control for the L40S pod ($0.79/hr).

Idle watchdog (cron every 15 min; a sketch follows this list):
- Tracks last inference request timestamp
- If idle > 30 min, stops pod via RunPod GraphQL API
- Logs stop/start events with timestamps to cost log
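
A minimal sketch of that watchdog pass, assuming the helpers that big_brain_manager.py imports further down (get_last_request_time, get_pod_status, stop_pod, log_event, DEFAULT_POD_ID); big_brain_idle_watchdog.py itself is not shown on this page, and the 30-minute threshold is taken from the description above:

from datetime import datetime, timezone
from big_brain_idle_watchdog import (
    get_last_request_time, get_pod_status, stop_pod, log_event,
    DEFAULT_POD_ID,
)

IDLE_LIMIT_MIN = 30  # stop after 30 idle minutes, per this commit message

def idle_check(pod_id: str = DEFAULT_POD_ID) -> None:
    """Illustrative cron entry point: stop the pod once it has been idle too long."""
    last = get_last_request_time()  # assumed to return an aware datetime, or None if never touched
    if last is None:
        return
    idle_min = (datetime.now(timezone.utc) - last).total_seconds() / 60
    if idle_min > IDLE_LIMIT_MIN and get_pod_status(pod_id).get("desiredStatus") == "RUNNING":
        stop_pod(pod_id)  # RunPod GraphQL mutation, per the description above
        log_event("stop", pod_id, f"idle {idle_min:.0f} min > {IDLE_LIMIT_MIN} min")

Because cron runs it every 15 minutes, each pass is a single stateless check; a missed run only delays the stop by one interval.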

Auto-resume manager (usage example after this list):
- Import before inference to ensure pod is RUNNING
- If stopped, resumes and polls until Ollama responds
- Updates timestamp on each request
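
For example, a call site in the inference path might look like this; run_inference and its body are hypothetical, while ensure_running and touch_request are the entry points defined in the file below:

from big_brain_manager import ensure_running, touch_request

def run_inference(prompt: str) -> None:
    state = ensure_running()  # resumes the pod if stopped, blocks until Ollama answers
    if state["action"] in ("error", "resume_failed", "timeout", "unexpected_status"):
        raise RuntimeError(f"Big Brain unavailable: {state}")
    touch_request()  # reset the idle clock for the watchdog
    # ... send the prompt to the pod's Ollama endpoint here ...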

Components:
- big_brain_idle_watchdog.py: idle check + pod stop
- big_brain_manager.py: auto-resume + status
- 11 tests covering all states and edge cases (one case sketched below)
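
Those tests are not part of this file; purely as an illustration of the kind of state coverage described, the resumed-from-stopped path might be exercised like this with unittest.mock:

from unittest import mock
import big_brain_manager as bbm

def test_resumes_stopped_pod():
    with mock.patch.object(bbm, "get_pod_status", return_value={"desiredStatus": "EXITED"}), \
         mock.patch.object(bbm, "resume_pod", return_value={"data": {}}), \
         mock.patch.object(bbm, "is_ollama_ready", return_value=True), \
         mock.patch.object(bbm, "log_event"), \
         mock.patch.object(bbm, "touch_last_request"), \
         mock.patch.object(bbm, "time"):  # skip the real 10 s poll sleep
        result = bbm.ensure_running("pod123")  # hypothetical pod ID
    assert result["action"] == "resumed"
    assert result["final_status"] == "RUNNING"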

Closes #577
2026-04-13 18:14:01 -04:00


#!/usr/bin/env python3
"""
Big Brain Manager — Auto-resume pod before inference.

Import this module or run standalone to ensure the Big Brain pod is
RUNNING before sending inference requests. If the pod is stopped,
resumes it and polls until ready.

Usage:
    python3 scripts/big_brain_manager.py --ensure-running  # resume if stopped
    python3 scripts/big_brain_manager.py --status          # just check status
    python3 scripts/big_brain_manager.py --stop            # manually stop
    python3 scripts/big_brain_manager.py --resume          # manually resume
    python3 scripts/big_brain_manager.py --json            # machine-readable

As a library:
    from big_brain_manager import ensure_running, touch_request
    ensure_running()   # blocks until pod is RUNNING
    touch_request()    # update idle timestamp
"""
import json
import os
import sys
import time
import urllib.request
import urllib.error
from datetime import datetime, timezone

# Reuse watchdog internals
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from big_brain_idle_watchdog import (
    get_pod_status, stop_pod, resume_pod, get_runpod_key,
    touch_last_request, get_last_request_time, log_event,
    DEFAULT_POD_ID, COST_PER_HOUR, TIMESTAMP_FILE,
)

OLLAMA_ENDPOINT = "https://{pod_id}-11434.proxy.runpod.net"
POLL_INTERVAL_SEC = 10
MAX_POLL_ATTEMPTS = 30  # 5 minutes max wait


def is_ollama_ready(pod_id: str) -> bool:
    """Check if Ollama is responding on the pod endpoint."""
    url = OLLAMA_ENDPOINT.format(pod_id=pod_id) + "/api/tags"
    try:
        req = urllib.request.Request(url, method="GET")
        with urllib.request.urlopen(req, timeout=10) as resp:
            return resp.status == 200
    except Exception:
        return False


def ensure_running(pod_id: str = DEFAULT_POD_ID, timeout_sec: int = 300) -> dict:
    """
    Ensure the Big Brain pod is running. Resume if stopped, wait until ready.
    Returns status dict with action taken and final state.
    """
    pod = get_pod_status(pod_id)
    status = pod.get("desiredStatus", "UNKNOWN")
    result = {"pod_id": pod_id, "initial_status": status, "action": "none"}

    if pod.get("error"):
        result["error"] = pod["error"]
        result["action"] = "error"
        return result

    if status == "RUNNING":
        # Verify Ollama is actually serving
        if is_ollama_ready(pod_id):
            result["action"] = "already_running"
            result["final_status"] = "RUNNING"
            touch_last_request()
            return result
        else:
            # Pod is running but Ollama not ready yet — wait
            result["action"] = "waiting_for_ollama"
    elif status in ("EXITED", "STOPPED"):
        # Resume the pod
        resume_result = resume_pod(pod_id)
        if "errors" in resume_result:
            result["action"] = "resume_failed"
            result["error"] = resume_result["errors"][0].get("message", "unknown")
            return result
        result["action"] = "resumed"
        log_event("resume", pod_id, "auto-resume before inference")
    else:
        result["action"] = "unexpected_status"
        result["final_status"] = status
        return result

    # Poll until Ollama is ready
    attempts = 0
    while attempts < (timeout_sec / POLL_INTERVAL_SEC):
        time.sleep(POLL_INTERVAL_SEC)
        attempts += 1
        if is_ollama_ready(pod_id):
            result["final_status"] = "RUNNING"
            result["wait_seconds"] = attempts * POLL_INTERVAL_SEC
            touch_last_request()
            return result

    result["action"] = "timeout"
    result["final_status"] = get_pod_status(pod_id).get("desiredStatus", "UNKNOWN")
    return result


def touch_request():
    """Public wrapper for updating the last request timestamp."""
    touch_last_request()


def main():
    import argparse
    parser = argparse.ArgumentParser(description="Big Brain pod manager — auto-resume for inference")
    parser.add_argument("--pod-id", default=DEFAULT_POD_ID, help="RunPod pod ID")
    parser.add_argument("--ensure-running", action="store_true", help="Resume if stopped, wait for ready")
    parser.add_argument("--status", action="store_true", help="Show pod status")
    parser.add_argument("--stop", action="store_true", help="Stop the pod")
    parser.add_argument("--resume", action="store_true", help="Resume the pod")
    parser.add_argument("--touch", action="store_true", help="Update last request timestamp")
    parser.add_argument("--json", dest="as_json", action="store_true", help="JSON output")
    args = parser.parse_args()

    if args.touch:
        touch_request()
        print(f"Timestamp updated: {TIMESTAMP_FILE}")
        return

    if args.ensure_running:
        result = ensure_running(args.pod_id)
        if args.as_json:
            print(json.dumps(result, indent=2))
        else:
            action = result["action"]
            if action == "already_running":
                print(f"[Big Brain] Already RUNNING and Ollama ready")
            elif action == "resumed":
                wait = result.get("wait_seconds", 0)
                print(f"[Big Brain] Resumed pod, Ollama ready in {wait}s")
            elif action == "resume_failed":
                print(f"[Big Brain] RESUME FAILED: {result.get('error', 'unknown')}", file=sys.stderr)
                sys.exit(1)
            elif action == "timeout":
                print(f"[Big Brain] Timed out waiting for Ollama", file=sys.stderr)
                sys.exit(1)
        return

    if args.stop:
        result = stop_pod(args.pod_id)
        if "errors" in result:
            print(f"Stop failed: {result['errors'][0].get('message')}", file=sys.stderr)
            sys.exit(1)
        log_event("stop", args.pod_id, "manual stop")
        print(f"Pod {args.pod_id} stopped")
        return

    if args.resume:
        result = resume_pod(args.pod_id)
        if "errors" in result:
            print(f"Resume failed: {result['errors'][0].get('message')}", file=sys.stderr)
            sys.exit(1)
        log_event("resume", args.pod_id, "manual resume")
        print(f"Pod {args.pod_id} resuming...")
        return

    # Default: status
    pod = get_pod_status(args.pod_id)
    last_req = get_last_request_time()
    ollama_ok = is_ollama_ready(args.pod_id) if pod.get("desiredStatus") == "RUNNING" else False

    if args.as_json:
        print(json.dumps({
            "pod": pod,
            "ollama_ready": ollama_ok,
            "last_request": last_req.isoformat() if last_req else None,
        }, indent=2))
    else:
        print(f"Pod: {args.pod_id}")
        print(f"Status: {pod.get('desiredStatus', 'UNKNOWN')}")
        print(f"Ollama: {'ready' if ollama_ok else 'not responding'}")
        print(f"Last request: {last_req.isoformat() if last_req else 'never'}")
        print(f"Cost/hr: ${COST_PER_HOUR}")
        if pod.get("uptimeSeconds"):
            hrs = pod["uptimeSeconds"] / 3600
            print(f"Uptime: {hrs:.1f} hrs (${hrs * COST_PER_HOUR:.2f})")


if __name__ == "__main__":
    main()