#!/usr/bin/env python3
"""
Predictive Resource Allocation for the Timmy Foundation Fleet
Analyzes historical utilization patterns, predicts workload surges,
and recommends pre-provisioning actions.
Usage:
python3 scripts/predictive_resource_allocator.py \
--metrics metrics/*.jsonl \
--heartbeat heartbeat/*.jsonl \
--horizon 6
# JSON output
python3 scripts/predictive_resource_allocator.py --json
# Quick forecast from default paths
python3 scripts/predictive_resource_allocator.py
"""
import argparse
import glob
import json
import os
import sys
from collections import Counter, defaultdict
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
# ── Constants ────────────────────────────────────────────────────────────────
SURGE_THRESHOLD = 1.5  # recent/baseline ratio above which demand counts as a surge
HEAVY_TOKEN_THRESHOLD = 10000  # recent prompt tokens marking a "heavy" background caller
DEFAULT_HORIZON_HOURS = 6  # default analysis and forecast window, in hours
DEFAULT_METRICS_GLOB = "metrics/local_*.jsonl"  # metric logs, relative to project root
DEFAULT_HEARTBEAT_GLOB = "heartbeat/ticks_*.jsonl"  # heartbeat logs, relative to project root
SCRIPT_DIR = Path(__file__).resolve().parent  # directory containing this script
ROOT_DIR = SCRIPT_DIR.parent  # project root (this script lives one level below it)
# ── Data Loading ─────────────────────────────────────────────────────────────
def _parse_ts(value: str) -> datetime:
"""Parse ISO timestamp to UTC datetime."""
return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone(timezone.utc)
def load_jsonl(paths: Iterable[str]) -> List[dict]:
    """Expand each glob pattern and collect every parseable JSONL row.

    Blank lines and lines that fail JSON parsing are skipped silently,
    as are glob matches that are not regular files.
    """
    collected: List[dict] = []
    for pattern in paths:
        files = [p for p in glob.glob(pattern) if os.path.isfile(p)]
        for file_path in files:
            with open(file_path, encoding="utf-8") as handle:
                for raw in handle:
                    stripped = raw.strip()
                    if not stripped:
                        continue
                    try:
                        collected.append(json.loads(stripped))
                    except json.JSONDecodeError:
                        pass
    return collected
def _default_paths(glob_pattern: str) -> List[str]:
    """Expand *glob_pattern* under the project root.

    When nothing matches, return the joined (non-matching) path itself so
    callers always receive at least one entry to hand to the loader.
    """
    candidate = os.path.join(ROOT_DIR, glob_pattern)
    matched = glob.glob(candidate)
    if matched:
        return matched
    return [candidate]
# ── Time-Series Analysis ─────────────────────────────────────────────────────
def compute_rates(
    rows: List[dict],
    horizon_hours: int,
) -> Tuple[float, float, float, float, float]:
    """Compare the recent window against the immediately preceding baseline.

    The recent window covers the last ``horizon_hours`` before the newest
    timestamp; the baseline window covers the ``horizon_hours`` before that.

    Returns:
        (recent_rate, baseline_rate, surge_factor,
         recent_token_rate, baseline_token_rate)
    """
    if not rows:
        return 0.0, 0.0, 1.0, 0.0, 0.0
    # Parse each timestamp exactly once; the original parsed every row up to
    # three times (once for the max, once per window filter).
    stamped = [(_parse_ts(r["timestamp"]), r) for r in rows]
    latest = max(ts for ts, _ in stamped)
    recent_cutoff = latest - timedelta(hours=horizon_hours)
    baseline_cutoff = latest - timedelta(hours=horizon_hours * 2)
    recent = [r for ts, r in stamped if ts >= recent_cutoff]
    baseline = [r for ts, r in stamped if baseline_cutoff <= ts < recent_cutoff]
    hours = max(horizon_hours, 1)
    recent_rate = len(recent) / hours
    # With no baseline data, fall back to the (floored) recent rate so the
    # surge factor stays near 1.0 instead of exploding.
    baseline_rate = len(baseline) / hours if baseline else max(0.1, recent_rate)
    recent_tokens = sum(int(r.get("prompt_len", 0)) for r in recent)
    baseline_tokens = sum(int(r.get("prompt_len", 0)) for r in baseline)
    recent_token_rate = recent_tokens / hours
    baseline_token_rate = (
        baseline_tokens / hours if baseline else max(1.0, recent_token_rate)
    )
    # Surge is the worse of request-count surge and token-volume surge;
    # denominators are clamped to avoid division by zero.
    request_surge = recent_rate / max(baseline_rate, 0.01)
    token_surge = recent_token_rate / max(baseline_token_rate, 0.01)
    surge_factor = max(request_surge, token_surge)
    return recent_rate, baseline_rate, surge_factor, recent_token_rate, baseline_token_rate
def analyze_callers(rows: List[dict], horizon_hours: int) -> List[Dict[str, Any]]:
    """Aggregate per-caller request, token, and failure stats for the recent window."""
    if not rows:
        return []
    latest = max(_parse_ts(r["timestamp"]) for r in rows)
    cutoff = latest - timedelta(hours=horizon_hours)
    calls: Counter = Counter()
    tokens: Counter = Counter()
    failures: Counter = Counter()
    for row in rows:
        if _parse_ts(row["timestamp"]) < cutoff:
            continue
        who = row.get("caller", "unknown")
        calls[who] += 1
        tokens[who] += int(row.get("prompt_len", 0))
        # A missing "success" key is treated as success.
        if not row.get("success", True):
            failures[who] += 1
    summary = [
        {
            "caller": who,
            "requests": count,
            "prompt_tokens": tokens[who],
            "failures": failures[who],
            "failure_rate": round(failures[who] / max(count, 1) * 100, 1),
        }
        for who, count in calls.items()
    ]
    # Busiest callers first; token volume breaks ties.
    summary.sort(key=lambda entry: (-entry["requests"], -entry["prompt_tokens"]))
    return summary
def analyze_heartbeat(rows: List[dict], horizon_hours: int) -> Dict[str, int]:
    """Count gitea outages and inference failures in the recent window."""
    counts = {"gitea_outages": 0, "inference_failures": 0, "total_checks": 0}
    if not rows:
        return counts
    latest = max(_parse_ts(r["timestamp"]) for r in rows)
    cutoff = latest - timedelta(hours=horizon_hours)
    for row in rows:
        if _parse_ts(row["timestamp"]) < cutoff:
            continue
        counts["total_checks"] += 1
        perception = row.get("perception", {})
        # Only an explicit False counts as a failure; absent keys do not.
        if perception.get("gitea_alive") is False:
            counts["gitea_outages"] += 1
        if perception.get("model_health", {}).get("inference_ok") is False:
            counts["inference_failures"] += 1
    return counts
# ── Prediction Engine ────────────────────────────────────────────────────────
def predict_demand(
    recent_rate: float,
    baseline_rate: float,
    surge_factor: float,
    horizon_hours: int,
) -> Dict[str, Any]:
    """Project near-term request demand and classify it into a demand level."""
    # Forecast never dips below what we are already seeing; the baseline is
    # scaled up by a dampened (75%) surge factor, floored at 1.0.
    scaled_baseline = baseline_rate * max(1.0, surge_factor * 0.75)
    predicted_rate = round(max(recent_rate, scaled_baseline), 2)
    # Walk the thresholds from most to least severe; first match wins.
    demand_level = "low"
    for limit, label in ((3.0, "critical"), (SURGE_THRESHOLD, "elevated"), (1.0, "normal")):
        if surge_factor > limit:
            demand_level = label
            break
    return {
        "predicted_requests_per_hour": predicted_rate,
        "surge_factor": round(surge_factor, 2),
        "demand_level": demand_level,
        "horizon_hours": horizon_hours,
    }
def determine_posture(
    surge_factor: float,
    callers: List[Dict[str, Any]],
    heartbeat: Dict[str, int],
) -> Tuple[str, str, List[str]]:
    """Derive fleet posture plus an ordered list of recommended actions.

    Returns:
        (resource_mode, dispatch_posture, actions)
    """
    mode, posture = "steady", "normal"
    actions: List[str] = []
    # Demand surge.
    if surge_factor > SURGE_THRESHOLD:
        mode = "surge"
        actions.append(
            "Pre-warm local inference before the next forecast window."
        )
    # Large batch/background jobs burning tokens in the recent window.
    has_heavy = any(
        c["prompt_tokens"] >= HEAVY_TOKEN_THRESHOLD
        and ("batch" in c["caller"] or "know-thy-father" in c["caller"])
        for c in callers
    )
    if has_heavy:
        actions.append(
            "Throttle or defer large background jobs until off-peak capacity is available."
        )
    # Callers with high failure rates and enough volume to matter.
    failing = [c for c in callers if c["failure_rate"] > 20 and c["requests"] >= 3]
    if failing:
        names = ", ".join(c["caller"] for c in failing[:3])
        actions.append(
            f"Investigate high failure rates in: {names}."
        )
    # Model reliability.
    if heartbeat["inference_failures"] >= 2:
        mode = "surge"
        actions.append(
            "Investigate local model reliability and reserve headroom for heartbeat traffic."
        )
    # Forge availability.
    if heartbeat["gitea_outages"] >= 1:
        posture = "degraded"
        actions.append(
            "Pre-fetch or cache forge state before the next dispatch window."
        )
    # Always return at least one action.
    if not actions:
        actions.append(
            "Maintain current resource allocation; no surge indicators detected."
        )
    return mode, posture, actions
# ── Main Forecast ────────────────────────────────────────────────────────────
def forecast(
    metrics_paths: List[str],
    heartbeat_paths: List[str],
    horizon_hours: int = DEFAULT_HORIZON_HOURS,
) -> Dict[str, Any]:
    """Build the complete forecast report from metric and heartbeat JSONL logs."""
    metric_rows = load_jsonl(metrics_paths)
    heartbeat_rows = load_jsonl(heartbeat_paths)
    rates = compute_rates(metric_rows, horizon_hours)
    recent_rate, baseline_rate, surge_factor, recent_tok_rate, base_tok_rate = rates
    callers = analyze_callers(metric_rows, horizon_hours)
    heartbeat = analyze_heartbeat(heartbeat_rows, horizon_hours)
    demand = predict_demand(recent_rate, baseline_rate, surge_factor, horizon_hours)
    mode, posture, actions = determine_posture(surge_factor, callers, heartbeat)
    report: Dict[str, Any] = {
        "resource_mode": mode,
        "dispatch_posture": posture,
        "horizon_hours": horizon_hours,
        "recent_request_rate": round(recent_rate, 2),
        "baseline_request_rate": round(baseline_rate, 2),
        "predicted_request_rate": demand["predicted_requests_per_hour"],
        "surge_factor": demand["surge_factor"],
        "demand_level": demand["demand_level"],
        "recent_prompt_tokens_per_hour": round(recent_tok_rate, 2),
        "baseline_prompt_tokens_per_hour": round(base_tok_rate, 2),
        "gitea_outages": heartbeat["gitea_outages"],
        "inference_failures": heartbeat["inference_failures"],
        "heartbeat_checks": heartbeat["total_checks"],
        "top_callers": callers[:10],
        "recommended_actions": actions,
    }
    return report
# ── Output Formatters ────────────────────────────────────────────────────────
def format_markdown(fc: Dict[str, Any]) -> str:
    """Render a forecast dict as a human-readable markdown report."""
    out: List[str] = [
        "# Predictive Resource Allocation — Fleet Forecast",
        "",
        f"**Horizon:** {fc['horizon_hours']} hours",
        f"**Resource mode:** {fc['resource_mode']}",
        f"**Dispatch posture:** {fc['dispatch_posture']}",
        f"**Demand level:** {fc['demand_level']}",
        "",
        "## Demand Metrics",
        "",
        "| Metric | Recent | Baseline |",
        "|--------|-------:|---------:|",
    ]
    out.append(
        f"| Requests/hour | {fc['recent_request_rate']} | {fc['baseline_request_rate']} |"
    )
    out.append(
        f"| Prompt tokens/hour | {fc['recent_prompt_tokens_per_hour']} "
        f"| {fc['baseline_prompt_tokens_per_hour']} |"
    )
    out.extend([
        "",
        f"**Surge factor:** {fc['surge_factor']}x",
        f"**Predicted request rate:** {fc['predicted_request_rate']}/hour",
        "",
        "## Infrastructure Health",
        "",
        f"- Gitea outages (recent window): {fc['gitea_outages']}",
        f"- Inference failures (recent window): {fc['inference_failures']}",
        f"- Heartbeat checks analyzed: {fc['heartbeat_checks']}",
        "",
        "## Recommended Actions",
        "",
    ])
    out.extend(f"- {action}" for action in fc["recommended_actions"])
    # The caller table is only emitted when there is traffic to show.
    if fc["top_callers"]:
        out.extend([
            "",
            "## Top Callers (Recent Window)",
            "",
            "| Caller | Requests | Tokens | Failures |",
            "|--------|---------:|-------:|---------:|",
        ])
        out.extend(
            f"| {c['caller']} | {c['requests']} | {c['prompt_tokens']} | {c['failures']} |"
            for c in fc["top_callers"]
        )
    return "\n".join(out) + "\n"
# ── CLI ──────────────────────────────────────────────────────────────────────
def main() -> int:
    """CLI entry point: parse arguments, run the forecast, print the report."""
    parser = argparse.ArgumentParser(
        description="Predictive resource allocation for the Timmy fleet"
    )
    parser.add_argument(
        "--metrics", nargs="*", default=None,
        help="Metric JSONL paths (supports globs). Default: metrics/local_*.jsonl",
    )
    parser.add_argument(
        "--heartbeat", nargs="*", default=None,
        help="Heartbeat JSONL paths (supports globs). Default: heartbeat/ticks_*.jsonl",
    )
    parser.add_argument(
        "--horizon", type=int, default=DEFAULT_HORIZON_HOURS,
        help="Forecast horizon in hours (default: 6)",
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output raw JSON instead of markdown",
    )
    args = parser.parse_args()
    # NOTE: an empty --metrics/--heartbeat list (flag given with no values)
    # also falls through to the defaults, since [] is falsy.
    metrics_paths = args.metrics or _default_paths(DEFAULT_METRICS_GLOB)
    heartbeat_paths = args.heartbeat or _default_paths(DEFAULT_HEARTBEAT_GLOB)
    fc = forecast(metrics_paths, heartbeat_paths, args.horizon)
    rendered = json.dumps(fc, indent=2) if args.json else format_markdown(fc)
    print(rendered)
    return 0
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell.
    sys.exit(main())