411 lines
14 KiB
Python
411 lines
14 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Predictive Resource Allocation — Timmy Foundation Fleet
|
||
|
|
|
||
|
|
Analyzes historical utilization patterns, predicts workload surges,
|
||
|
|
and recommends pre-provisioning actions.
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
python3 scripts/predictive_resource_allocator.py \
|
||
|
|
--metrics metrics/*.jsonl \
|
||
|
|
--heartbeat heartbeat/*.jsonl \
|
||
|
|
--horizon 6
|
||
|
|
|
||
|
|
# JSON output
|
||
|
|
python3 scripts/predictive_resource_allocator.py --json
|
||
|
|
|
||
|
|
# Quick forecast from default paths
|
||
|
|
python3 scripts/predictive_resource_allocator.py
|
||
|
|
"""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import glob
|
||
|
|
import json
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
from collections import Counter, defaultdict
|
||
|
|
from datetime import datetime, timedelta, timezone
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||
|
|
|
||
|
|
|
||
|
|
# ── Constants ────────────────────────────────────────────────────────────────

# Surge factor (recent rate / baseline rate) above which demand is treated
# as a surge by determine_posture() and predict_demand().
SURGE_THRESHOLD = 1.5
# Prompt-token volume above which a caller counts as a "heavy" background job.
HEAVY_TOKEN_THRESHOLD = 10000
# Default forecast window, in hours (overridable via --horizon).
DEFAULT_HORIZON_HOURS = 6
# Default globs, relative to the project root, for the two log sources.
DEFAULT_METRICS_GLOB = "metrics/local_*.jsonl"
DEFAULT_HEARTBEAT_GLOB = "heartbeat/ticks_*.jsonl"

# Project root is the parent of the directory holding this script
# (per the usage docstring, the script lives in scripts/ under the root).
SCRIPT_DIR = Path(__file__).resolve().parent
ROOT_DIR = SCRIPT_DIR.parent
|
||
|
|
|
||
|
|
|
||
|
|
# ── Data Loading ─────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def _parse_ts(value: str) -> datetime:
|
||
|
|
"""Parse ISO timestamp to UTC datetime."""
|
||
|
|
return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone(timezone.utc)
|
||
|
|
|
||
|
|
|
||
|
|
def load_jsonl(paths: Iterable[str]) -> List[dict]:
    """Collect JSONL records from every file matched by the given path patterns.

    Blank lines and lines that fail to parse as JSON are skipped silently;
    non-file glob matches (e.g. directories) are ignored.
    """
    records: List[dict] = []
    for pattern in paths:
        for match in glob.glob(pattern):
            if not os.path.isfile(match):
                continue  # a glob can match directories; skip them
            with open(match, encoding="utf-8") as handle:
                for raw in handle:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        records.append(json.loads(raw))
                    except json.JSONDecodeError:
                        pass  # tolerate malformed lines; keep the rest
    return records
|
||
|
|
|
||
|
|
|
||
|
|
def _default_paths(glob_pattern: str) -> List[str]:
    """Resolve *glob_pattern* relative to the project root.

    Returns the matched paths, or the unexpanded pattern itself when
    nothing matches (so downstream loaders still receive a path to try).
    """
    full = os.path.join(ROOT_DIR, glob_pattern)
    hits = glob.glob(full)
    return hits or [full]
|
||
|
|
|
||
|
|
|
||
|
|
# ── Time-Series Analysis ─────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def compute_rates(
    rows: List[dict],
    horizon_hours: int,
) -> Tuple[float, float, float, float, float]:
    """
    Compare the recent window against the preceding baseline window.

    The recent window is the last ``horizon_hours`` before the newest
    timestamp in *rows*; the baseline window is the ``horizon_hours``
    immediately before that.

    Args:
        rows: Metric rows, each with an ISO-8601 "timestamp" field and an
            optional "prompt_len" (coerced to int, default 0).
        horizon_hours: Width of each comparison window, in hours.

    Returns:
        (recent_rate, baseline_rate, surge_factor, recent_token_rate,
        baseline_token_rate) — rates are per hour; surge_factor is the
        larger of the request and token surges.
    """
    if not rows:
        # No data: neutral surge factor, zero rates.
        return 0.0, 0.0, 1.0, 0.0, 0.0

    # Parse every timestamp exactly once; the previous version re-parsed
    # each row up to three times (latest scan + both window filters).
    stamped = [(_parse_ts(r["timestamp"]), r) for r in rows]
    latest = max(ts for ts, _ in stamped)
    recent_cutoff = latest - timedelta(hours=horizon_hours)
    baseline_cutoff = latest - timedelta(hours=horizon_hours * 2)

    recent = [r for ts, r in stamped if ts >= recent_cutoff]
    baseline = [r for ts, r in stamped if baseline_cutoff <= ts < recent_cutoff]

    hours = max(horizon_hours, 1)  # guard against a zero/negative horizon
    recent_rate = len(recent) / hours
    # With no baseline data, fall back to the recent rate (floored at 0.1)
    # so the surge factor stays near 1 instead of exploding.
    baseline_rate = len(baseline) / hours if baseline else max(0.1, recent_rate)

    recent_tokens = sum(int(r.get("prompt_len", 0)) for r in recent)
    baseline_tokens = sum(int(r.get("prompt_len", 0)) for r in baseline)
    recent_token_rate = recent_tokens / hours
    baseline_token_rate = (
        baseline_tokens / hours if baseline else max(1.0, recent_token_rate)
    )

    # Surge on whichever axis (requests or tokens) grew more; denominators
    # are floored to avoid division by zero.
    request_surge = recent_rate / max(baseline_rate, 0.01)
    token_surge = recent_token_rate / max(baseline_token_rate, 0.01)
    surge_factor = max(request_surge, token_surge)

    return recent_rate, baseline_rate, surge_factor, recent_token_rate, baseline_token_rate
|
||
|
|
|
||
|
|
|
||
|
|
def analyze_callers(rows: List[dict], horizon_hours: int) -> List[Dict[str, Any]]:
    """Summarize per-caller activity within the recent window.

    Rows missing a "caller" are grouped under "unknown"; a row counts as a
    failure only when it carries an explicit falsy "success" value.
    """
    if not rows:
        return []

    latest = max(_parse_ts(r["timestamp"]) for r in rows)
    cutoff = latest - timedelta(hours=horizon_hours)

    calls: Counter = Counter()
    tokens: Counter = Counter()
    failures: Counter = Counter()

    for entry in rows:
        if _parse_ts(entry["timestamp"]) < cutoff:
            continue
        who = entry.get("caller", "unknown")
        calls[who] += 1
        tokens[who] += int(entry.get("prompt_len", 0))
        if not entry.get("success", True):
            failures[who] += 1

    summary = [
        {
            "caller": who,
            "requests": count,
            "prompt_tokens": tokens[who],
            "failures": failures[who],
            "failure_rate": round(failures[who] / max(count, 1) * 100, 1),
        }
        for who, count in calls.items()
    ]
    # Busiest callers first: most requests, then most tokens.
    summary.sort(key=lambda item: (-item["requests"], -item["prompt_tokens"]))
    return summary
|
||
|
|
|
||
|
|
|
||
|
|
def analyze_heartbeat(rows: List[dict], horizon_hours: int) -> Dict[str, int]:
    """Count infrastructure risks observed in the recent window.

    A tick counts as an outage/failure only on an explicit ``False`` flag,
    so missing perception data is treated as healthy.
    """
    if not rows:
        return {"gitea_outages": 0, "inference_failures": 0, "total_checks": 0}

    latest = max(_parse_ts(r["timestamp"]) for r in rows)
    cutoff = latest - timedelta(hours=horizon_hours)
    window = [r for r in rows if _parse_ts(r["timestamp"]) >= cutoff]

    gitea_outages = 0
    inference_failures = 0
    for tick in window:
        perception = tick.get("perception", {})
        if perception.get("gitea_alive") is False:
            gitea_outages += 1
        if perception.get("model_health", {}).get("inference_ok") is False:
            inference_failures += 1

    return {
        "gitea_outages": gitea_outages,
        "inference_failures": inference_failures,
        "total_checks": len(window),
    }
|
||
|
|
|
||
|
|
|
||
|
|
# ── Prediction Engine ────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def predict_demand(
    recent_rate: float,
    baseline_rate: float,
    surge_factor: float,
    horizon_hours: int,
) -> Dict[str, Any]:
    """Project near-term resource demand from the observed request rates.

    The prediction is at least the recent rate, or the baseline scaled by
    three quarters of the surge factor, whichever is larger.
    """
    scaled_baseline = baseline_rate * max(1.0, surge_factor * 0.75)
    projected = round(max(recent_rate, scaled_baseline), 2)

    # Classify by surge band; anything at or below 1.0x is "low".
    if surge_factor > 3.0:
        level = "critical"
    elif surge_factor > SURGE_THRESHOLD:
        level = "elevated"
    elif surge_factor > 1.0:
        level = "normal"
    else:
        level = "low"

    return {
        "predicted_requests_per_hour": projected,
        "surge_factor": round(surge_factor, 2),
        "demand_level": level,
        "horizon_hours": horizon_hours,
    }
|
||
|
|
|
||
|
|
|
||
|
|
def determine_posture(
    surge_factor: float,
    callers: List[Dict[str, Any]],
    heartbeat: Dict[str, int],
) -> Tuple[str, str, List[str]]:
    """
    Decide the fleet's resource mode, dispatch posture, and action list.

    Returns:
        (resource_mode, dispatch_posture, actions)
    """
    actions: List[str] = []

    # Demand surge: recent traffic is well above baseline.
    surging = surge_factor > SURGE_THRESHOLD
    if surging:
        actions.append(
            "Pre-warm local inference before the next forecast window."
        )

    # Large background jobs that can be deferred to off-peak hours.
    background_tags = ("batch", "know-thy-father")
    heavy = [
        c for c in callers
        if c["prompt_tokens"] >= HEAVY_TOKEN_THRESHOLD
        and any(tag in c["caller"] for tag in background_tags)
    ]
    if heavy:
        actions.append(
            "Throttle or defer large background jobs until off-peak capacity is available."
        )

    # Callers with a meaningful sample size and a high failure rate.
    failing = [c for c in callers if c["failure_rate"] > 20 and c["requests"] >= 3]
    if failing:
        names = ", ".join(c["caller"] for c in failing[:3])
        actions.append(
            f"Investigate high failure rates in: {names}."
        )

    # Repeated local-inference failures also push us into surge mode.
    inference_shaky = heartbeat["inference_failures"] >= 2
    if inference_shaky:
        actions.append(
            "Investigate local model reliability and reserve headroom for heartbeat traffic."
        )

    # Any forge outage degrades the dispatch posture.
    degraded = heartbeat["gitea_outages"] >= 1
    if degraded:
        actions.append(
            "Pre-fetch or cache forge state before the next dispatch window."
        )

    if not actions:
        actions.append(
            "Maintain current resource allocation; no surge indicators detected."
        )

    mode = "surge" if (surging or inference_shaky) else "steady"
    posture = "degraded" if degraded else "normal"
    return mode, posture, actions
|
||
|
|
|
||
|
|
|
||
|
|
# ── Main Forecast ────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def forecast(
    metrics_paths: List[str],
    heartbeat_paths: List[str],
    horizon_hours: int = DEFAULT_HORIZON_HOURS,
) -> Dict[str, Any]:
    """Build the full resource forecast from metric and heartbeat logs."""
    metric_rows = load_jsonl(metrics_paths)
    heartbeat_rows = load_jsonl(heartbeat_paths)

    rates = compute_rates(metric_rows, horizon_hours)
    recent_rate, baseline_rate, surge_factor, recent_tok_rate, base_tok_rate = rates

    callers = analyze_callers(metric_rows, horizon_hours)
    heartbeat = analyze_heartbeat(heartbeat_rows, horizon_hours)
    demand = predict_demand(recent_rate, baseline_rate, surge_factor, horizon_hours)
    mode, posture, actions = determine_posture(surge_factor, callers, heartbeat)

    report: Dict[str, Any] = {
        "resource_mode": mode,
        "dispatch_posture": posture,
        "horizon_hours": horizon_hours,
        "recent_request_rate": round(recent_rate, 2),
        "baseline_request_rate": round(baseline_rate, 2),
        "predicted_request_rate": demand["predicted_requests_per_hour"],
        "surge_factor": demand["surge_factor"],
        "demand_level": demand["demand_level"],
        "recent_prompt_tokens_per_hour": round(recent_tok_rate, 2),
        "baseline_prompt_tokens_per_hour": round(base_tok_rate, 2),
        "gitea_outages": heartbeat["gitea_outages"],
        "inference_failures": heartbeat["inference_failures"],
        "heartbeat_checks": heartbeat["total_checks"],
        "top_callers": callers[:10],
        "recommended_actions": actions,
    }
    return report
|
||
|
|
|
||
|
|
|
||
|
|
# ── Output Formatters ────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def format_markdown(fc: Dict[str, Any]) -> str:
    """Render the forecast dict as a markdown report (always newline-terminated)."""
    out: List[str] = []
    out.append("# Predictive Resource Allocation — Fleet Forecast")
    out.append("")
    out.append(f"**Horizon:** {fc['horizon_hours']} hours")
    out.append(f"**Resource mode:** {fc['resource_mode']}")
    out.append(f"**Dispatch posture:** {fc['dispatch_posture']}")
    out.append(f"**Demand level:** {fc['demand_level']}")

    out.extend(["", "## Demand Metrics", ""])
    out.append("| Metric | Recent | Baseline |")
    out.append("|--------|-------:|---------:|")
    out.append(
        f"| Requests/hour | {fc['recent_request_rate']} | {fc['baseline_request_rate']} |"
    )
    out.append(
        f"| Prompt tokens/hour | {fc['recent_prompt_tokens_per_hour']} | {fc['baseline_prompt_tokens_per_hour']} |"
    )
    out.append("")
    out.append(f"**Surge factor:** {fc['surge_factor']}x")
    out.append(f"**Predicted request rate:** {fc['predicted_request_rate']}/hour")

    out.extend(["", "## Infrastructure Health", ""])
    out.append(f"- Gitea outages (recent window): {fc['gitea_outages']}")
    out.append(f"- Inference failures (recent window): {fc['inference_failures']}")
    out.append(f"- Heartbeat checks analyzed: {fc['heartbeat_checks']}")

    out.extend(["", "## Recommended Actions", ""])
    out.extend(f"- {action}" for action in fc["recommended_actions"])

    # Caller table is only emitted when there is at least one caller.
    if fc["top_callers"]:
        out.extend([
            "",
            "## Top Callers (Recent Window)",
            "",
            "| Caller | Requests | Tokens | Failures |",
            "|--------|---------:|-------:|---------:|",
        ])
        out.extend(
            f"| {c['caller']} | {c['requests']} | {c['prompt_tokens']} | {c['failures']} |"
            for c in fc["top_callers"]
        )

    return "\n".join(out) + "\n"
|
||
|
|
|
||
|
|
|
||
|
|
# ── CLI ──────────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
def main() -> int:
    """CLI entry point: parse arguments, run the forecast, print the report.

    Returns the process exit status (always 0 on completion).
    """
    parser = argparse.ArgumentParser(
        description="Predictive resource allocation for the Timmy fleet"
    )
    parser.add_argument(
        "--metrics",
        nargs="*",
        default=None,
        help="Metric JSONL paths (supports globs). Default: metrics/local_*.jsonl",
    )
    parser.add_argument(
        "--heartbeat",
        nargs="*",
        default=None,
        help="Heartbeat JSONL paths (supports globs). Default: heartbeat/ticks_*.jsonl",
    )
    parser.add_argument(
        "--horizon",
        type=int,
        default=DEFAULT_HORIZON_HOURS,
        help="Forecast horizon in hours (default: 6)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output raw JSON instead of markdown",
    )
    args = parser.parse_args()

    # Fall back to the repo-root default globs when paths were not given.
    metric_globs = args.metrics or _default_paths(DEFAULT_METRICS_GLOB)
    heartbeat_globs = args.heartbeat or _default_paths(DEFAULT_HEARTBEAT_GLOB)

    report = forecast(metric_globs, heartbeat_globs, args.horizon)

    rendered = json.dumps(report, indent=2) if args.json else format_markdown(report)
    print(rendered)

    return 0
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|