# timmy-home/tests/test_predictive_resource_allocator.py

"""Tests for predictive resource allocation."""
import json
import os
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path

import pytest

# Resolve the ``scripts`` directory that sits next to this file's parent
# directory and put it first on sys.path, so the module under test can be
# imported by bare name further down.
SCRIPT_DIR = Path(__file__).resolve().parent.parent / "scripts"
sys.path.insert(0, str(SCRIPT_DIR))

from predictive_resource_allocator import (
    _parse_ts,
    compute_rates,
    analyze_callers,
    analyze_heartbeat,
    predict_demand,
    determine_posture,
    forecast,
    format_markdown,
    load_jsonl,
)


def _write_jsonl(path: Path, rows: list) -> None:
    """Write ``rows`` to ``path`` as JSON Lines, replacing any existing file.

    Args:
        path: Destination file. It is opened in write mode, so previous
            content is truncated.
        rows: JSON-serialisable objects; each one becomes a single line.
    """
    # Pin the encoding so the fixture file does not depend on the platform's
    # locale default.
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(row) + "\n" for row in rows)


def _make_metrics(count: int, base_hour: int = 0, caller: str = "heartbeat_tick",
                  prompt_len: int = 1000, success: bool = True) -> list:
    """Build ``count`` synthetic metric rows spaced one minute apart.

    The first row is stamped ``base_hour``:00 UTC on 2026-03-29 and every
    following row is one minute later. Timestamps come from datetime
    arithmetic, so a series that runs past midnight rolls over to the next
    day instead of yielding an out-of-range hour such as ``24``.

    Args:
        count: Number of rows to generate; ``0`` gives an empty list.
        base_hour: Hour offset of the first row from 2026-03-29T00:00 UTC.
        caller: Value stored under ``"caller"`` in every row.
        prompt_len: Value stored under ``"prompt_len"`` in every row.
        success: Value stored under ``"success"`` in every row.

    Returns:
        A list of dicts with the keys ``timestamp``, ``caller``,
        ``prompt_len``, ``response_len`` (always 50) and ``success``.
    """
    start = datetime(2026, 3, 29, tzinfo=timezone.utc) + timedelta(hours=base_hour)
    return [
        {
            # isoformat() on a UTC-aware value renders "...+00:00", the same
            # spelling the rows have always used.
            "timestamp": (start + timedelta(minutes=i)).isoformat(),
            "caller": caller,
            "prompt_len": prompt_len,
            "response_len": 50,
            "success": success,
        }
        for i in range(count)
    ]


def _make_heartbeat(count: int, base_hour: int = 0,
                    gitea_alive: bool = True, inference_ok: bool = True) -> list:
    """Build ``count`` synthetic heartbeat rows spaced one hour apart.

    The first row is stamped ``base_hour``:00 UTC on 2026-03-29 and every
    following row is one hour later. Timestamps come from datetime
    arithmetic, so a series longer than the hours left in the day rolls over
    to the next date instead of yielding an out-of-range hour.

    Args:
        count: Number of rows to generate; ``0`` gives an empty list.
        base_hour: Hour offset of the first row from 2026-03-29T00:00 UTC.
        gitea_alive: Value stored at ``perception.gitea_alive`` in every row.
        inference_ok: Value stored at
            ``perception.model_health.inference_ok`` in every row.

    Returns:
        A list of dicts with the keys ``timestamp`` and ``perception``.
    """
    start = datetime(2026, 3, 29, tzinfo=timezone.utc) + timedelta(hours=base_hour)
    return [
        {
            "timestamp": (start + timedelta(hours=i)).isoformat(),
            "perception": {
                "gitea_alive": gitea_alive,
                "model_health": {"inference_ok": inference_ok},
            },
        }
        for i in range(count)
    ]


# ── Timestamp Parsing ────────────────────────────────────────────────────────

class TestTimestampParsing:
    """Checks on ``_parse_ts`` for ``Z``-suffixed and explicit-offset input."""

    def test_z_suffix(self):
        """A ``Z``-suffixed timestamp parses to a timezone-aware value."""
        parsed = _parse_ts("2026-03-29T12:00:00Z")
        assert parsed.tzinfo is not None

    def test_explicit_offset(self):
        """An explicit ``+00:00`` offset keeps the written hour."""
        parsed = _parse_ts("2026-03-29T12:00:00+00:00")
        assert parsed.hour == 12

    def test_ordering(self):
        """Two parsed timestamps compare in chronological order."""
        ten_oclock = _parse_ts("2026-03-29T10:00:00Z")
        noon = _parse_ts("2026-03-29T12:00:00Z")
        assert ten_oclock < noon


# ── Rate Computation ─────────────────────────────────────────────────────────

class TestComputeRates:
    """Checks on the rates and surge factor returned by ``compute_rates``."""

    def test_empty_returns_defaults(self):
        """No rows: the first rate is 0.0 and the surge factor is 1.0."""
        first_rate, _, surge_factor, _, _ = compute_rates([], 6)
        assert first_rate == 0.0
        assert surge_factor == 1.0

    def test_surge_detected(self):
        """One request at hour 0 followed by twenty at hour 12 is a surge."""
        rows = _make_metrics(1, base_hour=0) + _make_metrics(20, base_hour=12)

        _, _, surge_factor, _, _ = compute_rates(rows, horizon_hours=6)

        assert surge_factor > 1.0

    def test_no_surge_when_stable(self):
        """Six requests at hour 0 and six at hour 12 stay below 1.5."""
        rows = _make_metrics(6, base_hour=0) + _make_metrics(6, base_hour=12)

        _, _, surge_factor, _, _ = compute_rates(rows, horizon_hours=6)

        assert surge_factor < 1.5

    def test_falls_back_to_prior_activity_when_previous_window_is_empty(self):
        """Three requests at hour 0 and six at hour 12 give 1.0 / 0.5 / 2.0."""
        older_rows = _make_metrics(3, base_hour=0)
        newer_rows = _make_metrics(6, base_hour=12)

        recent, baseline, surge_factor, _, _ = compute_rates(
            older_rows + newer_rows, horizon_hours=6
        )

        assert recent == 1.0
        assert baseline == 0.5
        assert surge_factor == 2.0


# ── Caller Analysis ──────────────────────────────────────────────────────────

class TestAnalyzeCallers:
    """Checks on the per-caller summaries returned by ``analyze_callers``."""

    def test_empty(self):
        """No rows produce an empty list."""
        summaries = analyze_callers([], 6)
        assert summaries == []

    def test_groups_by_caller(self):
        """Rows from two callers produce an entry for each caller name."""
        rows = (
            _make_metrics(3, caller="heartbeat_tick")
            + _make_metrics(2, caller="know-thy-father", prompt_len=15000)
        )
        summaries = analyze_callers(rows, horizon_hours=24)
        seen = [entry["caller"] for entry in summaries]
        assert "heartbeat_tick" in seen
        assert "know-thy-father" in seen

    def test_sorted_by_request_count(self):
        """The caller with ten requests is listed before the one with one."""
        rows = (
            _make_metrics(1, caller="rare")
            + _make_metrics(10, caller="frequent")
        )
        summaries = analyze_callers(rows, horizon_hours=24)
        assert summaries[0]["caller"] == "frequent"

    def test_failure_rate(self):
        """Ten failed requests report a failure rate of 100.0."""
        rows = _make_metrics(10, caller="flaky", success=False)
        summaries = analyze_callers(rows, horizon_hours=24)
        matching = [entry for entry in summaries if entry["caller"] == "flaky"]
        assert matching[0]["failure_rate"] == 100.0


# ── Heartbeat Analysis ───────────────────────────────────────────────────────

class TestAnalyzeHeartbeat:
    """Checks on the outage and failure counts from ``analyze_heartbeat``."""

    def test_empty(self):
        """No heartbeat rows report zero Gitea outages."""
        summary = analyze_heartbeat([], 6)
        assert summary["gitea_outages"] == 0

    def test_detects_gitea_outage(self):
        """Three rows with ``gitea_alive`` false count as three outages."""
        down_rows = _make_heartbeat(3, gitea_alive=False)
        summary = analyze_heartbeat(down_rows, horizon_hours=24)
        assert summary["gitea_outages"] == 3

    def test_detects_inference_failure(self):
        """Two rows with ``inference_ok`` false count as two failures."""
        failing_rows = _make_heartbeat(2, inference_ok=False)
        summary = analyze_heartbeat(failing_rows, horizon_hours=24)
        assert summary["inference_failures"] == 2


# ── Demand Prediction ────────────────────────────────────────────────────────

class TestPredictDemand:
    """Checks on the ``demand_level`` field returned by ``predict_demand``."""

    def test_critical_on_extreme_surge(self):
        """Inputs ``(100.0, 10.0, 10.0, 6)`` are classified as critical."""
        level = predict_demand(100.0, 10.0, 10.0, 6)["demand_level"]
        assert level == "critical"

    def test_elevated_on_moderate_surge(self):
        """Inputs ``(50.0, 10.0, 2.0, 6)`` are classified as elevated."""
        level = predict_demand(50.0, 10.0, 2.0, 6)["demand_level"]
        assert level == "elevated"

    def test_normal_on_slight_increase(self):
        """Inputs ``(12.0, 10.0, 1.2, 6)`` are classified as normal."""
        level = predict_demand(12.0, 10.0, 1.2, 6)["demand_level"]
        assert level == "normal"

    def test_low_when_decreasing(self):
        """Inputs ``(5.0, 10.0, 0.5, 6)`` are classified as low."""
        level = predict_demand(5.0, 10.0, 0.5, 6)["demand_level"]
        assert level == "low"


# ── Posture Determination ────────────────────────────────────────────────────

class TestDeterminePosture:
    """Checks on the (mode, posture, actions) triple from ``determine_posture``."""

    def test_steady_normal_when_no_issues(self):
        """Surge 1.0, no callers and a clean heartbeat: steady and normal."""
        clean_heartbeat = {
            "gitea_outages": 0,
            "inference_failures": 0,
            "total_checks": 5,
        }
        mode, posture, actions = determine_posture(1.0, [], clean_heartbeat)
        assert mode == "steady"
        assert posture == "normal"
        assert "no surge indicators" in actions[0]

    def test_surge_on_high_factor(self):
        """Surge 2.0 selects surge mode and recommends a pre-warm action."""
        clean_heartbeat = {
            "gitea_outages": 0,
            "inference_failures": 0,
            "total_checks": 5,
        }
        mode, _, actions = determine_posture(2.0, [], clean_heartbeat)
        assert mode == "surge"
        assert any("Pre-warm" in action for action in actions)

    def test_degraded_on_gitea_outage(self):
        """Three Gitea outages give a degraded posture and a forge-state action."""
        outage_heartbeat = {
            "gitea_outages": 3,
            "inference_failures": 0,
            "total_checks": 5,
        }
        _, posture, actions = determine_posture(1.0, [], outage_heartbeat)
        assert posture == "degraded"
        assert any("forge state" in action for action in actions)

    def test_heavy_background_flagged(self):
        """A caller with 50000 prompt tokens triggers a throttle/background action."""
        heavy_callers = [
            {
                "caller": "know-thy-father-batch",
                "requests": 5,
                "prompt_tokens": 50000,
                "failures": 0,
                "failure_rate": 0,
            }
        ]
        clean_heartbeat = {
            "gitea_outages": 0,
            "inference_failures": 0,
            "total_checks": 5,
        }
        _, _, actions = determine_posture(1.0, heavy_callers, clean_heartbeat)
        assert any(
            "Throttle" in action or "background" in action for action in actions
        )

    def test_failing_callers_flagged(self):
        """A caller with a 50.0 failure rate triggers a failure-rate action."""
        failing_callers = [
            {
                "caller": "bad_actor",
                "requests": 10,
                "prompt_tokens": 1000,
                "failures": 5,
                "failure_rate": 50.0,
            }
        ]
        clean_heartbeat = {
            "gitea_outages": 0,
            "inference_failures": 0,
            "total_checks": 5,
        }
        _, _, actions = determine_posture(1.0, failing_callers, clean_heartbeat)
        assert any("failure rate" in action.lower() for action in actions)


# ── Full Forecast ────────────────────────────────────────────────────────────

class TestForecast:
    """Checks on ``forecast`` when it is fed JSONL files on disk."""

    def test_end_to_end(self, tmp_path):
        """Populated metric and heartbeat files yield the expected result keys."""
        metrics_path = tmp_path / "metrics.jsonl"
        heartbeat_path = tmp_path / "heartbeat.jsonl"

        metric_rows = _make_metrics(6, base_hour=0) + _make_metrics(30, base_hour=12)
        heartbeat_rows = _make_heartbeat(5, base_hour=8, inference_ok=False)
        _write_jsonl(metrics_path, metric_rows)
        _write_jsonl(heartbeat_path, heartbeat_rows)

        result = forecast([str(metrics_path)], [str(heartbeat_path)], horizon_hours=6)

        expected_keys = (
            "resource_mode",
            "dispatch_posture",
            "surge_factor",
            "top_callers",
            "recommended_actions",
        )
        for key in expected_keys:
            assert key in result
        for list_key in ("top_callers", "recommended_actions"):
            assert isinstance(result[list_key], list)

    def test_empty_inputs(self, tmp_path):
        """Empty input files give steady mode and a surge factor of 1.0."""
        metrics_path = tmp_path / "empty_m.jsonl"
        heartbeat_path = tmp_path / "empty_h.jsonl"
        for empty_file in (metrics_path, heartbeat_path):
            empty_file.write_text("")

        result = forecast([str(metrics_path)], [str(heartbeat_path)], horizon_hours=6)

        assert result["resource_mode"] == "steady"
        assert result["surge_factor"] == 1.0


# ── Markdown Output ──────────────────────────────────────────────────────────

class TestFormatMarkdown:
    """Checks on the Markdown text produced by ``format_markdown``."""

    def test_contains_key_sections(self):
        """A forecast built from no input files renders every expected heading."""
        rendered = format_markdown(forecast([], [], horizon_hours=6))
        expected_fragments = (
            "Predictive Resource Allocation",
            "Demand Metrics",
            "Recommended Actions",
            "Horizon",
        )
        for fragment in expected_fragments:
            assert fragment in rendered