|
|
|
|
@@ -1,3 +1,4 @@
|
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
Soul Eval Gate — The Conscience of the Training Pipeline
|
|
|
|
|
|
|
|
|
|
@@ -25,15 +26,18 @@ Usage:
|
|
|
|
|
result = evaluate_candidate(scores_path, baseline_path, candidate_id)
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import glob
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── Gate File Rotation ──────────────────────────────────────────────
|
|
|
|
|
# Dated gate files whose mtime is more than this many days old are deleted on rotation.
GATE_FILE_MAX_AGE_DAYS = 7
|
|
|
|
|
# Upper bound on dated gate files kept after rotation; eval_gate_latest.json is not counted.
GATE_FILE_MAX_COUNT = 50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── SOUL.md Constraints ──────────────────────────────────────────────
|
|
|
|
|
#
|
|
|
|
|
# These are the non-negotiable categories from SOUL.md and the
|
|
|
|
|
@@ -65,10 +69,6 @@ MAX_METRIC_REGRESSION = -0.15
|
|
|
|
|
# Default paths
|
|
|
|
|
DEFAULT_GATE_DIR = Path.home() / ".timmy" / "training-data" / "eval-gates"
|
|
|
|
|
|
|
|
|
|
# Gate file rotation settings (fixes #628: hash dedup growth)
|
|
|
|
|
GATE_FILE_MAX_AGE_DAYS = 7 # Delete gate files older than this
|
|
|
|
|
GATE_FILE_MAX_COUNT = 50 # Keep at most this many gate files (excluding latest)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def evaluate_candidate(
|
|
|
|
|
scores_path: str | Path,
|
|
|
|
|
@@ -245,7 +245,7 @@ def evaluate_candidate(
|
|
|
|
|
latest_file = gate_dir / "eval_gate_latest.json"
|
|
|
|
|
latest_file.write_text(json.dumps(result, indent=2))
|
|
|
|
|
|
|
|
|
|
# Rotate old gate files to prevent unbounded growth (#628)
|
|
|
|
|
# Rotate old gate files to prevent unbounded growth
|
|
|
|
|
_rotate_gate_files(gate_dir)
|
|
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
@@ -257,6 +257,48 @@ def _load_json(path: str | Path) -> dict:
|
|
|
|
|
return json.loads(Path(path).read_text())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _rotate_gate_files(
    gate_dir: Path,
    *,
    max_age_days: Optional[float] = None,
    max_count: Optional[int] = None,
) -> None:
    """Prune dated gate files so the gate directory cannot grow without bound.

    Only entries named ``eval_gate_*.json`` are considered, and
    ``eval_gate_latest.json`` is never touched. A considered file is deleted
    when either

    - its mtime is more than ``max_age_days`` days in the past, or
    - more than ``max_count`` considered files are still present (the oldest
      ones are removed first).

    Rotation is best-effort housekeeping: files that cannot be stat'ed or
    deleted are skipped instead of raising.

    Args:
        gate_dir: Directory holding the gate files. If it does not exist or
            is not a directory, the function returns without doing anything.
        max_age_days: Age threshold in days. Defaults to the module-level
            ``GATE_FILE_MAX_AGE_DAYS``.
        max_count: Maximum number of dated gate files to keep. Defaults to
            the module-level ``GATE_FILE_MAX_COUNT``.
    """
    if max_age_days is None:
        max_age_days = GATE_FILE_MAX_AGE_DAYS
    if max_count is None:
        max_count = GATE_FILE_MAX_COUNT

    # is_dir() rather than exists(): a plain file at this path would make
    # iterdir() raise NotADirectoryError.
    if not gate_dir.is_dir():
        return

    latest_name = "eval_gate_latest.json"
    cutoff = datetime.now(timezone.utc).timestamp() - (max_age_days * 86400)

    gate_files = []
    for f in gate_dir.iterdir():
        if f.name == latest_name:
            continue
        if not f.name.startswith("eval_gate_") or f.suffix != ".json":
            continue
        try:
            mtime = f.stat().st_mtime
        except OSError:
            # Vanished between listing and stat (or unreadable): ignore it.
            continue
        gate_files.append((mtime, f))

    # Oldest first, so both rules below always hit the oldest files first.
    gate_files.sort(key=lambda entry: entry[0])

    remaining = len(gate_files)
    for mtime, f in gate_files:
        if mtime >= cutoff and remaining <= max_count:
            # Every later file is at least as new and the cap is satisfied,
            # so nothing further can qualify for deletion.
            break
        try:
            f.unlink()
        except OSError:
            # Could not delete; the file still occupies a slot, so
            # `remaining` is deliberately left unchanged.
            continue
        remaining -= 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _find_category_score(
|
|
|
|
|
sessions: dict[str, dict],
|
|
|
|
|
category: str,
|
|
|
|
|
@@ -296,58 +338,6 @@ def _find_category_score(
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _rotate_gate_files(
    gate_dir: Path,
    *,
    max_age_days: Optional[float] = None,
    max_count: Optional[int] = None,
) -> int:
    """Rotate and clean up old eval gate files.

    Prevents unbounded growth of the gate file directory by:
      1. Deleting ``eval_gate_*.json`` files whose mtime is more than
         ``max_age_days`` days old.
      2. Keeping at most ``max_count`` of the remaining files, removing the
         oldest first.
      3. Always preserving ``eval_gate_latest.json``.

    Deletion is best-effort: a file that cannot be stat'ed or removed is
    skipped instead of aborting the rotation.

    Args:
        gate_dir: Directory holding the gate files.
        max_age_days: Age threshold in days. Defaults to the module-level
            ``GATE_FILE_MAX_AGE_DAYS``.
        max_count: Maximum number of historical files to keep. Defaults to
            the module-level ``GATE_FILE_MAX_COUNT``.

    Returns:
        The number of files actually deleted.
    """
    if max_age_days is None:
        max_age_days = GATE_FILE_MAX_AGE_DAYS
    if max_count is None:
        max_count = GATE_FILE_MAX_COUNT

    if not gate_dir.is_dir():
        return 0

    cutoff = datetime.now(timezone.utc).timestamp() - max_age_days * 86400

    # Escape the directory part so glob metacharacters in its name
    # ("[", "*", "?") are matched literally.
    pattern = os.path.join(glob.escape(str(gate_dir)), "eval_gate_*.json")

    dated = []
    for filepath in glob.glob(pattern):
        # Exact basename match: only the real "latest" file is protected.
        if os.path.basename(filepath) == "eval_gate_latest.json":
            continue
        try:
            dated.append((os.path.getmtime(filepath), filepath))
        except OSError:
            # Dangling symlink, or the file vanished after globbing.
            continue

    # Oldest first; the mtime is read once per file and reused below.
    dated.sort()

    deleted = 0
    remaining = []
    for mtime, filepath in dated:
        if mtime < cutoff:
            try:
                os.remove(filepath)
            except FileNotFoundError:
                continue  # already gone: neither deleted by us nor remaining
            except OSError:
                pass  # undeletable for now; it still counts toward the cap
            else:
                deleted += 1
                continue
        remaining.append(filepath)

    # Enforce the count cap on whatever is left, oldest first.
    excess = len(remaining) - max_count
    if excess > 0:
        for filepath in remaining[:excess]:
            try:
                os.remove(filepath)
            except OSError:
                continue
            deleted += 1

    return deleted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ── CLI ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
|
|