# timmy-config/scripts/quality_gate_integration.py

#!/usr/bin/env python3
"""
Quality Gate Integration — Pipeline Orchestrator Hook

Integrates the standalone quality gate with the pipeline orchestrator.
Validates outputs before saving. Handles rejection and re-queue.

Usage:
    from quality_gate_integration import validate_before_save
    result = validate_before_save(output, pipeline="training-data")
"""

import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional

# Quality thresholds, keyed by pipeline name. validate_output() looks up the
# entry for the requested pipeline and falls back to "default" for any name
# that is not listed here. Every entry carries the same five keys:
#   min_length / max_length - inclusive bounds on len(output), in characters
#   require_json            - when True, the output must parse with json.loads
#   require_fields          - keys that must be present in the parsed JSON
#                             (only consulted when require_json is True)
#   reject_patterns         - substrings that cause rejection; compared
#                             case-insensitively anywhere in the output
QUALITY_THRESHOLDS = {
    "training-data": {
        "min_length": 50,
        "max_length": 50000,
        "require_json": True,
        "require_fields": ["description"],
        "reject_patterns": ["TODO", "FIXME", "PLACEHOLDER", "lorem ipsum"],
    },
    "scene-descriptions": {
        "min_length": 30,
        "max_length": 2000,
        "require_json": True,
        "require_fields": ["mood", "colors", "description"],
        "reject_patterns": ["TODO", "FIXME"],
    },
    "default": {
        "min_length": 10,
        "max_length": 100000,
        "require_json": False,
        "require_fields": [],
        "reject_patterns": ["TODO", "FIXME"],
    },
}

# Stats tracking: cumulative pass/reject counters are persisted as JSON in this
# file under the current user's home directory. load_stats() reads it and
# save_stats() writes it. The path is resolved once, when the module is
# imported.
STATS_FILE = Path.home() / ".hermes" / "quality-gate-stats.json"


def load_stats() -> dict:
    """Load the cumulative quality-gate counters from ``STATS_FILE``.

    Reading is best-effort: a missing, unreadable, or malformed stats file
    yields a fresh zeroed structure instead of an error, so validation can
    proceed on the first run or after the file has been corrupted.

    Returns:
        A dict containing at least the keys ``total``, ``passed``,
        ``rejected`` and ``by_pipeline``. Keys absent from the file are
        filled with their zero values; extra keys found in the file are kept.
    """
    defaults = {"total": 0, "passed": 0, "rejected": 0, "by_pipeline": {}}
    try:
        loaded = json.loads(STATS_FILE.read_text())
    except (OSError, ValueError):
        # OSError: the file is missing or unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError (its subclasses).
        return defaults
    if not isinstance(loaded, dict):
        # Valid JSON that is not an object (e.g. a list) cannot hold the
        # counters; treat it like a corrupt file instead of handing callers
        # a value they would crash on.
        return defaults
    return {**defaults, **loaded}


def save_stats(stats: dict) -> None:
    """Persist ``stats`` to ``STATS_FILE`` as indented JSON.

    The parent directory is created when it does not exist yet, and any
    existing stats file is overwritten.

    Args:
        stats: The counters structure to store; must be JSON-serializable.

    Raises:
        OSError: If the directory or the file cannot be written.
        TypeError: If ``stats`` contains a value ``json.dumps`` cannot encode.
    """
    STATS_FILE.parent.mkdir(parents=True, exist_ok=True)
    # The newline must be written as an escape sequence: a raw line break
    # inside a single-quoted string literal is a syntax error.
    STATS_FILE.write_text(json.dumps(stats, indent=2) + "\n")


def validate_output(output: str, pipeline: str = "default") -> dict:
    """Validate ``output`` against the quality thresholds of ``pipeline``.

    Three checks run in order and every failure is collected (the function
    does not stop at the first one): length bounds, JSON structure (only for
    pipelines with ``require_json``), and rejected substrings.

    Args:
        output: The text produced by the pipeline.
        pipeline: Key into ``QUALITY_THRESHOLDS``; unknown names use the
            ``"default"`` thresholds.

    Returns:
        A dict with ``valid`` (True when no check failed), ``errors`` (list
        of human-readable messages), ``pipeline``, ``output_length`` and
        ``checked_at`` (UTC timestamp in ISO 8601 format).
    """
    thresholds = QUALITY_THRESHOLDS.get(pipeline, QUALITY_THRESHOLDS["default"])
    errors = []
    size = len(output)

    # Length check
    if size < thresholds["min_length"]:
        errors.append(f"Too short: {size} < {thresholds['min_length']} chars")
    if size > thresholds["max_length"]:
        errors.append(f"Too long: {size} > {thresholds['max_length']} chars")

    # JSON check
    if thresholds["require_json"]:
        try:
            data = json.loads(output)
        except (json.JSONDecodeError, RecursionError):
            # RecursionError: pathologically nested input; it is not usable
            # JSON for this gate, so report it rather than crash.
            errors.append("Not valid JSON")
        else:
            # Only a JSON object can carry named fields. For any other root
            # (number, string, list, null) every required field is missing;
            # a plain `in` test would raise TypeError on numbers and do a
            # substring match on strings.
            fields_present = data if isinstance(data, dict) else {}
            errors.extend(
                f"Missing required field: {field}"
                for field in thresholds["require_fields"]
                if field not in fields_present
            )

    # Pattern rejection
    # NOTE: this is a case-insensitive *substring* match, so a pattern also
    # triggers inside longer words (e.g. "TODO" matches "mastodon").
    output_lower = output.lower()
    errors.extend(
        f"Contains rejected pattern: {pattern}"
        for pattern in thresholds["reject_patterns"]
        if pattern.lower() in output_lower
    )

    return {
        "valid": not errors,
        "errors": errors,
        "pipeline": pipeline,
        "output_length": size,
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }


def validate_before_save(output: str, pipeline: str = "default",
                         re_queue_on_fail: bool = True) -> dict:
    """Validate ``output``, record the outcome in the stats file, and decide.

    Args:
        output: The text produced by the pipeline.
        pipeline: Pipeline name used for threshold lookup and for the
            per-pipeline counters.
        re_queue_on_fail: When validation fails, choose ``"re_queue"`` (True)
            or ``"reject"`` (False) as the action.

    Returns:
        A dict with ``action`` (``"save"``, ``"re_queue"`` or ``"reject"``),
        ``validation`` (the full ``validate_output`` result) and ``stats``
        (the updated overall ``total`` and ``pass_rate``).
    """
    result = validate_output(output, pipeline)
    outcome = "passed" if result["valid"] else "rejected"

    # Update stats. Counters are read with defaults throughout because the
    # stats file may be partial or hand-edited; a missing counter is zero.
    stats = load_stats()
    stats["total"] = stats.get("total", 0) + 1
    stats[outcome] = stats.get(outcome, 0) + 1

    by_pipeline = stats.get("by_pipeline")
    if not isinstance(by_pipeline, dict):
        by_pipeline = stats["by_pipeline"] = {}
    pipeline_stats = by_pipeline.get(pipeline)
    if not isinstance(pipeline_stats, dict):
        pipeline_stats = by_pipeline[pipeline] = {}
    for counter in ("total", "passed", "rejected"):
        pipeline_stats.setdefault(counter, 0)
    pipeline_stats["total"] += 1
    pipeline_stats[outcome] += 1
    save_stats(stats)

    if result["valid"]:
        action = "save"
    elif re_queue_on_fail:
        action = "re_queue"
    else:
        action = "reject"

    return {
        "action": action,
        "validation": result,
        "stats": {
            "total": stats["total"],
            # "passed" is absent when only rejections have been recorded.
            "pass_rate": stats.get("passed", 0) / max(stats["total"], 1),
        },
    }


def get_quality_report() -> str:
    """Build a Markdown-style summary of the recorded quality-gate stats.

    Returns:
        A newline-joined report: a heading, the overall totals and pass
        rate, then one bullet line per pipeline found under ``by_pipeline``.
        Pass rates are shown as whole percentages; a zero total is reported
        as 0% rather than dividing by zero.
    """
    stats = load_stats()
    total = stats.get("total", 0)
    passed = stats.get("passed", 0)
    lines = [
        "# Quality Gate Report",
        "",
        f"Total validations: {total}",
        f"Passed: {passed}",
        f"Rejected: {stats.get('rejected', 0)}",
        f"Pass rate: {passed / max(total, 1):.0%}",
        "",
    ]
    for pipeline, pstats in stats.get("by_pipeline", {}).items():
        pipeline_total = pstats.get("total", 0)
        rate = pstats.get("passed", 0) / max(pipeline_total, 1)
        lines.append(f"- {pipeline}: {pipeline_total} total, {rate:.0%} pass rate")
    # The separator must be the "\n" escape; a raw line break inside the
    # literal is a syntax error.
    return "\n".join(lines)


if __name__ == "__main__":
    # Command-line entry point:
    #   quality_gate_integration.py report                -> print the stats report
    #   quality_gate_integration.py PIPELINE_NAME OUTPUT  -> validate OUTPUT and
    #                                                        print the decision as JSON
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == "report":
        print(get_quality_report())
    elif len(cli_args) >= 2:
        pipeline, output = cli_args[0], cli_args[1]
        result = validate_before_save(output, pipeline)
        print(json.dumps(result, indent=2))
    else:
        # A bad invocation is an error: keep the usage text off stdout (which
        # callers may parse as JSON) and signal failure through the exit status.
        print("Usage: quality_gate_integration.py [report|PIPELINE_NAME OUTPUT]",
              file=sys.stderr)
        sys.exit(2)