Files
timmy-config/scripts/quality_gate_integration.py

159 lines
5.0 KiB
Python

#!/usr/bin/env python3
"""
Quality Gate Integration — Pipeline Orchestrator Hook
Integrates the standalone quality gate with the pipeline orchestrator.
Validates outputs before saving. Handles rejection and re-queue.
Usage:
from quality_gate_integration import validate_before_save
result = validate_before_save(output, pipeline="training-data")
"""
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional
# Per-pipeline quality thresholds.
#
# Each entry carries the same five keys:
#   min_length / max_length - bounds on len(output), in characters
#   require_json            - whether the output must parse as JSON
#   require_fields          - keys looked up in the parsed JSON payload
#   reject_patterns         - substrings matched case-insensitively
#
# Pipelines without an entry of their own use the "default" profile.
QUALITY_THRESHOLDS = {
    "training-data": {
        "min_length": 50,
        "max_length": 50000,
        "require_json": True,
        "require_fields": ["description"],
        "reject_patterns": ["TODO", "FIXME", "PLACEHOLDER", "lorem ipsum"],
    },
    "scene-descriptions": {
        "min_length": 30,
        "max_length": 2000,
        "require_json": True,
        "require_fields": ["mood", "colors", "description"],
        "reject_patterns": ["TODO", "FIXME"],
    },
    "default": {
        "min_length": 10,
        "max_length": 100000,
        "require_json": False,
        "require_fields": [],
        "reject_patterns": ["TODO", "FIXME"],
    },
}
# Location of the persisted pass/reject counters, under the user's home directory.
STATS_FILE = Path.home().joinpath(".hermes", "quality-gate-stats.json")
def load_stats() -> dict:
    """Load the persisted quality-gate counters from ``STATS_FILE``.

    Returns:
        The JSON object stored in the stats file. A fresh, zeroed structure
        (``total``, ``passed``, ``rejected``, ``by_pipeline``) is returned
        instead when the file is missing or unreadable, does not contain
        valid JSON, or contains JSON that is not an object.
    """
    fresh = {"total": 0, "passed": 0, "rejected": 0, "by_pipeline": {}}
    try:
        loaded = json.loads(STATS_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: file missing/unreadable. ValueError: undecodable bytes or
        # malformed JSON (JSONDecodeError subclasses it). RecursionError:
        # absurdly nested JSON. In every case start from clean counters.
        return fresh
    # Callers use dict methods on the result, so a list/scalar payload is
    # treated the same as a corrupt file.
    return loaded if isinstance(loaded, dict) else fresh
def save_stats(stats: dict) -> None:
    """Write the quality-gate counters to ``STATS_FILE`` as indented JSON.

    The parent directory is created first when it does not exist.

    Args:
        stats: JSON-serialisable mapping of counters to persist.
    """
    STATS_FILE.parent.mkdir(parents=True, exist_ok=True)
    # The file ends with a single newline after the JSON document.
    STATS_FILE.write_text(json.dumps(stats, indent=2) + "\n", encoding="utf-8")
def validate_output(output: str, pipeline: str = "default") -> dict:
    """Validate output against quality gate thresholds.

    Args:
        output: The raw text produced by a pipeline.
        pipeline: Key into ``QUALITY_THRESHOLDS``; unknown names fall back to
            the ``"default"`` profile.

    Returns:
        A dict with ``valid`` (True when no check failed), ``errors`` (list of
        human-readable failure messages), ``pipeline``, ``output_length`` and
        ``checked_at`` (UTC timestamp in ISO 8601 format).
    """
    thresholds = QUALITY_THRESHOLDS.get(pipeline, QUALITY_THRESHOLDS["default"])
    errors = []

    # Length check
    size = len(output)
    if size < thresholds["min_length"]:
        errors.append(f"Too short: {size} < {thresholds['min_length']} chars")
    if size > thresholds["max_length"]:
        errors.append(f"Too long: {size} > {thresholds['max_length']} chars")

    # JSON check
    if thresholds["require_json"]:
        try:
            data = json.loads(output)
        except json.JSONDecodeError:
            errors.append("Not valid JSON")
        else:
            required = thresholds["require_fields"]
            if isinstance(data, dict):
                errors.extend(
                    f"Missing required field: {field}"
                    for field in required
                    if field not in data
                )
            elif required:
                # Fields can only live on a JSON object. Testing membership on
                # a list/string would give false passes, and on a number it
                # would raise TypeError, so report the wrong root type instead.
                errors.append(
                    f"Expected a JSON object, got {type(data).__name__}"
                )

    # Pattern rejection (case-insensitive substring match)
    output_lower = output.lower()
    for pattern in thresholds["reject_patterns"]:
        if pattern.lower() in output_lower:
            errors.append(f"Contains rejected pattern: {pattern}")

    return {
        "valid": not errors,
        "errors": errors,
        "pipeline": pipeline,
        "output_length": size,
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }
def validate_before_save(output: str, pipeline: str = "default",
                         re_queue_on_fail: bool = True) -> dict:
    """Validate output before saving. Returns decision + stats update.

    Runs ``validate_output``, bumps the global and per-pipeline counters in
    the persisted stats, and writes them back with ``save_stats``.

    Args:
        output: The raw text produced by a pipeline.
        pipeline: Pipeline name used for threshold lookup and stats bucketing.
        re_queue_on_fail: When validation fails, choose ``"re_queue"`` if
            True and ``"reject"`` if False.

    Returns:
        A dict with ``action`` (``"save"``, ``"re_queue"`` or ``"reject"``),
        ``validation`` (the ``validate_output`` result) and ``stats``
        (running ``total`` and overall ``pass_rate``).
    """
    result = validate_output(output, pipeline)
    outcome = "passed" if result["valid"] else "rejected"

    # Update stats. Every counter is read with a default so that a stats file
    # missing some keys (older format, hand-edited) cannot raise KeyError.
    stats = load_stats()
    stats["total"] = stats.get("total", 0) + 1
    stats[outcome] = stats.get(outcome, 0) + 1

    bucket = stats.setdefault("by_pipeline", {}).setdefault(
        pipeline, {"total": 0, "passed": 0, "rejected": 0}
    )
    bucket["total"] = bucket.get("total", 0) + 1
    bucket[outcome] = bucket.get(outcome, 0) + 1
    save_stats(stats)

    if result["valid"]:
        action = "save"
    elif re_queue_on_fail:
        action = "re_queue"
    else:
        action = "reject"

    return {
        "action": action,
        "validation": result,
        "stats": {
            "total": stats["total"],
            # "passed" may be absent if only rejections were ever recorded.
            "pass_rate": stats.get("passed", 0) / max(stats["total"], 1),
        },
    }
def get_quality_report() -> str:
    """Generate a quality gate report.

    Returns:
        A Markdown-style text block built from the stats returned by
        ``load_stats``: overall totals and pass rate, followed by one bullet
        line per pipeline found under ``by_pipeline``.
    """
    stats = load_stats()
    total = stats.get("total", 0)
    passed = stats.get("passed", 0)
    lines = [
        "# Quality Gate Report",
        "",
        f"Total validations: {total}",
        f"Passed: {passed}",
        f"Rejected: {stats.get('rejected', 0)}",
        # max(..., 1) guards the division when nothing has been validated yet.
        f"Pass rate: {passed / max(total, 1):.0%}",
        "",
    ]
    for pipeline, pstats in stats.get("by_pipeline", {}).items():
        p_total = pstats.get("total", 0)
        rate = pstats.get("passed", 0) / max(p_total, 1)
        lines.append(f"- {pipeline}: {p_total} total, {rate:.0%} pass rate")
    return "\n".join(lines)
if __name__ == "__main__":
    # Command-line entry point:
    #   report                -> print the aggregated quality report
    #   PIPELINE_NAME OUTPUT  -> validate OUTPUT and print the decision as JSON
    #   anything else         -> print the usage line
    cli_args = sys.argv[1:]
    if cli_args[:1] == ["report"]:
        print(get_quality_report())
    elif len(cli_args) >= 2:
        pipeline, output = cli_args[0], cli_args[1]
        result = validate_before_save(output, pipeline)
        print(json.dumps(result, indent=2))
    else:
        print("Usage: quality_gate_integration.py [report|PIPELINE_NAME OUTPUT]")