#!/usr/bin/env python3
"""
Quality Gate Integration — Pipeline Orchestrator Hook

Integrates the standalone quality gate with the pipeline orchestrator.
Validates outputs before saving. Handles rejection and re-queue.

Usage:
    from quality_gate_integration import validate_before_save
    result = validate_before_save(output, pipeline="training-data")
"""

import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional

# Quality thresholds, keyed by pipeline name. Pipelines that are not listed
# here are validated against the "default" entry.
QUALITY_THRESHOLDS = {
    "training-data": {
        "min_length": 50,
        "max_length": 50000,
        "require_json": True,
        "require_fields": ["description"],
        "reject_patterns": ["TODO", "FIXME", "PLACEHOLDER", "lorem ipsum"],
    },
    "scene-descriptions": {
        "min_length": 30,
        "max_length": 2000,
        "require_json": True,
        "require_fields": ["mood", "colors", "description"],
        "reject_patterns": ["TODO", "FIXME"],
    },
    "default": {
        "min_length": 10,
        "max_length": 100000,
        "require_json": False,
        "require_fields": [],
        "reject_patterns": ["TODO", "FIXME"],
    },
}

# Stats tracking
STATS_FILE = Path.home() / ".hermes" / "quality-gate-stats.json"

# Counter keys kept both at the top level of the stats and per pipeline.
_COUNTER_KEYS = ("total", "passed", "rejected")


def _empty_stats() -> dict:
    """Return a fresh stats structure with every counter at zero."""
    return {"total": 0, "passed": 0, "rejected": 0, "by_pipeline": {}}


def load_stats() -> dict:
    """Load the persisted stats from STATS_FILE.

    Returns:
        The stored stats dict. Fresh zeroed stats are returned when the file
        is missing or unreadable, is not valid JSON, or does not hold a JSON
        object.
    """
    try:
        stats = json.loads(STATS_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return _empty_stats()
    # Valid JSON that is not an object (e.g. a list) cannot hold the
    # counters, so treat it like a missing file.
    if not isinstance(stats, dict):
        return _empty_stats()
    return stats


def save_stats(stats: dict) -> None:
    """Write ``stats`` to STATS_FILE as indented JSON, creating parent dirs."""
    STATS_FILE.parent.mkdir(parents=True, exist_ok=True)
    STATS_FILE.write_text(json.dumps(stats, indent=2) + "\n", encoding="utf-8")


def _check_json(output: str, required_fields: list) -> list:
    """Return the JSON-related validation errors for ``output``."""
    try:
        data = json.loads(output)
    except json.JSONDecodeError:
        return ["Not valid JSON"]
    if not required_fields:
        return []
    # Required fields only make sense as keys of a JSON object. A bare number
    # would make the ``in`` test raise TypeError, and a list or string would
    # be matched by element/substring instead of by key.
    if not isinstance(data, dict):
        return [f"Expected a JSON object, got {type(data).__name__}"]
    return [
        f"Missing required field: {field}"
        for field in required_fields
        if field not in data
    ]


def validate_output(output: str, pipeline: str = "default") -> dict:
    """Validate output against quality gate thresholds.

    Args:
        output: The raw text produced by the pipeline.
        pipeline: Key into QUALITY_THRESHOLDS; unknown names use "default".

    Returns:
        A dict with "valid" (True when no check failed), "errors" (list of
        messages), "pipeline", "output_length" and "checked_at" (UTC ISO-8601
        timestamp).
    """
    thresholds = QUALITY_THRESHOLDS.get(pipeline, QUALITY_THRESHOLDS["default"])
    errors = []
    length = len(output)

    # Length check
    if length < thresholds["min_length"]:
        errors.append(f"Too short: {length} < {thresholds['min_length']} chars")
    if length > thresholds["max_length"]:
        errors.append(f"Too long: {length} > {thresholds['max_length']} chars")

    # JSON check
    if thresholds["require_json"]:
        errors.extend(_check_json(output, thresholds["require_fields"]))

    # Pattern rejection (case-insensitive substring match)
    output_lower = output.lower()
    errors.extend(
        f"Contains rejected pattern: {pattern}"
        for pattern in thresholds["reject_patterns"]
        if pattern.lower() in output_lower
    )

    return {
        "valid": not errors,
        "errors": errors,
        "pipeline": pipeline,
        "output_length": length,
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }


def validate_before_save(
    output: str, pipeline: str = "default", re_queue_on_fail: bool = True
) -> dict:
    """Validate output before saving. Returns decision + stats update.

    The persisted stats are loaded, incremented for this validation (overall
    and for ``pipeline``) and written back to STATS_FILE.

    Args:
        output: The raw text produced by the pipeline.
        pipeline: Key into QUALITY_THRESHOLDS; unknown names use "default".
        re_queue_on_fail: Selects the action for invalid output: "re_queue"
            when True, "reject" when False.

    Returns:
        A dict with "action" ("save", "re_queue" or "reject"), "validation"
        (the validate_output result) and "stats" (overall "total" and
        "pass_rate" after this validation).
    """
    result = validate_output(output, pipeline)
    outcome = "passed" if result["valid"] else "rejected"

    # Update stats
    stats = load_stats()
    pipeline_stats = stats.setdefault("by_pipeline", {}).setdefault(pipeline, {})
    for counters in (stats, pipeline_stats):
        # A stats file may lack some counters (e.g. "{}"); default them so
        # the increments and the pass-rate lookup below cannot raise KeyError.
        for key in _COUNTER_KEYS:
            counters.setdefault(key, 0)
        counters["total"] += 1
        counters[outcome] += 1
    save_stats(stats)

    if result["valid"]:
        action = "save"
    elif re_queue_on_fail:
        action = "re_queue"
    else:
        action = "reject"

    return {
        "action": action,
        "validation": result,
        "stats": {
            "total": stats["total"],
            "pass_rate": stats["passed"] / max(stats["total"], 1),
        },
    }


def get_quality_report() -> str:
    """Generate a quality gate report.

    Returns:
        A newline-separated Markdown-style summary of the persisted stats:
        overall totals and pass rate, then one bullet per pipeline.
    """
    stats = load_stats()
    total = stats.get("total", 0)
    passed = stats.get("passed", 0)

    lines = [
        "# Quality Gate Report",
        "",
        f"Total validations: {total}",
        f"Passed: {passed}",
        f"Rejected: {stats.get('rejected', 0)}",
        # max(..., 1) avoids dividing by zero before any validation has run.
        f"Pass rate: {passed / max(total, 1):.0%}",
        "",
    ]
    for pipeline, pstats in stats.get("by_pipeline", {}).items():
        p_total = pstats.get("total", 0)
        rate = pstats.get("passed", 0) / max(p_total, 1)
        lines.append(f"- {pipeline}: {p_total} total, {rate:.0%} pass rate")
    return "\n".join(lines)


def _main() -> None:
    """Command-line entry: print the report or validate one output."""
    if len(sys.argv) > 1 and sys.argv[1] == "report":
        print(get_quality_report())
    elif len(sys.argv) > 2:
        pipeline = sys.argv[1]
        output = sys.argv[2]
        result = validate_before_save(output, pipeline)
        print(json.dumps(result, indent=2))
    else:
        print("Usage: quality_gate_integration.py [report|PIPELINE_NAME OUTPUT]")


if __name__ == "__main__":
    _main()