#!/usr/bin/env python3
"""
Quality Gate Integration — Pipeline Orchestrator Hook

Integrates the standalone quality gate with the pipeline orchestrator.
Validates outputs before saving. Handles rejection and re-queue.

Usage:
    from quality_gate_integration import validate_before_save
    result = validate_before_save(output, pipeline="training-data")
"""

import json
import sys
from datetime import datetime, timezone
from pathlib import Path

# Per-pipeline quality thresholds. Unknown pipeline names fall back to
# the "default" entry (see validate_output).
QUALITY_THRESHOLDS = {
    "training-data": {
        "min_length": 50,
        "max_length": 50000,
        "require_json": True,
        "require_fields": ["description"],
        "reject_patterns": ["TODO", "FIXME", "PLACEHOLDER", "lorem ipsum"],
    },
    "scene-descriptions": {
        "min_length": 30,
        "max_length": 2000,
        "require_json": True,
        "require_fields": ["mood", "colors", "description"],
        "reject_patterns": ["TODO", "FIXME"],
    },
    "default": {
        "min_length": 10,
        "max_length": 100000,
        "require_json": False,
        "require_fields": [],
        "reject_patterns": ["TODO", "FIXME"],
    },
}

# Cumulative pass/reject statistics, persisted across runs.
STATS_FILE = Path.home() / ".hermes" / "quality-gate-stats.json"


def load_stats() -> dict:
    """Load cumulative gate statistics.

    Returns a fresh zeroed structure when the stats file is missing,
    unreadable, or contains invalid JSON (best-effort persistence; a
    corrupt stats file must never break validation).
    """
    try:
        return json.loads(STATS_FILE.read_text())
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError.
        return {"total": 0, "passed": 0, "rejected": 0, "by_pipeline": {}}


def save_stats(stats: dict) -> None:
    """Persist *stats* as pretty-printed JSON, creating parent dirs as needed."""
    STATS_FILE.parent.mkdir(parents=True, exist_ok=True)
    # BUGFIX: original appended a stray space before the newline (and the
    # literal newline inside the quotes was a syntax error as committed).
    STATS_FILE.write_text(json.dumps(stats, indent=2) + "\n")


def validate_output(output: str, pipeline: str = "default") -> dict:
    """Validate *output* against the quality-gate thresholds for *pipeline*.

    Checks, in order: length bounds, JSON well-formedness plus required
    top-level fields (when the pipeline requires JSON), and a
    case-insensitive scan for rejected patterns.

    Returns a dict with keys: valid (bool), errors (list[str]),
    pipeline, output_length, checked_at (UTC ISO timestamp).
    """
    thresholds = QUALITY_THRESHOLDS.get(pipeline, QUALITY_THRESHOLDS["default"])
    errors: list[str] = []

    # Length check
    if len(output) < thresholds["min_length"]:
        errors.append(f"Too short: {len(output)} < {thresholds['min_length']} chars")
    if len(output) > thresholds["max_length"]:
        errors.append(f"Too long: {len(output)} > {thresholds['max_length']} chars")

    # JSON check
    if thresholds["require_json"]:
        try:
            data = json.loads(output)
        except json.JSONDecodeError:
            errors.append("Not valid JSON")
        else:
            # BUGFIX: the original ran `field not in data` even when the
            # parsed root was a list or scalar, turning the field check
            # into a membership test on the wrong container.
            if isinstance(data, dict):
                for field in thresholds["require_fields"]:
                    if field not in data:
                        errors.append(f"Missing required field: {field}")
            elif thresholds["require_fields"]:
                errors.append("JSON root is not an object")

    # Pattern rejection (case-insensitive)
    output_lower = output.lower()
    for pattern in thresholds["reject_patterns"]:
        if pattern.lower() in output_lower:
            errors.append(f"Contains rejected pattern: {pattern}")

    return {
        "valid": len(errors) == 0,
        "errors": errors,
        "pipeline": pipeline,
        "output_length": len(output),
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }


def _record_result(pipeline: str, valid: bool) -> dict:
    """Update and persist the pass/reject counters; return the new stats."""
    stats = load_stats()
    outcome = "passed" if valid else "rejected"
    stats["total"] = stats.get("total", 0) + 1
    stats[outcome] = stats.get(outcome, 0) + 1
    pstats = stats.setdefault("by_pipeline", {}).setdefault(
        pipeline, {"total": 0, "passed": 0, "rejected": 0}
    )
    pstats["total"] += 1
    pstats[outcome] += 1
    save_stats(stats)
    return stats


def validate_before_save(output: str, pipeline: str = "default",
                         re_queue_on_fail: bool = True) -> dict:
    """Validate *output* before saving; return a decision plus stats.

    The decision dict contains:
      action     -- "save" on pass; otherwise "re_queue" (default) or
                    "reject" when re_queue_on_fail is False.
      validation -- the full result from validate_output().
      stats      -- running totals (total count and overall pass rate).
    """
    result = validate_output(output, pipeline)
    stats = _record_result(pipeline, result["valid"])

    if result["valid"]:
        action = "save"
    else:
        action = "re_queue" if re_queue_on_fail else "reject"

    return {
        "action": action,
        "validation": result,
        "stats": {
            "total": stats["total"],
            # max() guards against a zero total from a corrupt stats file.
            "pass_rate": stats["passed"] / max(stats["total"], 1),
        },
    }


def get_quality_report() -> str:
    """Render the persisted statistics as a small Markdown report."""
    stats = load_stats()
    lines = [
        "# Quality Gate Report",
        "",
        f"Total validations: {stats.get('total', 0)}",
        f"Passed: {stats.get('passed', 0)}",
        f"Rejected: {stats.get('rejected', 0)}",
        f"Pass rate: {stats.get('passed', 0) / max(stats.get('total', 1), 1):.0%}",
        "",
    ]
    for pipeline, pstats in stats.get("by_pipeline", {}).items():
        rate = pstats.get("passed", 0) / max(pstats.get("total", 1), 1)
        lines.append(
            f"- {pipeline}: {pstats.get('total', 0)} total, {rate:.0%} pass rate"
        )
    # BUGFIX: original joined on a broken string literal (literal newline
    # inside the quotes); join on "\n".
    return "\n".join(lines)


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "report":
        print(get_quality_report())
    elif len(sys.argv) > 2:
        pipeline = sys.argv[1]
        output = sys.argv[2]
        result = validate_before_save(output, pipeline)
        print(json.dumps(result, indent=2))
    else:
        print("Usage: quality_gate_integration.py [report|PIPELINE_NAME OUTPUT]")