#!/usr/bin/env python3
"""
Quality Gate Integration — Pipeline Orchestrator Hook

Integrates the standalone quality gate with the pipeline orchestrator.
Validates outputs before saving. Handles rejection and re-queue.

Usage:
    from quality_gate_integration import validate_before_save
    result = validate_before_save(output, pipeline="training-data")
"""

import json
import sys
from datetime import datetime, timezone
from pathlib import Path

# Per-pipeline quality thresholds. Unknown pipeline names fall back to
# the "default" entry (see validate_output).
QUALITY_THRESHOLDS = {
    "training-data": {
        "min_length": 50,
        "max_length": 50000,
        "require_json": True,
        "require_fields": ["description"],
        "reject_patterns": ["TODO", "FIXME", "PLACEHOLDER", "lorem ipsum"],
    },
    "scene-descriptions": {
        "min_length": 30,
        "max_length": 2000,
        "require_json": True,
        "require_fields": ["mood", "colors", "description"],
        "reject_patterns": ["TODO", "FIXME"],
    },
    "default": {
        "min_length": 10,
        "max_length": 100000,
        "require_json": False,
        "require_fields": [],
        "reject_patterns": ["TODO", "FIXME"],
    },
}

# Cumulative pass/reject statistics, persisted across runs.
STATS_FILE = Path.home() / ".hermes" / "quality-gate-stats.json"


def load_stats() -> dict:
    """Load cumulative gate statistics.

    Returns a fresh zeroed structure when the stats file is missing,
    unreadable, or contains invalid JSON (best-effort persistence; a
    corrupt stats file must never break validation).
    """
    try:
        return json.loads(STATS_FILE.read_text())
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError.
        return {"total": 0, "passed": 0, "rejected": 0, "by_pipeline": {}}


def save_stats(stats: dict) -> None:
    """Persist *stats* as pretty-printed JSON, creating parent dirs as needed."""
    STATS_FILE.parent.mkdir(parents=True, exist_ok=True)
    # BUGFIX: original appended a stray space before the newline (and the
    # literal newline inside the quotes was a syntax error as committed).
    STATS_FILE.write_text(json.dumps(stats, indent=2) + "\n")


def validate_output(output: str, pipeline: str = "default") -> dict:
    """Validate *output* against the quality-gate thresholds for *pipeline*.

    Checks, in order: length bounds, JSON well-formedness plus required
    top-level fields (when the pipeline requires JSON), and a
    case-insensitive scan for rejected patterns.

    Returns a dict with keys: valid (bool), errors (list[str]),
    pipeline, output_length, checked_at (UTC ISO timestamp).
    """
    thresholds = QUALITY_THRESHOLDS.get(pipeline, QUALITY_THRESHOLDS["default"])
    errors: list[str] = []

    # Length check
    if len(output) < thresholds["min_length"]:
        errors.append(f"Too short: {len(output)} < {thresholds['min_length']} chars")
    if len(output) > thresholds["max_length"]:
        errors.append(f"Too long: {len(output)} > {thresholds['max_length']} chars")

    # JSON check
    if thresholds["require_json"]:
        try:
            data = json.loads(output)
        except json.JSONDecodeError:
            errors.append("Not valid JSON")
        else:
            # BUGFIX: the original ran `field not in data` even when the
            # parsed root was a list or scalar, turning the field check
            # into a membership test on the wrong container.
            if isinstance(data, dict):
                for field in thresholds["require_fields"]:
                    if field not in data:
                        errors.append(f"Missing required field: {field}")
            elif thresholds["require_fields"]:
                errors.append("JSON root is not an object")

    # Pattern rejection (case-insensitive)
    output_lower = output.lower()
    for pattern in thresholds["reject_patterns"]:
        if pattern.lower() in output_lower:
            errors.append(f"Contains rejected pattern: {pattern}")

    return {
        "valid": len(errors) == 0,
        "errors": errors,
        "pipeline": pipeline,
        "output_length": len(output),
        "checked_at": datetime.now(timezone.utc).isoformat(),
    }


def _record_result(pipeline: str, valid: bool) -> dict:
    """Update and persist the pass/reject counters; return the new stats."""
    stats = load_stats()
    outcome = "passed" if valid else "rejected"
    stats["total"] = stats.get("total", 0) + 1
    stats[outcome] = stats.get(outcome, 0) + 1
    pstats = stats.setdefault("by_pipeline", {}).setdefault(
        pipeline, {"total": 0, "passed": 0, "rejected": 0}
    )
    pstats["total"] += 1
    pstats[outcome] += 1
    save_stats(stats)
    return stats


def validate_before_save(output: str, pipeline: str = "default",
                         re_queue_on_fail: bool = True) -> dict:
    """Validate *output* before saving; return a decision plus stats.

    The decision dict contains:
      action     -- "save" on pass; otherwise "re_queue" (default) or
                    "reject" when re_queue_on_fail is False.
      validation -- the full result from validate_output().
      stats      -- running totals (total count and overall pass rate).
    """
    result = validate_output(output, pipeline)
    stats = _record_result(pipeline, result["valid"])

    if result["valid"]:
        action = "save"
    else:
        action = "re_queue" if re_queue_on_fail else "reject"

    return {
        "action": action,
        "validation": result,
        "stats": {
            "total": stats["total"],
            # max() guards against a zero total from a corrupt stats file.
            "pass_rate": stats["passed"] / max(stats["total"], 1),
        },
    }


def get_quality_report() -> str:
    """Render the persisted statistics as a small Markdown report."""
    stats = load_stats()
    lines = [
        "# Quality Gate Report",
        "",
        f"Total validations: {stats.get('total', 0)}",
        f"Passed: {stats.get('passed', 0)}",
        f"Rejected: {stats.get('rejected', 0)}",
        f"Pass rate: {stats.get('passed', 0) / max(stats.get('total', 1), 1):.0%}",
        "",
    ]
    for pipeline, pstats in stats.get("by_pipeline", {}).items():
        rate = pstats.get("passed", 0) / max(pstats.get("total", 1), 1)
        lines.append(
            f"- {pipeline}: {pstats.get('total', 0)} total, {rate:.0%} pass rate"
        )
    # BUGFIX: original joined on a broken string literal (literal newline
    # inside the quotes); join on "\n".
    return "\n".join(lines)


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "report":
        print(get_quality_report())
    elif len(sys.argv) > 2:
        pipeline = sys.argv[1]
        output = sys.argv[2]
        result = validate_before_save(output, pipeline)
        print(json.dumps(result, indent=2))
    else:
        print("Usage: quality_gate_integration.py [report|PIPELINE_NAME OUTPUT]")