Files
hermes-agent/scripts/queue_health_check.py
Alexander Whitestone 73984ca72f
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Contributor Attribution Check / check-attribution (pull_request) Failing after 29s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 31s
Tests / e2e (pull_request) Successful in 2m13s
Tests / test (pull_request) Failing after 28m10s
feat: Add queue health check script
2026-04-17 01:26:07 -04:00

148 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Queue Health Check — Verify dispatch queue is operational.
Checks:
1. Queue file exists and is readable
2. Queue has pending items
3. Queue is not stuck (items processing)
4. Queue age (stale items)
Usage:
python scripts/queue_health_check.py
python scripts/queue_health_check.py --json
"""
import json
import sys
from datetime import datetime, timedelta
from pathlib import Path
def check_queue_health(queue_path: str = "~/.hermes/queue.json") -> dict:
"""Check queue health status."""
path = Path(queue_path).expanduser()
result = {
"healthy": True,
"checks": {},
"warnings": [],
"errors": []
}
# Check 1: File exists
if not path.exists():
result["healthy"] = False
result["errors"].append(f"Queue file not found: {path}")
result["checks"]["file_exists"] = False
return result
result["checks"]["file_exists"] = True
# Check 2: File is readable
try:
with open(path, "r") as f:
data = json.load(f)
except Exception as e:
result["healthy"] = False
result["errors"].append(f"Cannot read queue: {e}")
result["checks"]["readable"] = False
return result
result["checks"]["readable"] = True
# Check 3: Queue structure
if not isinstance(data, dict):
result["healthy"] = False
result["errors"].append("Queue is not a dict")
result["checks"]["valid_structure"] = False
return result
result["checks"]["valid_structure"] = True
# Check 4: Pending items
pending = data.get("pending", [])
processing = data.get("processing", [])
completed = data.get("completed", [])
result["checks"]["pending_count"] = len(pending)
result["checks"]["processing_count"] = len(processing)
result["checks"]["completed_count"] = len(completed)
if len(pending) == 0 and len(processing) == 0:
result["warnings"].append("Queue is empty")
# Check 5: Stale processing items
now = datetime.now()
stale_threshold = timedelta(hours=1)
for item in processing:
started = item.get("started_at")
if started:
try:
started_time = datetime.fromisoformat(started.replace("Z", "+00:00"))
if now - started_time > stale_threshold:
result["warnings"].append(f"Stale item: {item.get('id', 'unknown')} (started {started})")
except:
pass
# Check 6: Queue age
if pending:
oldest = min(pending, key=lambda x: x.get("added_at", ""))
added = oldest.get("added_at")
if added:
try:
added_time = datetime.fromisoformat(added.replace("Z", "+00:00"))
age = now - added_time
if age > timedelta(hours=24):
result["warnings"].append(f"Old item in queue: {oldest.get('id', 'unknown')} (added {added})")
except:
pass
return result
def main():
"""Main function."""
import argparse
parser = argparse.ArgumentParser(description="Queue health check")
parser.add_argument("--queue", default="~/.hermes/queue.json", help="Queue file path")
parser.add_argument("--json", action="store_true", help="Output as JSON")
args = parser.parse_args()
result = check_queue_health(args.queue)
if args.json:
print(json.dumps(result, indent=2))
else:
print("Queue Health Check")
print("=" * 50)
print(f"Healthy: {'' if result['healthy'] else ''}")
print()
print("Checks:")
for check, value in result["checks"].items():
if isinstance(value, bool):
print(f" {check}: {'' if value else ''}")
else:
print(f" {check}: {value}")
if result["warnings"]:
print()
print("Warnings:")
for warning in result["warnings"]:
print(f"{warning}")
if result["errors"]:
print()
print("Errors:")
for error in result["errors"]:
print(f"{error}")
sys.exit(0 if result["healthy"] else 1)
if __name__ == "__main__":
main()