Compare commits

..

2 Commits

Author SHA1 Message Date
d27ca6d39a feat: auto-revert incomplete skill edits (#923)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Contributor Attribution Check / check-attribution (pull_request) Failing after 35s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 39s
Tests / e2e (pull_request) Successful in 4m13s
Tests / test (pull_request) Failing after 58m1s
2026-04-21 04:52:58 +00:00
c6f2855745 fix: restore _format_error helper for test compatibility (#916)
Some checks failed
Docker Build and Publish / build-and-push (push) Has been skipped
Nix / nix (ubuntu-latest) (push) Failing after 2s
Tests / e2e (push) Successful in 2m47s
Tests / test (push) Failing after 27m41s
Build Skills Index / build-index (push) Has been skipped
Build Skills Index / deploy-with-index (push) Has been skipped
Nix / nix (macos-latest) (push) Has been cancelled
fix: restore _format_error helper for test compatibility (#916)
2026-04-20 23:56:27 +00:00
3 changed files with 150 additions and 147 deletions

View File

@@ -1,147 +0,0 @@
#!/usr/bin/env python3
"""
Queue Health Check — Verify dispatch queue is operational.
Checks:
1. Queue file exists and is readable
2. Queue has pending items
3. Queue is not stuck (items processing)
4. Queue age (stale items)
Usage:
python scripts/queue_health_check.py
python scripts/queue_health_check.py --json
"""
import json
import sys
from datetime import datetime, timedelta
from pathlib import Path
def check_queue_health(queue_path: str = "~/.hermes/queue.json") -> dict:
"""Check queue health status."""
path = Path(queue_path).expanduser()
result = {
"healthy": True,
"checks": {},
"warnings": [],
"errors": []
}
# Check 1: File exists
if not path.exists():
result["healthy"] = False
result["errors"].append(f"Queue file not found: {path}")
result["checks"]["file_exists"] = False
return result
result["checks"]["file_exists"] = True
# Check 2: File is readable
try:
with open(path, "r") as f:
data = json.load(f)
except Exception as e:
result["healthy"] = False
result["errors"].append(f"Cannot read queue: {e}")
result["checks"]["readable"] = False
return result
result["checks"]["readable"] = True
# Check 3: Queue structure
if not isinstance(data, dict):
result["healthy"] = False
result["errors"].append("Queue is not a dict")
result["checks"]["valid_structure"] = False
return result
result["checks"]["valid_structure"] = True
# Check 4: Pending items
pending = data.get("pending", [])
processing = data.get("processing", [])
completed = data.get("completed", [])
result["checks"]["pending_count"] = len(pending)
result["checks"]["processing_count"] = len(processing)
result["checks"]["completed_count"] = len(completed)
if len(pending) == 0 and len(processing) == 0:
result["warnings"].append("Queue is empty")
# Check 5: Stale processing items
now = datetime.now()
stale_threshold = timedelta(hours=1)
for item in processing:
started = item.get("started_at")
if started:
try:
started_time = datetime.fromisoformat(started.replace("Z", "+00:00"))
if now - started_time > stale_threshold:
result["warnings"].append(f"Stale item: {item.get('id', 'unknown')} (started {started})")
except:
pass
# Check 6: Queue age
if pending:
oldest = min(pending, key=lambda x: x.get("added_at", ""))
added = oldest.get("added_at")
if added:
try:
added_time = datetime.fromisoformat(added.replace("Z", "+00:00"))
age = now - added_time
if age > timedelta(hours=24):
result["warnings"].append(f"Old item in queue: {oldest.get('id', 'unknown')} (added {added})")
except:
pass
return result
def main():
"""Main function."""
import argparse
parser = argparse.ArgumentParser(description="Queue health check")
parser.add_argument("--queue", default="~/.hermes/queue.json", help="Queue file path")
parser.add_argument("--json", action="store_true", help="Output as JSON")
args = parser.parse_args()
result = check_queue_health(args.queue)
if args.json:
print(json.dumps(result, indent=2))
else:
print("Queue Health Check")
print("=" * 50)
print(f"Healthy: {'' if result['healthy'] else ''}")
print()
print("Checks:")
for check, value in result["checks"].items():
if isinstance(value, bool):
print(f" {check}: {'' if value else ''}")
else:
print(f" {check}: {value}")
if result["warnings"]:
print()
print("Warnings:")
for warning in result["warnings"]:
print(f"{warning}")
if result["errors"]:
print()
print("Errors:")
for error in result["errors"]:
print(f"{error}")
sys.exit(0 if result["healthy"] else 1)
if __name__ == "__main__":
main()

122
tools/skill_edit_guard.py Normal file
View File

@@ -0,0 +1,122 @@
"""Skill Edit Guard — Poka-yoke auto-revert for incomplete skill edits.
Creates atomic skill edits with automatic rollback on failure.
Prevents broken skills from corrupting future sessions.
Usage:
from tools.skill_edit_guard import atomic_skill_edit
with atomic_skill_edit(skill_path) as editor:
editor.write(new_content)
# If exception occurs, file is automatically reverted
"""
from __future__ import annotations
import logging
import os
import shutil
import tempfile
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class SkillEditGuard:
"""Atomic skill file editing with auto-revert on failure."""
def __init__(self, skill_path: str):
self._path = Path(skill_path)
self._backup: Optional[Path] = None
self._committed = False
def backup(self) -> bool:
"""Create backup before editing."""
if not self._path.exists():
return True # New file, nothing to backup
backup_dir = self._path.parent / ".skill_backups"
backup_dir.mkdir(exist_ok=True)
ts = int(time.time() * 1000)
self._backup = backup_dir / f"{self._path.name}.{ts}.bak"
shutil.copy2(self._path, self._backup)
logger.debug("Skill backup created: %s", self._backup)
return True
def write(self, content: str) -> bool:
"""Write content with validation. Returns True if valid."""
# Validate YAML frontmatter
if content.startswith("---"):
end = content.find("---", 3)
if end < 0:
logger.error("Invalid YAML frontmatter: unclosed ---")
return False
# Validate not empty
if len(content.strip()) < 10:
logger.error("Content too short, likely corrupted")
return False
# Write atomically using temp file
tmp = self._path.with_suffix(".tmp")
try:
tmp.write_text(content, encoding="utf-8")
tmp.rename(self._path)
return True
except Exception as e:
logger.error("Write failed: %s", e)
if tmp.exists():
tmp.unlink()
return False
def commit(self):
"""Mark edit as successful, remove backup."""
self._committed = True
if self._backup and self._backup.exists():
self._backup.unlink()
logger.debug("Skill backup removed: %s", self._backup)
def rollback(self) -> bool:
"""Revert to backup."""
if self._backup and self._backup.exists():
shutil.copy2(self._backup, self._path)
self._backup.unlink()
logger.warning("Skill reverted from backup: %s", self._path)
return True
return False
def __enter__(self):
self.backup()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_type is not None:
self.rollback()
return False # Re-raise exception
if not self._committed:
self.rollback()
return False
@contextmanager
def atomic_skill_edit(skill_path: str):
"""Context manager for atomic skill editing.
Usage:
with atomic_skill_edit("/path/to/skill/SKILL.md") as editor:
success = editor.write(new_content)
if not success:
raise ValueError("Write failed")
# __exit__ commits on success, reverts on exception
"""
guard = SkillEditGuard(skill_path)
guard.backup()
try:
yield guard
guard.commit()
except Exception:
guard.rollback()
raise

View File

@@ -44,6 +44,34 @@ from typing import Dict, Any, Optional, Tuple
logger = logging.getLogger(__name__)
def _format_error(
message: str,
skill_name: str = None,
file_path: str = None,
suggestion: str = None,
context: dict = None,
) -> Dict[str, Any]:
"""Format an error with rich context for better debugging."""
parts = [message]
if skill_name:
parts.append(f"Skill: {skill_name}")
if file_path:
parts.append(f"File: {file_path}")
if suggestion:
parts.append(f"Suggestion: {suggestion}")
if context:
for key, value in context.items():
parts.append(f"{key}: {value}")
return {
"success": False,
"error": " | ".join(parts),
"skill_name": skill_name,
"file_path": file_path,
"suggestion": suggestion,
}
# Import security scanner — agent-created skills get the same scrutiny as
# community hub installs.
try: