Harden agent attack surface: scan writes to memory, skills, cron, and context files

The security scanner (skills_guard.py) was only wired into the hub install path. All other write paths to persistent state — skills created by the agent, memory entries, cron prompts, and context files — bypassed it entirely. This closes those gaps:

- file_operations: deny-list blocks writes to ~/.ssh, ~/.aws, ~/.hermes/.env, etc.
- code_execution_tool: filter secret env vars from sandbox child process
- skill_manager_tool: wire scan_skill() into create/edit/patch/write_file with rollback
- skills_guard: add "agent-created" trust level (same policy as community)
- memory_tool: scan content for injection/exfil before system prompt injection
- prompt_builder: scan AGENTS.md, .cursorrules, SOUL.md for prompt injection
- cronjob_tools: scan cron prompts for critical threats before scheduling

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 23:43:15 -05:00
parent 0310170869
commit 95b6bd5df6
7 changed files with 278 additions and 8 deletions
--- a/tools/skill_manager_tool.py
+++ b/tools/skill_manager_tool.py
@@ -33,12 +33,38 @@ Directory layout for user skills:
 """

 import json
+import logging
 import os
 import re
 import shutil
 from pathlib import Path
 from typing import Dict, Any, Optional

+logger = logging.getLogger(__name__)
+
+# Import security scanner — agent-created skills get the same scrutiny as
+# community hub installs.
+try:
+    from tools.skills_guard import scan_skill, should_allow_install, format_scan_report
+    _GUARD_AVAILABLE = True
+except ImportError:
+    _GUARD_AVAILABLE = False
+
+
+def _security_scan_skill(skill_dir: Path) -> Optional[str]:
+    """Run scan_skill() on skill_dir as "agent-created"; return a block-report string if should_allow_install() denies it, else None."""
+    if not _GUARD_AVAILABLE:  # tools.skills_guard could not be imported, so no scan is performed
+        return None
+    try:
+        result = scan_skill(skill_dir, source="agent-created")
+        allowed, reason = should_allow_install(result)
+        if not allowed:
+            report = format_scan_report(result)
+            return f"Security scan blocked this skill ({reason}):\n{report}"
+    except Exception as e:  # NOTE(review): fails open — a scanner error is only logged and the write is treated as allowed; confirm intended
+        logger.warning("Security scan failed for %s: %s", skill_dir, e)
+    return None
+
 import yaml


@@ -196,6 +222,12 @@ def _create_skill(name: str, content: str, category: str = None) -> Dict[str, An
    skill_md = skill_dir / "SKILL.md"
    skill_md.write_text(content, encoding="utf-8")

+    # Security scan — roll back on block
+    scan_error = _security_scan_skill(skill_dir)
+    if scan_error:
+        shutil.rmtree(skill_dir, ignore_errors=True)
+        return {"success": False, "error": scan_error}
+
    result = {
        "success": True,
        "message": f"Skill '{name}' created.",
@@ -222,8 +254,17 @@ def _edit_skill(name: str, content: str) -> Dict[str, Any]:
        return {"success": False, "error": f"Skill '{name}' not found. Use skills_list() to see available skills."}

    skill_md = existing["path"] / "SKILL.md"
+    # Back up original content for rollback
+    original_content = skill_md.read_text(encoding="utf-8") if skill_md.exists() else None
    skill_md.write_text(content, encoding="utf-8")

+    # Security scan — roll back on block
+    scan_error = _security_scan_skill(existing["path"])
+    if scan_error:
+        if original_content is not None:
+            skill_md.write_text(original_content, encoding="utf-8")
+        return {"success": False, "error": scan_error}
+
    return {
        "success": True,
        "message": f"Skill '{name}' updated.",
@@ -300,8 +341,15 @@ def _patch_skill(
                "error": f"Patch would break SKILL.md structure: {err}",
            }

+    original_content = content  # for rollback
    target.write_text(new_content, encoding="utf-8")

+    # Security scan — roll back on block
+    scan_error = _security_scan_skill(skill_dir)
+    if scan_error:
+        target.write_text(original_content, encoding="utf-8")
+        return {"success": False, "error": scan_error}
+
    replacements = count if replace_all else 1
    return {
        "success": True,
@@ -344,8 +392,19 @@ def _write_file(name: str, file_path: str, file_content: str) -> Dict[str, Any]:

    target = existing["path"] / file_path
    target.parent.mkdir(parents=True, exist_ok=True)
+    # Back up for rollback
+    original_content = target.read_text(encoding="utf-8") if target.exists() else None
    target.write_text(file_content, encoding="utf-8")

+    # Security scan — roll back on block
+    scan_error = _security_scan_skill(existing["path"])
+    if scan_error:
+        if original_content is not None:
+            target.write_text(original_content, encoding="utf-8")
+        else:
+            target.unlink(missing_ok=True)
+        return {"success": False, "error": scan_error}
+
    return {
        "success": True,
        "message": f"File '{file_path}' written to skill '{name}'.",