hermes-agent/scripts/import-cybersecurity-skills.py

#!/usr/bin/env python3
"""
import-cybersecurity-skills.py — Import Anthropic Cybersecurity Skills into Hermes.

Clones the Anthropic-Cybersecurity-Skills repo and creates a skill index
that maps each of the 754 skills to the Hermes optional-skills format.

Usage:
    python3 scripts/import-cybersecurity-skills.py --clone          # Clone repo
    python3 scripts/import-cybersecurity-skills.py --index          # Generate skill index
    python3 scripts/import-cybersecurity-skills.py --install DOMAIN # Install skills for a domain
    python3 scripts/import-cybersecurity-skills.py --install        # Install skills for all domains
    python3 scripts/import-cybersecurity-skills.py --list           # List all domains
    python3 scripts/import-cybersecurity-skills.py --status         # Import status
"""

import argparse
import json
import os
import subprocess
import sys
import yaml
from pathlib import Path
from collections import defaultdict

# Upstream repository that hosts the skill markdown files.
REPO_URL = "https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git"

# Everything this script reads or writes lives under ~/.hermes.
_HERMES_HOME = Path.home() / ".hermes"
SKILLS_DIR = _HERMES_HOME / "cybersecurity-skills"  # local clone of REPO_URL
INDEX_PATH = SKILLS_DIR / "skill-index.json"  # written by --index
OPTIONAL_SKILLS_DIR = _HERMES_HOME / "optional-skills" / "cybersecurity"  # --install target

# Domain → hermes category mapping.
# Every known domain is filed under "security" unless it appears in
# _CATEGORY_OVERRIDES.
_KNOWN_DOMAINS = (
    "cloud-security",
    "threat-hunting",
    "threat-intelligence",
    "web-app-security",
    "network-security",
    "malware-analysis",
    "digital-forensics",
    "security-operations",
    "identity-access-management",
    "soc-operations",
    "container-security",
    "ot-ics-security",
    "api-security",
    "vulnerability-management",
    "incident-response",
    "red-teaming",
    "penetration-testing",
    "endpoint-security",
    "devsecops",
    "phishing-defense",
    "cryptography",
)
_CATEGORY_OVERRIDES = {"devsecops": "devops"}
DOMAIN_CATEGORIES = {
    known: _CATEGORY_OVERRIDES.get(known, "security") for known in _KNOWN_DOMAINS
}


def cmd_clone():
    """Clone the cybersecurity skills repository, or update an existing clone.

    Runs ``git pull`` inside ``SKILLS_DIR`` when that directory already
    exists, otherwise makes a shallow (``--depth 1``) clone of ``REPO_URL``
    into it. Afterwards prints the number of ``*.md`` files found in the
    checkout (a raw count of markdown files, README-style files included).

    Exits with status 1 when git cannot be started or returns a non-zero
    exit code; git's stderr is forwarded so the failure is visible instead
    of being reported as "Found 0 skill files".
    """
    if SKILLS_DIR.exists():
        print(f"Updating existing clone at {SKILLS_DIR}")
        git_cmd = ["git", "-C", str(SKILLS_DIR), "pull"]
    else:
        SKILLS_DIR.parent.mkdir(parents=True, exist_ok=True)
        print(f"Cloning {REPO_URL} to {SKILLS_DIR}")
        git_cmd = ["git", "clone", "--depth", "1", REPO_URL, str(SKILLS_DIR)]

    try:
        # Output is captured to keep the console quiet on success; it is
        # replayed below when the command fails.
        result = subprocess.run(
            git_cmd, capture_output=True, text=True, errors="replace"
        )
    except OSError as exc:
        # Typically FileNotFoundError: git is not installed or not on PATH.
        print(f"Could not run git: {exc}", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print(
            f"git failed with exit code {result.returncode}: {' '.join(git_cmd)}",
            file=sys.stderr,
        )
        if result.stderr:
            print(result.stderr.rstrip(), file=sys.stderr)
        sys.exit(1)

    # Count skills
    skill_files = list(SKILLS_DIR.rglob("*.md"))
    print(f"Found {len(skill_files)} skill files")


def _load_frontmatter(content):
    """Return the YAML frontmatter of *content* as a dict.

    An empty dict is returned when the text has no ``---`` delimited header,
    when the header is not valid YAML, or when it parses to something other
    than a mapping (a scalar or a list has no usable keys).
    """
    if not content.startswith("---"):
        return {}
    parts = content.split("---", 2)
    if len(parts) < 3:
        return {}
    try:
        loaded = yaml.safe_load(parts[1])
    except yaml.YAMLError:
        return {}
    return loaded if isinstance(loaded, dict) else {}


def _as_list(value):
    """Normalise a frontmatter value to a list (None → [], scalar → [scalar])."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def cmd_index():
    """Generate a skill index from the cloned repo.

    Walks every ``*.md`` file under ``SKILLS_DIR`` (skipping README.md,
    LICENSE.md and DESCRIPTION.md), reads its YAML frontmatter and writes a
    JSON index to ``INDEX_PATH`` with these keys: ``total_skills``,
    ``total_domains``, ``domains`` (domain → skill count), ``skills`` (one
    record per file, sorted by domain) and ``generated_from``.

    Missing or malformed frontmatter fields fall back to defaults rather
    than aborting the run: ``name`` defaults to the file stem, ``domain`` to
    ``subdomain`` and then ``"general"``, ``description`` to ``""``, and
    ``tags`` / ``nist_csf`` / ``mitre_attack`` to empty lists. Files that
    cannot be read are skipped.

    Exits with status 1 if ``SKILLS_DIR`` does not exist.
    """
    if not SKILLS_DIR.exists():
        print("Run --clone first", file=sys.stderr)
        sys.exit(1)

    non_skill_files = ("README.md", "LICENSE.md", "DESCRIPTION.md")
    skills = []
    domains = defaultdict(list)

    # Sorted so the generated index does not depend on filesystem order.
    for md_file in sorted(SKILLS_DIR.rglob("*.md")):
        if md_file.name in non_skill_files:
            continue

        try:
            content = md_file.read_text(encoding="utf-8", errors="ignore")
            size_kb = round(md_file.stat().st_size / 1024, 1)
        except OSError:
            continue

        frontmatter = _load_frontmatter(content)

        # Extract metadata. Values are coerced because frontmatter is free-form
        # YAML: a key may be present but null, or hold a scalar where a list
        # is expected.
        name = str(frontmatter.get("name") or md_file.stem)
        description = str(frontmatter.get("description") or "")
        # str() keeps the domain hashable and sortable even if it was written
        # as a list or number.
        domain = str(
            frontmatter.get("domain") or frontmatter.get("subdomain") or "general"
        )
        tags = _as_list(frontmatter.get("tags"))
        frameworks = _as_list(frontmatter.get("nist_csf")) + _as_list(
            frontmatter.get("mitre_attack")
        )

        skills.append(
            {
                "name": name,
                "file": str(md_file.relative_to(SKILLS_DIR)),
                "description": description[:200],
                "domain": domain,
                "tags": tags[:5],
                "frameworks": frameworks[:5],
                "size_kb": size_kb,
            }
        )
        domains[domain].append(name)

    # Build index
    index = {
        "total_skills": len(skills),
        "total_domains": len(domains),
        "domains": {k: len(v) for k, v in sorted(domains.items())},
        "skills": sorted(skills, key=lambda s: s["domain"]),
        "generated_from": REPO_URL,
    }

    # default=str is deliberate: YAML turns unquoted dates into date/datetime
    # objects, which json cannot serialise on its own.
    INDEX_PATH.write_text(json.dumps(index, indent=2, default=str), encoding="utf-8")
    print(f"Indexed {len(skills)} skills across {len(domains)} domains")
    print(f"Written to {INDEX_PATH}")

    # Print domain summary, largest domain first.
    print("\nDomains:")
    for domain, names in sorted(domains.items(), key=lambda x: -len(x[1])):
        print(f"  {domain}: {len(names)} skills")


def cmd_list():
    """Print every indexed domain with its skill count, largest first.

    Reads the JSON index at ``INDEX_PATH``; exits with status 1 when the
    index has not been generated yet.
    """
    if not INDEX_PATH.exists():
        print("Run --index first", file=sys.stderr)
        sys.exit(1)

    data = json.loads(INDEX_PATH.read_text())
    print(f"Total: {data['total_skills']} skills across {data['total_domains']} domains\n")

    # Negated count gives a descending sort while keeping ties in index order.
    ranked = sorted(data["domains"].items(), key=lambda entry: -entry[1])
    for domain_name, skill_count in ranked:
        print(f"  {domain_name:<35} {skill_count:>4} skills")


def _safe_dir_name(name, fallback):
    """Reduce *name* to a single path component, or return *fallback*.

    Path separators are replaced so a skill name such as ``../../x`` or
    ``/etc/x`` cannot escape the install directory; names that are empty or
    refer to the current/parent directory are rejected in favour of
    *fallback*.
    """
    cleaned = str(name).replace("/", "-").replace("\\", "-").replace("\0", "").strip()
    if cleaned in ("", ".", ".."):
        return fallback
    return cleaned


def cmd_install(domain: str = None):
    """Install skills for a domain into optional-skills.

    Each indexed skill is copied to
    ``OPTIONAL_SKILLS_DIR / <category> / <skill name> / SKILL.md``, where the
    category comes from ``DOMAIN_CATEGORIES`` (``"security"`` for unmapped
    domains).

    Args:
        domain: Only install skills whose indexed domain equals this value.
            ``None`` (the default) installs every skill in the index.

    Exits with status 1 when the index is missing or when *domain* matches
    no skills. Skills whose source file no longer exists in the clone are
    skipped and counted in a warning on stderr.
    """
    if not INDEX_PATH.exists():
        print("Run --index first", file=sys.stderr)
        sys.exit(1)

    index = json.loads(INDEX_PATH.read_text())
    skills = index["skills"]

    if domain:
        skills = [s for s in skills if s["domain"] == domain]
        if not skills:
            print(f"No skills found for domain: {domain}", file=sys.stderr)
            sys.exit(1)

    installed = 0
    missing = 0
    for skill in skills:
        # Check the source first so a stale index entry does not leave an
        # empty skill directory behind.
        src = SKILLS_DIR / skill["file"]
        if not src.is_file():
            missing += 1
            continue

        # Create skill directory. The name originates from repository
        # frontmatter, so it is sanitised before being used as a path.
        category = DOMAIN_CATEGORIES.get(skill["domain"], "security")
        dir_name = _safe_dir_name(skill["name"], src.stem)
        skill_dir = OPTIONAL_SKILLS_DIR / category / dir_name
        skill_dir.mkdir(parents=True, exist_ok=True)

        # Copy source file byte-for-byte; a decode/encode round trip would
        # silently drop bytes that are not valid in the platform encoding.
        dst = skill_dir / "SKILL.md"
        dst.write_bytes(src.read_bytes())
        installed += 1

    if missing:
        print(
            f"Skipped {missing} skills whose source file is missing; "
            "re-run --clone and --index",
            file=sys.stderr,
        )
    print(f"Installed {installed} skills to {OPTIONAL_SKILLS_DIR}")


def cmd_status():
    """Report whether the clone, the index and the install directory exist.

    When the index is present its skill and domain totals are printed; when
    the install directory is present the number of ``SKILL.md`` files below
    it is printed.
    """
    print(f"Clone dir: {SKILLS_DIR}")
    print(f"  Exists: {SKILLS_DIR.exists()}")

    print(f"Index: {INDEX_PATH}")
    have_index = INDEX_PATH.exists()
    print(f"  Exists: {have_index}")
    if have_index:
        summary = json.loads(INDEX_PATH.read_text())
        print(f"  Skills: {summary['total_skills']}")
        print(f"  Domains: {summary['total_domains']}")

    print(f"Install dir: {OPTIONAL_SKILLS_DIR}")
    have_install_dir = OPTIONAL_SKILLS_DIR.exists()
    print(f"  Exists: {have_install_dir}")
    if have_install_dir:
        # Each installed skill is represented by exactly one SKILL.md file.
        installed = sum(1 for _ in OPTIONAL_SKILLS_DIR.rglob("SKILL.md"))
        print(f"  Installed skills: {installed}")


def main():
    """Parse command-line flags and run the requested actions.

    Several flags may be combined (for example ``--clone --index``); the
    actions then run in the fixed order clone → index → list → install →
    status, so a later step can use what an earlier one produced. With no
    flags the help text is printed.

    ``--install`` without a DOMAIN value installs every indexed skill.
    """
    parser = argparse.ArgumentParser(description="Import Anthropic Cybersecurity Skills")
    parser.add_argument("--clone", action="store_true", help="Clone the skills repo")
    parser.add_argument("--index", action="store_true", help="Generate skill index")
    parser.add_argument("--list", action="store_true", help="List all domains")
    parser.add_argument("--install", metavar="DOMAIN", nargs="?", const="all", help="Install skills for domain")
    parser.add_argument("--status", action="store_true", help="Import status")
    args = parser.parse_args()

    # Collect every requested action instead of stopping at the first match,
    # so no flag on the command line is silently ignored.
    actions = []
    if args.clone:
        actions.append(cmd_clone)
    if args.index:
        actions.append(cmd_index)
    if args.list:
        actions.append(cmd_list)
    if args.install is not None:
        # "all" is the const stored when --install is given without a value.
        domain = None if args.install == "all" else args.install
        actions.append(lambda: cmd_install(domain))
    if args.status:
        actions.append(cmd_status)

    if not actions:
        parser.print_help()
        return

    for action in actions:
        action()


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()