#!/usr/bin/env python3
"""
import-cybersecurity-skills.py — Import Anthropic Cybersecurity Skills into Hermes.

Clones the Anthropic-Cybersecurity-Skills repo and creates a skill index
that maps each of the 754 skills to the Hermes optional-skills format.

Usage:
    python3 scripts/import-cybersecurity-skills.py --clone             # Clone repo
    python3 scripts/import-cybersecurity-skills.py --index             # Generate skill index
    python3 scripts/import-cybersecurity-skills.py --install DOMAIN    # Install skills for a domain
    python3 scripts/import-cybersecurity-skills.py --list              # List all domains
    python3 scripts/import-cybersecurity-skills.py --status            # Import status

Passing --install without a DOMAIN installs every indexed skill.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional

try:
    import yaml
except ImportError:
    # PyYAML is only needed to parse front matter in --index; every other
    # command keeps working without it.
    yaml = None

REPO_URL = "https://github.com/mukul975/Anthropic-Cybersecurity-Skills.git"
SKILLS_DIR = Path.home() / ".hermes" / "cybersecurity-skills"
INDEX_PATH = SKILLS_DIR / "skill-index.json"
OPTIONAL_SKILLS_DIR = Path.home() / ".hermes" / "optional-skills" / "cybersecurity"

# Domain → hermes category mapping
DOMAIN_CATEGORIES = {
    "cloud-security": "security",
    "threat-hunting": "security",
    "threat-intelligence": "security",
    "web-app-security": "security",
    "network-security": "security",
    "malware-analysis": "security",
    "digital-forensics": "security",
    "security-operations": "security",
    "identity-access-management": "security",
    "soc-operations": "security",
    "container-security": "security",
    "ot-ics-security": "security",
    "api-security": "security",
    "vulnerability-management": "security",
    "incident-response": "security",
    "red-teaming": "security",
    "penetration-testing": "security",
    "endpoint-security": "security",
    "devsecops": "devops",
    "phishing-defense": "security",
    "cryptography": "security",
}

# Markdown files in the upstream repo that are documentation, not skills.
_NON_SKILL_FILES = frozenset({"README.md", "LICENSE.md", "DESCRIPTION.md"})


def _as_text(value: Any, default: str = "") -> str:
    """Return *value* as a string, or *default* when it is None."""
    if value is None:
        return default
    return value if isinstance(value, str) else str(value)


def _as_list(value: Any) -> List[Any]:
    """Return *value* as a list: None -> [], list/tuple -> list, scalar -> [scalar]."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _safe_dir_name(name: Any, fallback: str = "unnamed-skill") -> str:
    """Turn a skill name into a single, harmless path component.

    Skill names come from front matter in a third-party repository. Joining
    them into a path unchanged would let an absolute name or one containing
    ``../`` write outside the install directory, so path separators and NUL
    bytes are replaced and the special names ``.`` / ``..`` are rejected.
    """
    cleaned = re.sub(r"[\\/\x00]+", "-", _as_text(name)).strip()
    if cleaned in ("", ".", ".."):
        return fallback
    return cleaned


def _run_git(args: List[str], action: str) -> None:
    """Run ``git <args>`` and exit with git's stderr if it fails."""
    try:
        result = subprocess.run(["git", *args], capture_output=True, text=True)
    except FileNotFoundError:
        print("git executable not found on PATH", file=sys.stderr)
        sys.exit(1)
    if result.returncode != 0:
        # Output is captured, so it must be surfaced here or the failure is silent.
        print(
            f"git {action} failed (exit {result.returncode}):\n{result.stderr.strip()}",
            file=sys.stderr,
        )
        sys.exit(1)


def _iter_skill_files() -> Iterator[Path]:
    """Yield skill markdown files under SKILLS_DIR in a stable (sorted) order."""
    for md_file in sorted(SKILLS_DIR.rglob("*.md")):
        if md_file.name in _NON_SKILL_FILES:
            continue
        if ".git" in md_file.relative_to(SKILLS_DIR).parts:
            continue
        if md_file.is_file():
            yield md_file


def _parse_frontmatter(content: str) -> Dict[str, Any]:
    """Return the YAML front matter of *content* as a dict ({} if absent or invalid).

    Requires PyYAML whenever *content* starts with a ``---`` fence.
    """
    if not content.startswith("---"):
        return {}
    parts = content.split("---", 2)
    if len(parts) < 3:
        return {}
    try:
        loaded = yaml.safe_load(parts[1])
    except yaml.YAMLError:
        return {}
    # safe_load may return a scalar or a list for malformed front matter.
    return loaded if isinstance(loaded, dict) else {}


def _skill_record(
    frontmatter: Dict[str, Any], rel_path: str, stem: str, size_bytes: int
) -> Dict[str, Any]:
    """Build one index entry, tolerating null or mistyped front-matter fields."""
    domain = (
        _as_text(frontmatter.get("domain"))
        or _as_text(frontmatter.get("subdomain"))
        or "general"
    )
    frameworks = _as_list(frontmatter.get("nist_csf")) + _as_list(
        frontmatter.get("mitre_attack")
    )
    return {
        "name": _as_text(frontmatter.get("name")) or stem,
        "file": rel_path,
        "description": _as_text(frontmatter.get("description"))[:200],
        "domain": domain,
        "tags": _as_list(frontmatter.get("tags"))[:5],
        "frameworks": frameworks[:5],
        "size_kb": round(size_bytes / 1024, 1),
    }


def _load_index() -> Dict[str, Any]:
    """Load the JSON index, exiting with a hint when it is missing or corrupt."""
    if not INDEX_PATH.exists():
        print("Run --index first", file=sys.stderr)
        sys.exit(1)
    try:
        return json.loads(INDEX_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        print(f"Cannot read {INDEX_PATH}: {exc}. Re-run --index", file=sys.stderr)
        sys.exit(1)


def cmd_clone():
    """Clone the cybersecurity skills repository, or pull if it already exists.

    Exits with status 1 when git is missing or the git command fails.
    """
    if SKILLS_DIR.exists():
        print(f"Updating existing clone at {SKILLS_DIR}")
        _run_git(["-C", str(SKILLS_DIR), "pull"], "pull")
    else:
        SKILLS_DIR.parent.mkdir(parents=True, exist_ok=True)
        print(f"Cloning {REPO_URL} to {SKILLS_DIR}")
        _run_git(["clone", "--depth", "1", REPO_URL, str(SKILLS_DIR)], "clone")

    # Count skills with the same filter --index uses, so the numbers agree.
    skill_count = sum(1 for _ in _iter_skill_files())
    print(f"Found {skill_count} skill files")


def cmd_index():
    """Generate a skill index (INDEX_PATH) from the cloned repo.

    Exits with status 1 when the clone is missing or PyYAML is unavailable.
    """
    if not SKILLS_DIR.exists():
        print("Run --clone first", file=sys.stderr)
        sys.exit(1)
    if yaml is None:
        print("PyYAML is required for --index (pip install pyyaml)", file=sys.stderr)
        sys.exit(1)

    skills = []
    domains = defaultdict(list)

    for md_file in _iter_skill_files():
        try:
            content = md_file.read_text(encoding="utf-8-sig", errors="ignore")
            size_bytes = md_file.stat().st_size
        except OSError:
            continue

        skill = _skill_record(
            _parse_frontmatter(content),
            str(md_file.relative_to(SKILLS_DIR)),
            md_file.stem,
            size_bytes,
        )
        skills.append(skill)
        domains[skill["domain"]].append(skill["name"])

    # Build index
    index = {
        "total_skills": len(skills),
        "total_domains": len(domains),
        "domains": {k: len(v) for k, v in sorted(domains.items())},
        "skills": sorted(skills, key=lambda s: s["domain"]),
        "generated_from": REPO_URL,
    }

    # default=str: YAML can yield date/datetime values inside tags or
    # framework lists, which json cannot serialize natively.
    INDEX_PATH.write_text(json.dumps(index, indent=2, default=str), encoding="utf-8")
    print(f"Indexed {len(skills)} skills across {len(domains)} domains")
    print(f"Written to {INDEX_PATH}")

    # Print domain summary, largest domain first
    print("\nDomains:")
    for domain, names in sorted(domains.items(), key=lambda x: -len(x[1])):
        print(f"  {domain}: {len(names)} skills")


def cmd_list():
    """List all security domains from the index, largest first."""
    index = _load_index()
    print(f"Total: {index['total_skills']} skills across {index['total_domains']} domains\n")
    for domain, count in sorted(index["domains"].items(), key=lambda x: -x[1]):
        print(f"  {domain:<35} {count:>4} skills")


def cmd_install(domain: Optional[str] = None):
    """Install skills for a domain into optional-skills.

    Args:
        domain: Domain to install; None (or empty) installs every indexed skill.

    Each skill is copied byte-for-byte to
    ``OPTIONAL_SKILLS_DIR/<category>/<skill name>/SKILL.md``. Exits with
    status 1 when the index is missing or no skill matches *domain*.
    """
    index = _load_index()
    skills = index.get("skills", [])

    if domain:
        skills = [s for s in skills if s.get("domain") == domain]
        if not skills:
            print(f"No skills found for domain: {domain}", file=sys.stderr)
            sys.exit(1)

    installed = 0
    for skill in skills:
        src = SKILLS_DIR / skill["file"]
        if not src.is_file():
            # Do not leave an empty skill directory behind for a missing source.
            continue

        category = DOMAIN_CATEGORIES.get(_as_text(skill.get("domain")), "security")
        skill_dir = OPTIONAL_SKILLS_DIR / category / _safe_dir_name(skill.get("name"))
        skill_dir.mkdir(parents=True, exist_ok=True)

        # Byte copy: no locale-dependent decode/encode round trip.
        shutil.copyfile(src, skill_dir / "SKILL.md")
        installed += 1

    print(f"Installed {installed} skills to {OPTIONAL_SKILLS_DIR}")


def cmd_status():
    """Show import status for the clone, the index and the install directory."""
    print(f"Clone dir: {SKILLS_DIR}")
    print(f"  Exists: {SKILLS_DIR.exists()}")

    print(f"Index: {INDEX_PATH}")
    print(f"  Exists: {INDEX_PATH.exists()}")
    if INDEX_PATH.exists():
        try:
            index = json.loads(INDEX_PATH.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Status is diagnostic; report the problem instead of crashing.
            print("  Unreadable (re-run --index)")
        else:
            print(f"  Skills: {index['total_skills']}")
            print(f"  Domains: {index['total_domains']}")

    print(f"Install dir: {OPTIONAL_SKILLS_DIR}")
    print(f"  Exists: {OPTIONAL_SKILLS_DIR.exists()}")
    if OPTIONAL_SKILLS_DIR.exists():
        installed = sum(1 for _ in OPTIONAL_SKILLS_DIR.rglob("SKILL.md"))
        print(f"  Installed skills: {installed}")


def main():
    """Parse command-line flags and run the first matching sub-command."""
    parser = argparse.ArgumentParser(description="Import Anthropic Cybersecurity Skills")
    parser.add_argument("--clone", action="store_true", help="Clone the skills repo")
    parser.add_argument("--index", action="store_true", help="Generate skill index")
    parser.add_argument("--list", action="store_true", help="List all domains")
    parser.add_argument("--install", metavar="DOMAIN", nargs="?", const="all", help="Install skills for domain")
    parser.add_argument("--status", action="store_true", help="Import status")
    args = parser.parse_args()

    if args.clone:
        cmd_clone()
    elif args.index:
        cmd_index()
    elif args.list:
        cmd_list()
    elif args.install is not None:
        # A bare --install yields the sentinel "all", meaning no domain filter.
        cmd_install(None if args.install == "all" else args.install)
    elif args.status:
        cmd_status()
    else:
        parser.print_help()


if __name__ == "__main__":
    main()