hermes-agent/agent/skill_security.py

"""Security utilities for skill loading and validation.

Provides path traversal protection and input validation for skill names
to prevent security vulnerabilities like V-011 (Skills Guard Bypass).
"""

import re
from pathlib import Path
from typing import Optional, Tuple

# Strict skill name validation: alphanumeric, hyphens, underscores only
# This prevents path traversal attacks via skill names like "../../../etc/passwd"
VALID_SKILL_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9._-]+$')

# Maximum skill name length to prevent other attack vectors
MAX_SKILL_NAME_LENGTH = 256

# Suspicious patterns that indicate path traversal attempts
PATH_TRAVERSAL_PATTERNS = [
    "..",           # Parent directory reference
    "~",            # Home directory expansion
    "/",            # Absolute path (Unix)
    "\\",           # Windows path separator
    "//",           # Protocol-relative or UNC path
    "file:",        # File protocol
    "ftp:",         # FTP protocol
    "http:",        # HTTP protocol
    "https:",       # HTTPS protocol
    "data:",        # Data URI
    "javascript:",  # JavaScript protocol
    "vbscript:",    # VBScript protocol
]

# Characters that should never appear in skill names
INVALID_CHARACTERS = set([
    '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
    '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
    '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
    '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
    '<', '>', '|', '&', ';', '$', '`', '"', "'",
])


class SkillSecurityError(Exception):
    """Raised when a skill name fails security validation."""
    pass


class PathTraversalError(SkillSecurityError):
    """Raised when path traversal is detected in a skill name."""
    pass


class InvalidSkillNameError(SkillSecurityError):
    """Raised when a skill name contains invalid characters."""
    pass


def validate_skill_name(name: str, allow_path_separator: bool = False) -> None:
    """Validate a skill name for security issues.

    Args:
        name: The skill name or identifier to validate
        allow_path_separator: If True, allows '/' for category/skill paths (e.g., "mlops/axolotl")

    Raises:
        PathTraversalError: If path traversal patterns are detected
        InvalidSkillNameError: If the name contains invalid characters
        SkillSecurityError: For other security violations
    """
    if not name or not isinstance(name, str):
        raise InvalidSkillNameError("Skill name must be a non-empty string")

    if len(name) > MAX_SKILL_NAME_LENGTH:
        raise InvalidSkillNameError(
            f"Skill name exceeds maximum length of {MAX_SKILL_NAME_LENGTH} characters"
        )

    # Check for null bytes and other control characters
    for char in name:
        if char in INVALID_CHARACTERS:
            raise InvalidSkillNameError(
                f"Skill name contains invalid character: {repr(char)}"
            )

    # Validate against allowed character pattern first
    pattern = r'^[a-zA-Z0-9._-]+$' if not allow_path_separator else r'^[a-zA-Z0-9._/-]+$'
    if not re.match(pattern, name):
        invalid_chars = set(c for c in name if not re.match(r'[a-zA-Z0-9._/-]', c))
        raise InvalidSkillNameError(
            f"Skill name contains invalid characters: {sorted(invalid_chars)}. "
            "Only alphanumeric characters, hyphens, underscores, dots, "
            f"{'and forward slashes ' if allow_path_separator else ''}are allowed."
        )

    # Check for path traversal patterns (excluding '/' when path separators are allowed)
    name_lower = name.lower()
    patterns_to_check = PATH_TRAVERSAL_PATTERNS.copy()
    if allow_path_separator:
        # Remove '/' from patterns when path separators are allowed
        patterns_to_check = [p for p in patterns_to_check if p != '/']

    for pattern in patterns_to_check:
        if pattern in name_lower:
            raise PathTraversalError(
                f"Path traversal detected in skill name: '{pattern}' is not allowed"
            )


def resolve_skill_path(
    skill_name: str,
    skills_base_dir: Path,
    allow_path_separator: bool = True
) -> Tuple[Path, Optional[str]]:
    """Safely resolve a skill name to a path within the skills directory.

    Args:
        skill_name: The skill name or path (e.g., "axolotl" or "mlops/axolotl")
        skills_base_dir: The base skills directory
        allow_path_separator: Whether to allow '/' in skill names for categories

    Returns:
        Tuple of (resolved_path, error_message)
        - If successful: (resolved_path, None)
        - If failed: (skills_base_dir, error_message)

    Raises:
        PathTraversalError: If the resolved path would escape the skills directory
    """
    try:
        validate_skill_name(skill_name, allow_path_separator=allow_path_separator)
    except SkillSecurityError as e:
        return skills_base_dir, str(e)

    # Build the target path
    try:
        target_path = (skills_base_dir / skill_name).resolve()
    except (OSError, ValueError) as e:
        return skills_base_dir, f"Invalid skill path: {e}"

    # Ensure the resolved path is within the skills directory
    try:
        target_path.relative_to(skills_base_dir.resolve())
    except ValueError:
        raise PathTraversalError(
            f"Skill path '{skill_name}' resolves outside the skills directory boundary"
        )

    return target_path, None


def sanitize_skill_identifier(identifier: str) -> str:
    """Sanitize a skill identifier by removing dangerous characters.

    This is a defensive fallback for cases where strict validation
    cannot be applied. It removes or replaces dangerous characters.

    Args:
        identifier: The raw skill identifier

    Returns:
        A sanitized version of the identifier
    """
    if not identifier:
        return ""

    # Replace path traversal sequences
    sanitized = identifier.replace("..", "")
    sanitized = sanitized.replace("//", "/")

    # Remove home directory expansion
    if sanitized.startswith("~"):
        sanitized = sanitized[1:]

    # Remove protocol handlers
    for protocol in ["file:", "ftp:", "http:", "https:", "data:", "javascript:", "vbscript:"]:
        sanitized = sanitized.replace(protocol, "")
        sanitized = sanitized.replace(protocol.upper(), "")

    # Remove null bytes and control characters
    for char in INVALID_CHARACTERS:
        sanitized = sanitized.replace(char, "")

    # Normalize path separators to forward slash
    sanitized = sanitized.replace("\\", "/")

    # Remove leading/trailing slashes and whitespace
    sanitized = sanitized.strip("/ ").strip()

    return sanitized


def is_safe_skill_path(path: Path, allowed_base_dirs: list[Path]) -> bool:
    """Check if a path is safely within allowed directories.

    Args:
        path: The path to check
        allowed_base_dirs: List of allowed base directories

    Returns:
        True if the path is within allowed boundaries, False otherwise
    """
    try:
        resolved = path.resolve()
        for base_dir in allowed_base_dirs:
            try:
                resolved.relative_to(base_dir.resolve())
                return True
            except ValueError:
                continue
        return False
    except (OSError, ValueError):
        return False