Files
hermes-agent/agent/skill_security.py

214 lines
7.2 KiB
Python
Raw Normal View History

"""Security utilities for skill loading and validation.
Provides path traversal protection and input validation for skill names
to prevent security vulnerabilities like V-011 (Skills Guard Bypass).
"""
import re
from pathlib import Path
from typing import Optional, Tuple
# Strict skill name validation: alphanumeric, hyphens, underscores only
# This prevents path traversal attacks via skill names like "../../../etc/passwd"
VALID_SKILL_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9._-]+$')
# Maximum skill name length to prevent other attack vectors
MAX_SKILL_NAME_LENGTH = 256
# Suspicious patterns that indicate path traversal attempts
PATH_TRAVERSAL_PATTERNS = [
"..", # Parent directory reference
"~", # Home directory expansion
"/", # Absolute path (Unix)
"\\", # Windows path separator
"//", # Protocol-relative or UNC path
"file:", # File protocol
"ftp:", # FTP protocol
"http:", # HTTP protocol
"https:", # HTTPS protocol
"data:", # Data URI
"javascript:", # JavaScript protocol
"vbscript:", # VBScript protocol
]
# Characters that should never appear in skill names
INVALID_CHARACTERS = set([
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'<', '>', '|', '&', ';', '$', '`', '"', "'",
])
class SkillSecurityError(Exception):
"""Raised when a skill name fails security validation."""
pass
class PathTraversalError(SkillSecurityError):
"""Raised when path traversal is detected in a skill name."""
pass
class InvalidSkillNameError(SkillSecurityError):
"""Raised when a skill name contains invalid characters."""
pass
def validate_skill_name(name: str, allow_path_separator: bool = False) -> None:
"""Validate a skill name for security issues.
Args:
name: The skill name or identifier to validate
allow_path_separator: If True, allows '/' for category/skill paths (e.g., "mlops/axolotl")
Raises:
PathTraversalError: If path traversal patterns are detected
InvalidSkillNameError: If the name contains invalid characters
SkillSecurityError: For other security violations
"""
if not name or not isinstance(name, str):
raise InvalidSkillNameError("Skill name must be a non-empty string")
if len(name) > MAX_SKILL_NAME_LENGTH:
raise InvalidSkillNameError(
f"Skill name exceeds maximum length of {MAX_SKILL_NAME_LENGTH} characters"
)
# Check for null bytes and other control characters
for char in name:
if char in INVALID_CHARACTERS:
raise InvalidSkillNameError(
f"Skill name contains invalid character: {repr(char)}"
)
# Validate against allowed character pattern first
pattern = r'^[a-zA-Z0-9._-]+$' if not allow_path_separator else r'^[a-zA-Z0-9._/-]+$'
if not re.match(pattern, name):
invalid_chars = set(c for c in name if not re.match(r'[a-zA-Z0-9._/-]', c))
raise InvalidSkillNameError(
f"Skill name contains invalid characters: {sorted(invalid_chars)}. "
"Only alphanumeric characters, hyphens, underscores, dots, "
f"{'and forward slashes ' if allow_path_separator else ''}are allowed."
)
# Check for path traversal patterns (excluding '/' when path separators are allowed)
name_lower = name.lower()
patterns_to_check = PATH_TRAVERSAL_PATTERNS.copy()
if allow_path_separator:
# Remove '/' from patterns when path separators are allowed
patterns_to_check = [p for p in patterns_to_check if p != '/']
for pattern in patterns_to_check:
if pattern in name_lower:
raise PathTraversalError(
f"Path traversal detected in skill name: '{pattern}' is not allowed"
)
def resolve_skill_path(
skill_name: str,
skills_base_dir: Path,
allow_path_separator: bool = True
) -> Tuple[Path, Optional[str]]:
"""Safely resolve a skill name to a path within the skills directory.
Args:
skill_name: The skill name or path (e.g., "axolotl" or "mlops/axolotl")
skills_base_dir: The base skills directory
allow_path_separator: Whether to allow '/' in skill names for categories
Returns:
Tuple of (resolved_path, error_message)
- If successful: (resolved_path, None)
- If failed: (skills_base_dir, error_message)
Raises:
PathTraversalError: If the resolved path would escape the skills directory
"""
try:
validate_skill_name(skill_name, allow_path_separator=allow_path_separator)
except SkillSecurityError as e:
return skills_base_dir, str(e)
# Build the target path
try:
target_path = (skills_base_dir / skill_name).resolve()
except (OSError, ValueError) as e:
return skills_base_dir, f"Invalid skill path: {e}"
# Ensure the resolved path is within the skills directory
try:
target_path.relative_to(skills_base_dir.resolve())
except ValueError:
raise PathTraversalError(
f"Skill path '{skill_name}' resolves outside the skills directory boundary"
)
return target_path, None
def sanitize_skill_identifier(identifier: str) -> str:
"""Sanitize a skill identifier by removing dangerous characters.
This is a defensive fallback for cases where strict validation
cannot be applied. It removes or replaces dangerous characters.
Args:
identifier: The raw skill identifier
Returns:
A sanitized version of the identifier
"""
if not identifier:
return ""
# Replace path traversal sequences
sanitized = identifier.replace("..", "")
sanitized = sanitized.replace("//", "/")
# Remove home directory expansion
if sanitized.startswith("~"):
sanitized = sanitized[1:]
# Remove protocol handlers
for protocol in ["file:", "ftp:", "http:", "https:", "data:", "javascript:", "vbscript:"]:
sanitized = sanitized.replace(protocol, "")
sanitized = sanitized.replace(protocol.upper(), "")
# Remove null bytes and control characters
for char in INVALID_CHARACTERS:
sanitized = sanitized.replace(char, "")
# Normalize path separators to forward slash
sanitized = sanitized.replace("\\", "/")
# Remove leading/trailing slashes and whitespace
sanitized = sanitized.strip("/ ").strip()
return sanitized
def is_safe_skill_path(path: Path, allowed_base_dirs: list[Path]) -> bool:
"""Check if a path is safely within allowed directories.
Args:
path: The path to check
allowed_base_dirs: List of allowed base directories
Returns:
True if the path is within allowed boundaries, False otherwise
"""
try:
resolved = path.resolve()
for base_dir in allowed_base_dirs:
try:
resolved.relative_to(base_dir.resolve())
return True
except ValueError:
continue
return False
except (OSError, ValueError):
return False