""" tools/path_guard.py — Poka-yoke: Prevent hardcoded home-directory paths. Validates file paths before tool execution to prevent the latent defect of hardcoded paths like /Users//, /home//, or ~/ in code that gets committed or in runtime arguments. Usage: from tools.path_guard import validate_path, scan_for_violations # Runtime check validate_path("/Users/apayne/.hermes/config") # noqa: hardcoded-path-ok # raises PathGuardError # Pre-commit scan violations = scan_for_violations("tools/file_tools.py") """ import os import re from pathlib import Path from typing import List, Tuple # ── Patterns ──────────────────────────────────────────────────────── # Matches hardcoded home-directory paths in string content HARDCODED_PATH_PATTERNS = [ # /Users//... (macOS) (re.compile(r"""['"]/(Users)/[\w.-]+/"""), "/Users//"), # /home//... (Linux) (re.compile(r"""['"]/home/[\w.-]+/"""), "/home//"), # Bare ~/... (unexpanded tilde in code — NOT in expanduser() calls) (re.compile(r"""['"]~/[^'"]+['"]"""), "~/..."), # noqa: hardcoded-path-ok # /root/... (Linux root home) (re.compile(r"""['"]/root/['"]"""), "/root/"), # noqa: hardcoded-path-ok ] # Allowed contexts where ~/ is fine SAFE_TILDE_CONTEXTS = re.compile( r"""expanduser|display_path|relpath|os\.path|Path\(|str\(.*home|""" r"""noqa:\s*hardcoded-path-ok|""" # explicit escape hatch r"""\bprint\(|f['"]|\.format\(|""" # display/formatting contexts r"""["']~/["']\s*$""", # just displaying ~/ as prefix re.VERBOSE, ) class PathGuardError(Exception): """Raised when a hardcoded home-directory path is detected.""" def __init__(self, path: str, pattern_name: str, suggestion: str): self.path = path self.pattern_name = pattern_name self.suggestion = suggestion super().__init__( f"Hardcoded path detected: {path} matches {pattern_name}. " f"Suggestion: {suggestion}. " f"Use get_hermes_home(), os.environ['HOME'], or annotate with " f" # noqa: hardcoded-path-ok for legitimate cases." ) # ── Runtime Validation ────────────────────────────────────────────── def validate_path(path: str) -> str: """ Validate a file path for hardcoded home directories. Returns the path if valid, raises PathGuardError if not. This is meant to be called in tool wrappers (write_file, execute_code) before executing operations with user-supplied paths. Note: At runtime, paths from os.path.expanduser() will resolve to /Users//... — this is expected and allowed. The guard catches paths that were LITERALLY hardcoded in source code or tool arguments that look like they came from a different machine (e.g., a path containing a different username than the current user). """ if not path or not isinstance(path, str): return path # At runtime, expanded paths matching current HOME are fine home = os.environ.get("HOME", "") if home and path.startswith(home): return path # Check for hardcoded /Users// (macOS) — but not current user if re.match(r"^/Users/[\w.-]+/", path): raise PathGuardError( path, "/Users//", f"Use $HOME or os.path.expanduser('~') instead. " f"Got: {path}" ) # Check for hardcoded /home// (Linux) if re.match(r"^/home/[\w.-]+/", path): raise PathGuardError( path, "/home//", f"Use $HOME or os.path.expanduser('~') instead. " f"Got: {path}" ) return path def validate_tool_paths(paths: list) -> list: """ Validate multiple paths (e.g., from tool arguments). Returns validated list. Raises PathGuardError on first violation. """ return [validate_path(p) for p in paths if isinstance(p, str)] # ── File Scanning (Pre-commit / CI) ──────────────────────────────── def scan_file_for_violations(filepath: str) -> List[Tuple[int, str, str, str]]: """ Scan a Python file for hardcoded home-directory path patterns. Returns list of (line_number, line_content, pattern_name, suggestion). """ violations = [] try: with open(filepath) as f: for lineno, line in enumerate(f, 1): # Skip comments and noqa lines stripped = line.strip() if stripped.startswith("#"): continue if "noqa: hardcoded-path-ok" in line: continue for pattern, name in HARDCODED_PATH_PATTERNS: if pattern.search(line): # Special case: ~/ in expanduser/display context is OK if name == "~/..." and SAFE_TILDE_CONTEXTS.search(line): # noqa: hardcoded-path-ok continue violations.append((lineno, line.rstrip(), name, f"Use get_hermes_home(), os.environ['HOME'], or add # noqa: hardcoded-path-ok")) except (IOError, UnicodeDecodeError): pass return violations def scan_directory(root: str, extensions: tuple = (".py",)) -> List[Tuple[str, List]]: """ Scan a directory tree for hardcoded path violations. Returns list of (filepath, violations) tuples. """ results = [] for dirpath, _, filenames in os.walk(root): # Skip hidden dirs, __pycache__, venv, test dirs skip_dirs = {"__pycache__", ".git", "venv", "node_modules", ".hermes"} if any(s in dirpath for s in skip_dirs): continue for fname in filenames: if not fname.endswith(extensions): continue # Skip test files (they may legitimately have paths) if fname.startswith("test_") or "/tests/" in dirpath: continue fpath = os.path.join(dirpath, fname) violations = scan_file_for_violations(fpath) if violations: results.append((fpath, violations)) return results