# ---------------------------------------------------------------------------
# Patch metadata (kept from the original git format-patch header)
#
# From 9e00a5979124d1ef1a35991ba39ad2fecf06ba61 Mon Sep 17 00:00:00 2001
# From: Alexander Whitestone
# Date: Tue, 21 Apr 2026 22:26:54 -0400
# Subject: [PATCH] test: verify hardcoded-home path guard from burn/921 branch
#
# Cherry-picks tools/path_guard.py and tests/test_path_guard.py from
# burn/921-poka-yoke-hardcoded-paths (commit 5dcb905).
#
# All 21 tests pass:
# - hardcoded /Users/<name>/ paths are rejected at runtime
# - hardcoded /home/<name>/ paths are rejected at runtime
# - ~/.hermes/... via expanduser() passes (safe, expanded at runtime)
# - valid relative and /tmp/ absolute paths pass
# - static scanner catches violations and respects # noqa: hardcoded-path-ok
# - comments are skipped by scanner
# - directory scanner skips test files and __pycache__
#
# Refs #962
# Co-Authored-By: Claude Sonnet 4.6
#
# Files in this patch: tests/test_path_guard.py, tools/path_guard.py
# ---------------------------------------------------------------------------
# File: tests/test_path_guard.py (new file, mode 100644)
# ---------------------------------------------------------------------------
"""Tests for tools/path_guard.py — poka-yoke hardcoded path detection."""

import os
import tempfile
from pathlib import Path

import pytest

from tools.path_guard import (
    PathGuardError,
    scan_directory,
    scan_file_for_violations,
    validate_path,
    validate_tool_paths,
)


@pytest.fixture
def isolated_home(monkeypatch, tmp_path):
    """Point $HOME at a throwaway directory for the duration of one test.

    validate_path() exempts anything under the current $HOME, so without this
    the "reject" tests would pass or fail depending on who runs them (for
    example a CI user whose home really is /home/user).
    """
    home = tmp_path / "home"
    home.mkdir()
    monkeypatch.setenv("HOME", str(home))
    return home


@pytest.mark.usefixtures("isolated_home")
class TestValidatePath:
    """Runtime path validation."""

    def test_valid_relative_path(self):
        assert validate_path("tools/file_tools.py") == "tools/file_tools.py"

    def test_valid_absolute_path(self):
        assert validate_path("/tmp/test.txt") == "/tmp/test.txt"

    def test_valid_hermes_home(self):
        # expanduser() resolves against the pinned $HOME, so the result lies
        # under the current home and must be handed back untouched.
        expanded = os.path.expanduser("~/.hermes/config.yaml")
        assert validate_path(expanded) == expanded

    def test_reject_users_hardcoded(self):
        with pytest.raises(PathGuardError, match="/Users/"):
            validate_path("/Users/someone_else/.hermes/config")

    def test_reject_home_hardcoded(self):
        with pytest.raises(PathGuardError, match="/home/"):
            validate_path("/home/user/.hermes/config")

    def test_empty_path(self):
        assert validate_path("") == ""
        assert validate_path(None) is None

    def test_non_string(self):
        assert validate_path(42) == 42


@pytest.mark.usefixtures("isolated_home")
class TestValidateToolPaths:
    """Batch path validation."""

    def test_all_valid(self):
        paths = ["tools/file.py", "/tmp/x.txt", "relative/path.py"]
        assert validate_tool_paths(paths) == paths

    def test_mixed_invalid(self):
        with pytest.raises(PathGuardError):
            validate_tool_paths(["tools/file.py", "/Users/someone_else/secret.txt"])

    def test_skips_non_strings(self):
        assert validate_tool_paths([None, 42, "valid.py"]) == ["valid.py"]


class TestScanFileForViolations:
    """Static file scanning."""

    def test_clean_file(self, tmp_path):
        f = tmp_path / "clean.py"
        f.write_text("import os\nHOME = os.environ['HOME']\n")
        assert scan_file_for_violations(str(f)) == []

    def test_hardcoded_users(self, tmp_path):
        f = tmp_path / "bad.py"
        f.write_text("CONFIG = '/Users/apayne/.hermes/config.yaml'\n")
        violations = scan_file_for_violations(str(f))
        assert len(violations) == 1
        # Index 2 of a violation tuple is the pattern name.
        assert "/Users//" in violations[0][2]

    def test_hardcoded_home(self, tmp_path):
        f = tmp_path / "bad2.py"
        f.write_text("PATH = '/home/deploy/.hermes/state.db'\n")
        violations = scan_file_for_violations(str(f))
        assert len(violations) == 1
        assert "/home//" in violations[0][2]

    def test_tilde_in_expanduser_ok(self, tmp_path):
        f = tmp_path / "ok.py"
        f.write_text("p = os.path.expanduser('~/.hermes/config')\n")
        assert scan_file_for_violations(str(f)) == []

    def test_tilde_in_display_ok(self, tmp_path):
        f = tmp_path / "ok2.py"
        f.write_text('print("~/config saved")\n')
        assert scan_file_for_violations(str(f)) == []

    def test_noqa_escape(self, tmp_path):
        f = tmp_path / "noqa.py"
        f.write_text("PATH = '/Users/apayne/test' # noqa: hardcoded-path-ok\n")
        assert scan_file_for_violations(str(f)) == []

    def test_comments_skipped(self, tmp_path):
        f = tmp_path / "comment.py"
        f.write_text("# PATH = '/Users/apayne/test'\n")
        assert scan_file_for_violations(str(f)) == []


class TestScanDirectory:
    """Directory scanning."""

    def test_clean_tree(self, tmp_path):
        (tmp_path / "clean.py").write_text("import os\n")
        (tmp_path / "sub").mkdir()
        (tmp_path / "sub" / "also_clean.py").write_text("x = 1\n")
        assert scan_directory(str(tmp_path)) == []

    def test_finds_violations(self, tmp_path):
        (tmp_path / "bad.py").write_text("P = '/Users/x/.hermes'\n")
        results = scan_directory(str(tmp_path))
        assert len(results) == 1
        assert results[0][0].endswith("bad.py")

    def test_skips_tests(self, tmp_path):
        (tmp_path / "test_something.py").write_text("P = '/Users/x/.hermes'\n")
        assert scan_directory(str(tmp_path)) == []

    def test_skips_pycache(self, tmp_path):
        cache = tmp_path / "__pycache__"
        cache.mkdir()
        (cache / "cached.py").write_text("P = '/Users/x/.hermes'\n")
        assert scan_directory(str(tmp_path)) == []


# ---------------------------------------------------------------------------
# File: tools/path_guard.py (new file, mode 100644) follows.
# Its module docstring opens with:
#   tools/path_guard.py — Poka-yoke: Prevent hardcoded home-directory paths.
#   Validates file paths before tool execution to prevent the latent defect
#   of hardcoded paths like /Users/<name>/, /home/<name>/, or ~/ in code
#   that gets committed or in runtime arguments.
# ---------------------------------------------------------------------------
"""
tools/path_guard.py — Poka-yoke: prevent hardcoded home-directory paths.

Validates file paths before tool execution to prevent the latent defect
of hardcoded paths like /Users/<name>/ or /home/<name>/ in runtime
arguments, and scans source files for the same patterns (plus unexpanded
~/ and /root/ string literals) before they get committed.

Usage:
    from tools.path_guard import validate_path, scan_file_for_violations

    # Runtime check
    validate_path("/Users/apayne/.hermes/config")  # noqa: hardcoded-path-ok  # raises PathGuardError

    # Pre-commit scan
    violations = scan_file_for_violations("tools/file_tools.py")
"""

import os
import re
from pathlib import Path
from typing import List, Tuple

# ── Patterns ────────────────────────────────────────────────────────

# (compiled regex, pattern name) pairs applied to each source line by the
# static scanner. Every regex starts with a quote character, so only paths
# that begin a string literal are flagged.
HARDCODED_PATH_PATTERNS = [
    # /Users/<name>/... (macOS)
    (re.compile(r"""['"]/(Users)/[\w.-]+/"""), "/Users//"),
    # /home/<name>/... (Linux)
    (re.compile(r"""['"]/home/[\w.-]+/"""), "/home//"),
    # Bare ~/... (unexpanded tilde in code — NOT in expanduser() calls)
    (re.compile(r"""['"]~/[^'"]+['"]"""), "~/..."),  # noqa: hardcoded-path-ok
    # /root/... (Linux root home). The literal may continue after the slash;
    # requiring a closing quote right away would only ever match '/root/'.
    (re.compile(r"""['"]/root/"""), "/root/"),  # noqa: hardcoded-path-ok
]

# Line contexts in which a "~/..." literal is considered harmless.
SAFE_TILDE_CONTEXTS = re.compile(
    r"""expanduser|display_path|relpath|os\.path|Path\(|str\(.*home|"""
    r"""noqa:\s*hardcoded-path-ok|"""  # explicit escape hatch
    r"""\bprint\(|f['"]|\.format\(|"""  # display/formatting contexts
    r"""["']~/["']\s*$""",  # bare tilde-slash shown as a prefix  # noqa: hardcoded-path-ok
    re.VERBOSE,
)

# Anchored variants used by validate_path() on whole runtime path values.
_RUNTIME_HOME_PATTERNS = (
    (re.compile(r"^/Users/[\w.-]+/"), "/Users//"),
    (re.compile(r"^/home/[\w.-]+/"), "/home//"),
)

# Directory names scan_directory() never descends into (hidden directories
# and "tests" are handled separately in _should_descend()).
_SKIPPED_DIR_NAMES = frozenset(
    {"__pycache__", ".git", "venv", "node_modules", ".hermes"}
)

# Remediation hint attached to every static-scan violation.
_SCAN_SUGGESTION = (
    "Use get_hermes_home(), os.environ['HOME'], or add # noqa: hardcoded-path-ok"
)


class PathGuardError(Exception):
    """Raised when a hardcoded home-directory path is detected.

    Attributes:
        path: The offending path value.
        pattern_name: Name of the pattern the path matched.
        suggestion: Human-readable remediation hint.
    """

    def __init__(self, path: str, pattern_name: str, suggestion: str):
        self.path = path
        self.pattern_name = pattern_name
        self.suggestion = suggestion
        super().__init__(
            f"Hardcoded path detected: {path} matches {pattern_name}. "
            f"Suggestion: {suggestion}. "
            f"Use get_hermes_home(), os.environ['HOME'], or annotate with "
            f" # noqa: hardcoded-path-ok for legitimate cases."
        )


# ── Runtime Validation ──────────────────────────────────────────────

def _is_under_home(path: str, home: str) -> bool:
    """Return True when *path* is *home* itself or lies beneath it.

    The comparison is made on a path-component boundary: a bare prefix test
    would treat /home/alice2 as being inside /home/alice. An empty home, or
    one that is only slashes (HOME=/), exempts nothing.
    """
    home = home.rstrip("/")
    if not home:
        return False
    return path == home or path.startswith(home + "/")


def validate_path(path: str) -> str:
    """
    Validate a file path for hardcoded home directories.

    Meant to be called in tool wrappers (write_file, execute_code) before
    executing operations with user-supplied paths.

    Paths under the current $HOME are always accepted: at runtime,
    os.path.expanduser() legitimately produces them. Any other path that
    starts with /Users/<name>/ or /home/<name>/ is rejected because it looks
    like it was hardcoded for a different user or machine. Unexpanded ~/
    and /root/ values are NOT checked here; only the static scanner looks
    for those.

    Args:
        path: The path to check. Falsy values and non-strings are returned
            unchanged without any check.

    Returns:
        The path, unchanged, when it is acceptable.

    Raises:
        PathGuardError: If the path points into another user's home.
    """
    if not path or not isinstance(path, str):
        return path

    # Read HOME on every call so changes to the environment are honoured.
    if _is_under_home(path, os.environ.get("HOME", "")):
        return path

    for pattern, name in _RUNTIME_HOME_PATTERNS:
        if pattern.match(path):
            raise PathGuardError(
                path,
                name,
                f"Use $HOME or os.path.expanduser('~') instead. Got: {path}",
            )

    return path


def validate_tool_paths(paths: list) -> list:
    """
    Validate multiple paths (e.g., from tool arguments).

    Non-string entries are dropped from the result rather than validated.

    Returns:
        The string entries of *paths*, in their original order.

    Raises:
        PathGuardError: On the first string entry that fails validate_path().
    """
    return [validate_path(p) for p in paths if isinstance(p, str)]


# ── File Scanning (Pre-commit / CI) ────────────────────────────────

def scan_file_for_violations(filepath: str) -> List[Tuple[int, str, str, str]]:
    """
    Scan a Python file for hardcoded home-directory path patterns.

    Lines whose first non-blank character is "#" and lines containing the
    marker "noqa: hardcoded-path-ok" are ignored. A line produces one
    violation per matching pattern.

    The scan is best-effort: a file that cannot be opened or decoded as
    UTF-8 does not raise. Whatever was collected before the error (possibly
    nothing) is returned.

    Returns:
        List of (line_number, line_content, pattern_name, suggestion), with
        1-based line numbers and trailing whitespace stripped from the line.
    """
    violations: List[Tuple[int, str, str, str]] = []
    try:
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(filepath, encoding="utf-8") as f:
            for lineno, line in enumerate(f, 1):
                if line.lstrip().startswith("#"):
                    continue
                if "noqa: hardcoded-path-ok" in line:
                    continue

                for pattern, name in HARDCODED_PATH_PATTERNS:
                    if not pattern.search(line):
                        continue
                    # A tilde literal in an expanduser/display context is OK.
                    if name == "~/..." and SAFE_TILDE_CONTEXTS.search(line):  # noqa: hardcoded-path-ok
                        continue
                    violations.append(
                        (lineno, line.rstrip(), name, _SCAN_SUGGESTION)
                    )
    except (OSError, UnicodeDecodeError):
        # Deliberately swallowed: unreadable files must not abort a scan.
        pass
    return violations


def _should_descend(dirname: str) -> bool:
    """Return True when scan_directory() should walk into *dirname*.

    Takes a single directory name, not a full path, so a skip name that
    merely occurs inside a longer name (".git" in "site.github.io") does
    not exclude it.
    """
    if dirname in _SKIPPED_DIR_NAMES or dirname == "tests":
        return False
    # Hidden directories (.venv, .tox, .github, ...) are tooling, not source.
    return not dirname.startswith(".")


def scan_directory(root: str, extensions: tuple = (".py",)) -> List[Tuple[str, List]]:
    """
    Scan a directory tree for hardcoded path violations.

    Below *root*, these are not scanned: hidden directories, __pycache__,
    venv, node_modules, directories named "tests", and files whose name
    starts with "test_" (tests may legitimately contain such paths). The
    skip rules apply to directory names below *root* only, so the location
    or name of *root* itself never suppresses the scan.

    Args:
        root: Directory to walk.
        extensions: File-name suffixes to scan; a single string is accepted
            as well as a tuple.

    Returns:
        List of (filepath, violations) tuples for files with at least one
        violation, in sorted walk order.
    """
    suffixes = (extensions,) if isinstance(extensions, str) else tuple(extensions)

    results = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk never enters skipped trees; sorting makes
        # the output order deterministic across platforms.
        dirnames[:] = sorted(d for d in dirnames if _should_descend(d))

        for fname in sorted(filenames):
            if not fname.endswith(suffixes):
                continue
            if fname.startswith("test_"):
                continue
            fpath = os.path.join(dirpath, fname)
            violations = scan_file_for_violations(fpath)
            if violations:
                results.append((fpath, violations))
    return results