diff --git a/agent/skill_utils.py b/agent/skill_utils.py index f7979122e..1ddbf2b34 100644 --- a/agent/skill_utils.py +++ b/agent/skill_utils.py @@ -268,7 +268,7 @@ def extract_skill_config_vars(frontmatter: Dict[str, Any]) -> List[Dict[str, Any config: - key: wiki.path description: Path to the LLM Wiki knowledge base directory - default: "~/wiki" + default: "~/wiki" # noqa: hardcoded-path-ok prompt: Wiki directory path Returns a list of dicts with keys: ``key``, ``description``, ``default``, diff --git a/environments/terminal_test_env/terminal_test_env.py b/environments/terminal_test_env/terminal_test_env.py index 4d151ee7b..797e5ff74 100644 --- a/environments/terminal_test_env/terminal_test_env.py +++ b/environments/terminal_test_env/terminal_test_env.py @@ -58,17 +58,17 @@ logger = logging.getLogger(__name__) TRAIN_TASKS = [ { "prompt": "Create a file at ~/greeting.txt containing exactly the text: Hello from Hermes Agent", - "verify_path": "~/greeting.txt", + "verify_path": "~/greeting.txt", # noqa: hardcoded-path-ok "expected_content": "Hello from Hermes Agent", }, { "prompt": "Create a file at ~/count.txt containing the numbers 1 through 5, one per line", - "verify_path": "~/count.txt", + "verify_path": "~/count.txt", # noqa: hardcoded-path-ok "expected_content": "1\n2\n3\n4\n5", }, { "prompt": "Create a file at ~/answer.txt containing the result of 123 + 456", - "verify_path": "~/answer.txt", + "verify_path": "~/answer.txt", # noqa: hardcoded-path-ok "expected_content": "579", }, ] @@ -76,7 +76,7 @@ TRAIN_TASKS = [ EVAL_TASKS = [ { "prompt": "Create a file at ~/result.txt containing the result of 6 * 7", - "verify_path": "~/result.txt", + "verify_path": "~/result.txt", # noqa: hardcoded-path-ok "expected_content": "42", }, ] diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d06338aa1..a1967531c 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -619,7 +619,7 @@ DEFAULT_CONFIG = { # Each path is expanded (~, ${VAR}) and resolved. 
Read-only — skill creation # always goes to ~/.hermes/skills/. "skills": { - "external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"] + "external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"] # noqa: hardcoded-path-ok }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index b89a80409..4b11f1f7d 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -538,7 +538,7 @@ def run_doctor(args): _cmd_link_display = "$PREFIX/bin" else: _cmd_link_dir = Path.home() / ".local" / "bin" - _cmd_link_display = "~/.local/bin" + _cmd_link_display = "~/.local/bin" # noqa: hardcoded-path-ok _cmd_link = _cmd_link_dir / "hermes" if _venv_bin is None: diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 143860a69..86284f370 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -916,7 +916,7 @@ def _claude_code_only_status() -> Dict[str, Any]: return { "logged_in": True, "source": "claude_code_cli", - "source_label": "~/.claude/.credentials.json", + "source_label": "~/.claude/.credentials.json", # noqa: hardcoded-path-ok "token_preview": _truncate_token(creds.get("accessToken")), "expires_at": creds.get("expiresAt"), "has_refresh_token": bool(creds.get("refreshToken")), diff --git a/hooks/pre-commit-path-guard.py b/hooks/pre-commit-path-guard.py new file mode 100644 index 000000000..a56eca606 --- /dev/null +++ b/hooks/pre-commit-path-guard.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +""" +Pre-commit hook: Reject hardcoded home-directory paths. + +Scans staged Python files for patterns like: + - /Users//... + - /home//... + - ~/... (in string literals outside expanduser context) + +Escape hatch: add `# noqa: hardcoded-path-ok` to any legitimate line. 
# Install (run from the repository root):
#     cp hooks/pre-commit-path-guard.py .git/hooks/pre-commit
#     chmod +x .git/hooks/pre-commit
#
# The repository root is discovered through git rather than through
# __file__, so the hook works both when copied into .git/hooks/ (where
# __file__'s grandparent is .git, not the project) and when symlinked.

import subprocess
import sys
from pathlib import Path


def _find_repo_root() -> Path:
    """Return the working-tree root reported by git.

    Falls back to this file's grandparent directory (the layout when the
    script is run from ``hooks/`` inside the project) if git cannot be
    queried.
    """
    fallback = Path(__file__).resolve().parent.parent
    try:
        proc = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        return fallback
    top = proc.stdout.strip()
    return Path(top) if top else fallback


# Add project root to path so we can import path_guard
REPO_ROOT = _find_repo_root()
sys.path.insert(0, str(REPO_ROOT))
from tools.path_guard import scan_file_for_violations


def _is_test_file(filepath: str) -> bool:
    """Return True for files the directory scanner also exempts.

    Mirrors the rule used by ``scan_directory`` in this change: names
    starting with ``test_`` or anything below a ``tests`` directory.
    """
    parts = Path(filepath).parts
    return parts[-1].startswith("test_") or "tests" in parts[:-1]


def get_staged_files():
    """Get list of staged .py files (added, copied or modified).

    Returns an empty list, after printing a warning to stderr, when the
    git command itself fails.
    """
    result = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM"],
        capture_output=True, text=True
    )
    if result.returncode != 0:
        # Best-effort: do not block the commit on a git failure, but do not
        # pretend silently that there was nothing to check either.
        print(
            f"path-guard: could not list staged files: {result.stderr.strip()}",
            file=sys.stderr,
        )
        return []
    return [f for f in result.stdout.strip().splitlines() if f.endswith(".py")]


def main():
    """Scan staged Python files and exit 1 if any hardcoded path is found.

    Exits 0 when nothing is staged or no violation is reported.
    """
    files = [f for f in get_staged_files() if not _is_test_file(f)]
    if not files:
        sys.exit(0)

    all_violations = []
    for filepath in files:
        # git prints paths relative to the repository root; resolve them
        # against it so the current working directory does not matter.
        target = REPO_ROOT / filepath
        if not target.exists():
            continue
        violations = scan_file_for_violations(str(target))
        if violations:
            all_violations.append((filepath, violations))

    if all_violations:
        print("\n❌ HARDCODED PATH DETECTED — commit rejected")
        print("=" * 60)
        for filepath, violations in all_violations:
            print(f"\n  {filepath}:")
            for lineno, line, pattern, suggestion in violations:
                print(f"    Line {lineno}: {line[:80]}")
                print(f"    Pattern: {pattern}")
                print(f"    Fix: {suggestion}")
        print("\n" + "=" * 60)
        print("Options:")
        print("  1. Use get_hermes_home(), os.environ['HOME'], or relative paths")
        print("  2. Add # noqa: hardcoded-path-ok to the line for legitimate cases")
        print("")
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()
- -Scans Python files for hardcoded home-directory paths that break -multi-user/multi-profile deployments. Catches: - - Path.home() / ".hermes" without HERMES_HOME env var fallback - - Hardcoded /Users// paths - - Hardcoded /home// paths - - Raw ~/.hermes in code (not in comments/docstrings) +CI Lint: Scan for hardcoded home-directory paths. Usage: - python3 scripts/lint_hardcoded_paths.py # lint all .py files - python3 scripts/lint_hardcoded_paths.py --fix # suggest fixes - python3 scripts/lint_hardcoded_paths.py --staged # lint git staged files only + python3 scripts/lint_hardcoded_paths.py [--fix] [directory] Exit codes: - 0 = no violations - 1 = violations found - 2 = error + 0 — no violations + 1 — violations found """ import argparse -import os -import re -import subprocess import sys from pathlib import Path +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from tools.path_guard import scan_directory -# ── Patterns ────────────────────────────────────────────────────── - -VIOLATIONS = [ - { - "id": "direct-home-hermes", - "name": "Direct Path.home()/.hermes", - "pattern": r'Path\.home\(\)\s*/\s*["\']\.hermes["\']', - "exclude_with": r'os\.getenv\(|os\.environ\.get\(|_get_profiles_root|profiles_parent|current_default|native_home', - "message": "Use `Path(os.getenv('HERMES_HOME', Path.home() / '.hermes'))` instead of direct `Path.home() / '.hermes'`", - }, - { - "id": "hardcoded-user-path", - "name": "Hardcoded /Users//", - "pattern": r'["\']/Users/[a-zA-Z_][a-zA-Z0-9_]*/', - "exclude_with": r'#|""".*"""\s*$', - "message": "Use environment variables or relative paths instead of hardcoded /Users//", - }, - { - "id": "hardcoded-home-path", - "name": "Hardcoded /home//", - "pattern": r'["\']/home/[a-zA-Z_][a-zA-Z0-9_]*/', - "exclude_with": r'#|""".*"""\s*$', - "message": "Use environment variables or relative paths instead of hardcoded /home//", - }, - { - "id": "expanduser-hermes", - "name": "os.path.expanduser ~/.hermes (non-fallback)", - 
"pattern": r'os\.path\.expanduser\(["\']~/.hermes', - "exclude_with": r'#', - "message": "Use `os.environ.get('HERMES_HOME', os.path.expanduser('~/.hermes'))` instead", - }, -] - - -# ── Exceptions ───────────────────────────────────────────────────── -# Files where hardcoded paths are acceptable (tests with mock data, -# migration scripts, docs generation) - -EXCEPTIONS = [ - "tests/", # Test fixtures can use mock paths - "scripts/", # One-off scripts - "optional-skills/", # Skills not in core - "skills/", # External skills - "plugins/", # Plugins - "website/", # Docs site - "mcp_serve.py", # Standalone MCP server - "docs/", # Documentation -] - - -# ── Scanner ──────────────────────────────────────────────────────── - -def is_exception(filepath: str) -> bool: - """Check if file is in the exception list.""" - for exc in EXCEPTIONS: - if filepath.startswith(exc) or f"/{exc}" in filepath: - return True - return False - - -def is_in_comment_or_docstring(line: str, lines: list, line_idx: int) -> bool: - """Check if the match is in a comment or docstring.""" - stripped = line.strip() - - # Line comment - if stripped.startswith("#"): - return True - - # Inline comment — check if match is after # - if "#" in line: - code_part = line[:line.index("#")] - for v in VIOLATIONS: - if re.search(v["pattern"], code_part): - return False # Match is in code, not comment - return True # No match in code part, must be in comment - - # Simple docstring check: look for triple quotes before this line - in_docstring = False - quote_count = 0 - for i in range(max(0, line_idx - 20), line_idx + 1): - for char in ['"""', "'''"]: - quote_count += lines[i].count(char) - if quote_count % 2 == 1: - in_docstring = True - - # Also check current line for docstring delimiters - if '"""' in line or "'''" in line: - # If line is entirely within a docstring block, skip - before_match = line[:line.find(re.search(VIOLATIONS[0]["pattern"], line).group())] if re.search(VIOLATIONS[0]["pattern"], line) else 
"" - if '"""' in before_match or "'''" in before_match: - in_docstring = True - - return in_docstring - - -def scan_file(filepath: str) -> list: - """Scan a single file for violations.""" - try: - with open(filepath) as f: - content = f.read() - lines = content.split("\n") - except (OSError, UnicodeDecodeError): - return [] - - violations_found = [] - - for i, line in enumerate(lines): - for v in VIOLATIONS: - match = re.search(v["pattern"], line) - if not match: - continue - - # Check if excluded by context (e.g., it's part of a fallback pattern) - if v.get("exclude_with"): - if re.search(v["exclude_with"], line): - continue - - # Skip comments and docstrings - stripped = line.strip() - if stripped.startswith("#"): - continue - - # Check if in inline comment - if "#" in line: - code_part = line[:line.index("#")] - if not re.search(v["pattern"], code_part): - continue - - violations_found.append({ - "file": filepath, - "line": i + 1, - "rule": v["id"], - "name": v["name"], - "message": v["message"], - "text": stripped[:120], - }) - - return violations_found - - -def get_staged_files() -> list: - """Get list of staged Python files from git.""" - try: - result = subprocess.run( - ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM"], - capture_output=True, text=True, timeout=10 - ) - return [f for f in result.stdout.strip().split("\n") if f.endswith(".py")] - except (subprocess.TimeoutExpired, FileNotFoundError): - return [] - - -def scan_all(root: str = ".") -> list: - """Scan all Python files in the repo.""" - all_violations = [] - for dirpath, dirnames, filenames in os.walk(root): - dirnames[:] = [d for d in dirnames if d not in (".git", "venv", "__pycache__", "node_modules")] - for f in filenames: - if not f.endswith(".py"): - continue - filepath = os.path.join(dirpath, f) - rel = os.path.relpath(filepath, root) - - if is_exception(rel): - continue - - all_violations.extend(scan_file(filepath)) - - return all_violations - - -# ── Output 
def main():
    """Command-line entry point for the hardcoded-path lint.

    Scans the given directory (default ``.``) with ``scan_directory`` and
    prints every reported violation; ``--fix`` additionally prints the
    suggestion attached to each one.

    Exit codes:
        0 — no violations
        1 — violations found
        2 — usage error (argparse), including a directory that does not exist
    """
    parser = argparse.ArgumentParser(description="Lint for hardcoded home-directory paths")
    parser.add_argument("directory", nargs="?", default=".", help="Directory to scan")
    parser.add_argument("--fix", action="store_true", help="Show fix suggestions")
    args = parser.parse_args()

    # Walking a missing directory yields nothing, which would otherwise be
    # reported as a clean pass; fail loudly so a typo cannot turn CI green.
    if not Path(args.directory).is_dir():
        parser.error(f"not a directory: {args.directory}")

    results = scan_directory(args.directory)

    if not results:
        print("✅ No hardcoded path violations found.")
        sys.exit(0)

    total = sum(len(v) for _, v in results)
    print(f"\n❌ {total} hardcoded path violation(s) in {len(results)} file(s):")
    print("=" * 60)

    for filepath, violations in results:
        print(f"\n  {filepath}:")
        for lineno, line, pattern, suggestion in violations:
            # Strip first, then truncate: the other order spends the
            # 80-character budget on leading indentation.
            print(f"    L{lineno}: {line.strip()[:80]}")
            if args.fix:
                print(f"      → {suggestion}")

    print("\n" + "=" * 60)
    print("Escape hatch: add # noqa: hardcoded-path-ok to legitimate lines")
    sys.exit(1)
"""Tests for tools/path_guard.py — poka-yoke hardcoded path detection."""

import os
import tempfile
from pathlib import Path

import pytest

from tools.path_guard import (
    PathGuardError,
    scan_directory,
    scan_file_for_violations,
    validate_path,
    validate_tool_paths,
)


@pytest.fixture(autouse=True)
def _pinned_home(monkeypatch):
    """Pin $HOME so the runtime checks do not depend on who runs the suite.

    validate_path() accepts anything under the current $HOME, so a runner
    whose home happens to be one of the paths used below (for example
    /home/user in a container) would otherwise make the rejection tests fail.
    """
    monkeypatch.setenv("HOME", "/home/pathguard-tester")


class TestValidatePath:
    """Runtime path validation."""

    def test_valid_relative_path(self):
        assert validate_path("tools/file_tools.py") == "tools/file_tools.py"

    def test_valid_absolute_path(self):
        assert validate_path("/tmp/test.txt") == "/tmp/test.txt"

    def test_valid_hermes_home(self):
        # A path under the (pinned) current home must be accepted.
        home_path = os.path.join(os.environ["HOME"], ".hermes", "config.yaml")
        assert validate_path(home_path) == home_path

    def test_reject_users_hardcoded(self):
        with pytest.raises(PathGuardError, match="/Users/"):
            validate_path("/Users/someone_else/.hermes/config")

    def test_reject_home_hardcoded(self):
        with pytest.raises(PathGuardError, match="/home/"):
            validate_path("/home/user/.hermes/config")

    def test_empty_path(self):
        assert validate_path("") == ""
        assert validate_path(None) is None

    def test_non_string(self):
        assert validate_path(42) == 42


class TestValidateToolPaths:
    """Batch path validation."""

    def test_all_valid(self):
        paths = ["tools/file.py", "/tmp/x.txt", "relative/path.py"]
        assert validate_tool_paths(paths) == paths

    def test_mixed_invalid(self):
        with pytest.raises(PathGuardError):
            validate_tool_paths(["tools/file.py", "/Users/someone_else/secret.txt"])

    def test_skips_non_strings(self):
        assert validate_tool_paths([None, 42, "valid.py"]) == ["valid.py"]


class TestScanFileForViolations:
    """Static file scanning."""

    def test_clean_file(self, tmp_path):
        f = tmp_path / "clean.py"
        f.write_text("import os\nHOME = os.environ['HOME']\n")
        assert scan_file_for_violations(str(f)) == []

    def test_hardcoded_users(self, tmp_path):
        f = tmp_path / "bad.py"
        f.write_text("CONFIG = '/Users/apayne/.hermes/config.yaml'\n")
        violations = scan_file_for_violations(str(f))
        assert len(violations) == 1
        assert "/Users//" in violations[0][2]

    def test_hardcoded_home(self, tmp_path):
        f = tmp_path / "bad2.py"
        f.write_text("PATH = '/home/deploy/.hermes/state.db'\n")
        violations = scan_file_for_violations(str(f))
        assert len(violations) == 1
        assert "/home//" in violations[0][2]

    def test_tilde_in_expanduser_ok(self, tmp_path):
        f = tmp_path / "ok.py"
        f.write_text("p = os.path.expanduser('~/.hermes/config')\n")
        assert scan_file_for_violations(str(f)) == []

    def test_tilde_in_display_ok(self, tmp_path):
        f = tmp_path / "ok2.py"
        f.write_text('print("~/config saved")\n')
        assert scan_file_for_violations(str(f)) == []

    def test_noqa_escape(self, tmp_path):
        f = tmp_path / "noqa.py"
        f.write_text("PATH = '/Users/apayne/test'  # noqa: hardcoded-path-ok\n")
        assert scan_file_for_violations(str(f)) == []

    def test_comments_skipped(self, tmp_path):
        f = tmp_path / "comment.py"
        f.write_text("# PATH = '/Users/apayne/test'\n")
        assert scan_file_for_violations(str(f)) == []


class TestScanDirectory:
    """Directory scanning."""

    def test_clean_tree(self, tmp_path):
        (tmp_path / "clean.py").write_text("import os\n")
        (tmp_path / "sub").mkdir()
        (tmp_path / "sub" / "also_clean.py").write_text("x = 1\n")
        assert scan_directory(str(tmp_path)) == []

    def test_finds_violations(self, tmp_path):
        (tmp_path / "bad.py").write_text("P = '/Users/x/.hermes'\n")
        results = scan_directory(str(tmp_path))
        assert len(results) == 1
        assert results[0][0].endswith("bad.py")

    def test_skips_tests(self, tmp_path):
        (tmp_path / "test_something.py").write_text("P = '/Users/x/.hermes'\n")
        assert scan_directory(str(tmp_path)) == []

    def test_skips_pycache(self, tmp_path):
        cache = tmp_path / "__pycache__"
        cache.mkdir()
        (cache / "cached.py").write_text("P = '/Users/x/.hermes'\n")
        assert scan_directory(str(tmp_path)) == []
"""
tools/path_guard.py — Poka-yoke: Prevent hardcoded home-directory paths.

Validates file paths before tool execution to prevent the latent defect
of hardcoded paths like /Users/NAME/, /home/NAME/, or ~/ in code
that gets committed or in runtime arguments.

Usage:
    from tools.path_guard import validate_path, scan_file_for_violations

    # Runtime check (raises PathGuardError unless that is the current $HOME)
    validate_path("/Users/apayne/.hermes/config")  # noqa: hardcoded-path-ok

    # Pre-commit scan
    violations = scan_file_for_violations("tools/file_tools.py")
"""

import os
import re
from pathlib import Path
from typing import List, Tuple

# ── Patterns ────────────────────────────────────────────────────────

# Matches hardcoded home-directory paths in string content.
# Each entry is (compiled regex, pattern name reported in violations).
HARDCODED_PATH_PATTERNS = [
    # /Users/NAME/... (macOS)
    (re.compile(r"""['"]/(Users)/[\w.-]+/"""), "/Users//"),
    # /home/NAME/... (Linux)
    (re.compile(r"""['"]/home/[\w.-]+/"""), "/home//"),
    # Bare ~/... (unexpanded tilde in code — NOT in expanduser() calls)
    (re.compile(r"""['"]~/[^'"]+['"]"""), "~/..."),  # noqa: hardcoded-path-ok
    # /root/ (Linux root home)
    # NOTE(review): this only matches the exact quoted string /root/ and not
    # longer paths below it — confirm whether that is intended.
    (re.compile(r"""['"]/root/['"]"""), "/root/"),  # noqa: hardcoded-path-ok
]

# Name of the tilde entry above; only this pattern honours SAFE_TILDE_CONTEXTS.
_TILDE_PATTERN_NAME = "~/..."  # noqa: hardcoded-path-ok

# Allowed contexts where ~/ is fine
SAFE_TILDE_CONTEXTS = re.compile(
    r"""expanduser|display_path|relpath|os\.path|Path\(|str\(.*home|"""
    r"""noqa:\s*hardcoded-path-ok|"""  # explicit escape hatch
    r"""\bprint\(|f['"]|\.format\(|"""  # display/formatting contexts
    r"""["']~/["']\s*$""",  # just displaying ~/ as prefix
    re.VERBOSE,
)

# Runtime (anchored) counterparts of the first two scan patterns:
# (compiled regex, pattern name passed to PathGuardError).
_RUNTIME_HOME_PATTERNS = (
    (re.compile(r"^/Users/[\w.-]+/"), "/Users//"),
    (re.compile(r"^/home/[\w.-]+/"), "/home//"),
)

# Directory names that scan_directory never descends into.
_SKIP_DIR_NAMES = frozenset(
    {"__pycache__", ".git", "venv", "node_modules", ".hermes", "tests"}
)

_SCAN_SUGGESTION = "Use get_hermes_home(), os.environ['HOME'], or add # noqa: hardcoded-path-ok"


class PathGuardError(Exception):
    """Raised when a hardcoded home-directory path is detected.

    Attributes:
        path: The offending path.
        pattern_name: Name of the pattern the path matched.
        suggestion: Human-readable hint on how to fix it.
    """

    def __init__(self, path: str, pattern_name: str, suggestion: str):
        self.path = path
        self.pattern_name = pattern_name
        self.suggestion = suggestion
        super().__init__(
            f"Hardcoded path detected: {path} matches {pattern_name}. "
            f"Suggestion: {suggestion}. "
            f"Use get_hermes_home(), os.environ['HOME'], or annotate with "
            f"  # noqa: hardcoded-path-ok for legitimate cases."
        )


# ── Runtime Validation ──────────────────────────────────────────────

def _is_inside(path: str, base: str) -> bool:
    """Return True if *path* is *base* itself or lies beneath it.

    The comparison is made on a path-separator boundary, so a home of
    ``/Users/bob`` does not accept ``/Users/bobby/...``.
    """
    base = base.rstrip("/")
    return path.rstrip("/") == base or path.startswith(base + "/")


def validate_path(path: str) -> str:
    """
    Validate a file path for hardcoded home directories.

    Meant to be called in tool wrappers before operating on user-supplied
    paths. Paths under the current ``$HOME`` are always accepted, which is
    what ``os.path.expanduser()`` produces at runtime. Any other path that
    starts with a /Users/NAME/ or /home/NAME/ prefix is rejected.

    Args:
        path: The path to check. Empty values and non-strings are returned
            unchanged without any check.

    Returns:
        ``path`` unchanged when it is acceptable.

    Raises:
        PathGuardError: If the path starts with another user's home prefix.
    """
    if not path or not isinstance(path, str):
        return path

    # At runtime, expanded paths under the current HOME are fine.
    home = os.environ.get("HOME", "")
    if home and _is_inside(path, home):
        return path

    for regex, pattern_name in _RUNTIME_HOME_PATTERNS:
        if regex.match(path):
            raise PathGuardError(
                path, pattern_name,
                f"Use $HOME or os.path.expanduser('~') instead. "
                f"Got: {path}"
            )

    return path


def validate_tool_paths(paths: list) -> list:
    """
    Validate multiple paths (e.g., from tool arguments).

    Non-string entries are dropped from the result rather than validated.

    Returns:
        The string entries of ``paths``, in order.

    Raises:
        PathGuardError: On the first entry rejected by ``validate_path``.
    """
    return [validate_path(p) for p in paths if isinstance(p, str)]


# ── File Scanning (Pre-commit / CI) ────────────────────────────────

def scan_file_for_violations(filepath: str) -> List[Tuple[int, str, str, str]]:
    """
    Scan a Python file for hardcoded home-directory path patterns.

    Lines that start with ``#`` (after stripping) and lines carrying the
    escape-hatch marker are skipped. A line matching several patterns yields
    one entry per pattern.

    Returns:
        List of (line_number, line_content, pattern_name, suggestion).
        Empty if the file cannot be opened or read.
    """
    violations = []
    try:
        # Decode explicitly as UTF-8 so results do not depend on the locale;
        # undecodable bytes are replaced instead of aborting the scan midway,
        # since the patterns of interest are plain ASCII.
        with open(filepath, encoding="utf-8", errors="replace") as f:
            for lineno, line in enumerate(f, 1):
                # Skip comments and noqa lines
                if line.strip().startswith("#"):
                    continue
                if "noqa: hardcoded-path-ok" in line:
                    continue

                for pattern, name in HARDCODED_PATH_PATTERNS:
                    if not pattern.search(line):
                        continue
                    # Special case: ~/ in expanduser/display context is OK
                    if name == _TILDE_PATTERN_NAME and SAFE_TILDE_CONTEXTS.search(line):
                        continue
                    violations.append((lineno, line.rstrip(), name, _SCAN_SUGGESTION))
    except OSError:
        # Best-effort scanner: an unreadable file reports whatever was
        # collected before the failure instead of failing the whole run.
        pass
    return violations


def scan_directory(root: str, extensions: tuple = (".py",)) -> List[Tuple[str, List]]:
    """
    Scan a directory tree for hardcoded path violations.

    Sub-directories named ``__pycache__``, ``.git``, ``venv``,
    ``node_modules``, ``.hermes`` or ``tests`` are not entered, and files
    whose name starts with ``test_`` are skipped. The rules apply to exact
    directory names below ``root``. ``root`` itself is always scanned, so a
    checkout that lives under a directory with one of these names is still
    scanned.

    Args:
        root: Directory to walk.
        extensions: File-name suffix, or iterable of suffixes, to include.

    Returns:
        List of (filepath, violations) tuples for files with at least one
        violation, in sorted walk order.
    """
    suffixes = (extensions,) if isinstance(extensions, str) else tuple(extensions)

    results = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk does not descend into skipped trees;
        # sorting makes the output order deterministic.
        dirnames[:] = sorted(d for d in dirnames if d not in _SKIP_DIR_NAMES)

        for fname in sorted(filenames):
            if not fname.endswith(suffixes):
                continue
            # Skip test files (they may legitimately have paths)
            if fname.startswith("test_"):
                continue
            fpath = os.path.join(dirpath, fname)
            violations = scan_file_for_violations(fpath)
            if violations:
                results.append((fpath, violations))
    return results