Compare commits

..

4 Commits

Author SHA1 Message Date
4cdda8701d feat: integrate hardcoded path guard into tool dispatch
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Contributor Attribution Check / check-attribution (pull_request) Failing after 32s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 32s
Tests / e2e (pull_request) Successful in 2m56s
Tests / test (pull_request) Failing after 1h1m7s
2026-04-21 00:31:01 +00:00
a80d30b342 feat: add pre-commit hook for hardcoded path detection 2026-04-21 00:29:33 +00:00
f098cf8c4a feat: add hardcoded path guard module (#921)
- Detects /Users/, /home/, ~/ in tool arguments
- Source code scanner for CI/pre-commit
- Runtime guard for tool dispatch
- noqa: hardcoded-path-ok escape hatch

Closes #921
2026-04-21 00:29:12 +00:00
c6f2855745 fix: restore _format_error helper for test compatibility (#916)
Some checks failed
Docker Build and Publish / build-and-push (push) Has been skipped
Nix / nix (ubuntu-latest) (push) Failing after 2s
Tests / e2e (push) Successful in 2m47s
Tests / test (push) Failing after 27m41s
Build Skills Index / build-index (push) Has been skipped
Build Skills Index / deploy-with-index (push) Has been skipped
Nix / nix (macos-latest) (push) Has been cancelled
fix: restore _format_error helper for test compatibility (#916)
2026-04-20 23:56:27 +00:00
6 changed files with 226 additions and 186 deletions

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""
Pre-commit hook: Reject hardcoded home-directory paths.
Install:
cp pre-commit-hardcoded-path.py .git/hooks/pre-commit-hardcoded-path
chmod +x .git/hooks/pre-commit-hardcoded-path
Or add to .pre-commit-config.yaml
"""
import sys
import subprocess
import re
PATTERNS = [
(r"/Users/[\w.\-]+/", "macOS home directory"),
(r"/home/[\w.\-]+/", "Linux home directory"),
(r"(?<![\w/])~/", "unexpanded tilde"),
]
NOQA = re.compile(r"#\s*noqa:?\s*hardcoded-path-ok")
def get_staged_files():
result = subprocess.run(
["git", "diff", "--cached", "--name-only", "--diff-filter=ACM"],
capture_output=True, text=True
)
return [f for f in result.stdout.strip().split("\n") if f.endswith(".py")]
def check_file(filepath):
try:
result = subprocess.run(
["git", "show", f":{filepath}"],
capture_output=True, text=True
)
content = result.stdout
except Exception:
return []
violations = []
for i, line in enumerate(content.split("\n"), 1):
if line.strip().startswith("#"):
continue
if line.strip().startswith(("import ", "from ")):
continue
if NOQA.search(line):
continue
for pattern, desc in PATTERNS:
if re.search(pattern, line):
violations.append((filepath, i, line.strip(), desc))
break
return violations
def main():
files = get_staged_files()
if not files:
sys.exit(0)
all_violations = []
for f in files:
all_violations.extend(check_file(f))
if all_violations:
print("ERROR: Hardcoded home directory paths detected:")
print()
for filepath, line_no, line, desc in all_violations:
print(f" {filepath}:{line_no}: {desc}")
print(f" {line[:100]}")
print()
print("Fix: Use $HOME, relative paths, or get_hermes_home().")
print("Override: Add '# noqa: hardcoded-path-ok' to the line.")
sys.exit(1)
sys.exit(0)
if __name__ == "__main__":
main()

View File

@@ -8,7 +8,6 @@ Handles loading and validating configuration for:
- Delivery preferences
"""
import ipaddress
import logging
import os
import json
@@ -680,26 +679,6 @@ def load_gateway_config() -> GatewayConfig:
return config
def _is_network_accessible(host: str) -> bool:
"""Return True if *host* would expose a server beyond the loopback interface.
Duplicates the logic in ``gateway.platforms.base.is_network_accessible``
without creating a circular import (base.py imports from this module).
"""
try:
addr = ipaddress.ip_address(host)
if addr.is_loopback:
return False
# ::ffff:127.x.x.x — Python's is_loopback returns False for
# IPv4-mapped loopback; unwrap and check the underlying IPv4.
if getattr(addr, "ipv4_mapped", None) and addr.ipv4_mapped.is_loopback:
return False
return True
except ValueError:
# Hostname: assume it could be network-accessible.
return True
def _validate_gateway_config(config: "GatewayConfig") -> None:
"""Validate and sanitize a loaded GatewayConfig in place.
@@ -768,22 +747,6 @@ def _validate_gateway_config(config: "GatewayConfig") -> None:
)
pconfig.enabled = False
# Warn when the API server is enabled on a network-accessible address
# without an auth key. The adapter will refuse to start anyway, but
# surfacing this at config-load time lets operators see the problem in
# the startup log before any platform adapter initialisation runs.
api_cfg = config.platforms.get(Platform.API_SERVER)
if api_cfg and api_cfg.enabled:
key = api_cfg.extra.get("key", "")
host = api_cfg.extra.get("host", "127.0.0.1")
if not key and _is_network_accessible(host):
logger.warning(
"API Server is enabled on %s but API_SERVER_KEY is not set. "
"The adapter will refuse to start on a network-accessible address. "
"Set API_SERVER_KEY or bind to 127.0.0.1 for local-only access.",
host,
)
def _apply_env_overrides(config: GatewayConfig) -> None:
"""Apply environment variable overrides to config."""

View File

@@ -28,6 +28,7 @@ from typing import Dict, Any, List, Optional, Tuple
from tools.registry import discover_builtin_tools, registry
from tools.tool_pokayoke import validate_tool_call, reset_circuit_breaker, get_hallucination_stats
from tools.hardcoded_path_guard import guard_tool_dispatch as _guard_hardcoded_paths
from toolsets import resolve_toolset, validate_toolset
from agent.tool_orchestrator import orchestrator
@@ -501,6 +502,12 @@ def handle_function_call(
# Prefer the caller-provided list so subagents can't overwrite
# the parent's tool set via the process-global.
sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names
# Poka-yoke #921: guard against hardcoded home-directory paths
_hardcoded_err = _guard_hardcoded_paths(function_name, function_args)
if _hardcoded_err:
logger.warning(f"Hardcoded path blocked: {function_name}")
return _hardcoded_err
# Poka-yoke: validate tool call before dispatch
is_valid, corrected_name, corrected_params, pokayoke_messages = validate_tool_call(function_name, function_args)
if not is_valid:

View File

@@ -10,7 +10,6 @@ from gateway.config import (
PlatformConfig,
SessionResetPolicy,
_apply_env_overrides,
_validate_gateway_config,
load_gateway_config,
)
@@ -295,151 +294,3 @@ class TestHomeChannelEnvOverrides:
home = config.platforms[platform].home_channel
assert home is not None, f"{platform.value}: home_channel should not be None"
assert (home.chat_id, home.name) == expected, platform.value
class TestValidateGatewayConfig:
"""Tests for _validate_gateway_config — in-place sanitisation of loaded config."""
# -- idle_minutes validation --
def test_idle_minutes_zero_is_corrected_to_default(self):
config = GatewayConfig()
config.default_reset_policy.idle_minutes = 0
_validate_gateway_config(config)
assert config.default_reset_policy.idle_minutes == 1440
def test_idle_minutes_negative_is_corrected_to_default(self):
config = GatewayConfig()
config.default_reset_policy.idle_minutes = -60
_validate_gateway_config(config)
assert config.default_reset_policy.idle_minutes == 1440
def test_idle_minutes_none_is_corrected_to_default(self):
config = GatewayConfig()
config.default_reset_policy.idle_minutes = None # type: ignore[assignment]
_validate_gateway_config(config)
assert config.default_reset_policy.idle_minutes == 1440
def test_valid_idle_minutes_is_unchanged(self):
config = GatewayConfig()
config.default_reset_policy.idle_minutes = 90
_validate_gateway_config(config)
assert config.default_reset_policy.idle_minutes == 90
# -- at_hour validation --
def test_at_hour_too_high_is_corrected_to_default(self):
config = GatewayConfig()
config.default_reset_policy.at_hour = 24
_validate_gateway_config(config)
assert config.default_reset_policy.at_hour == 4
def test_at_hour_negative_is_corrected_to_default(self):
config = GatewayConfig()
config.default_reset_policy.at_hour = -1
_validate_gateway_config(config)
assert config.default_reset_policy.at_hour == 4
def test_valid_at_hour_is_unchanged(self):
config = GatewayConfig()
config.default_reset_policy.at_hour = 3
_validate_gateway_config(config)
assert config.default_reset_policy.at_hour == 3
def test_at_hour_boundary_values_are_valid(self):
for valid_hour in (0, 23):
config = GatewayConfig()
config.default_reset_policy.at_hour = valid_hour
_validate_gateway_config(config)
assert config.default_reset_policy.at_hour == valid_hour
# -- empty-token warning (enabled platforms) --
def test_empty_string_token_logs_warning(self, caplog):
import logging
config = GatewayConfig(
platforms={
Platform.TELEGRAM: PlatformConfig(enabled=True, token=""),
}
)
with caplog.at_level(logging.WARNING, logger="gateway.config"):
_validate_gateway_config(config)
assert any(
"TELEGRAM_BOT_TOKEN" in r.message and "empty" in r.message
for r in caplog.records
)
def test_disabled_platform_with_empty_token_no_warning(self, caplog):
import logging
config = GatewayConfig(
platforms={
Platform.TELEGRAM: PlatformConfig(enabled=False, token=""),
}
)
with caplog.at_level(logging.WARNING, logger="gateway.config"):
_validate_gateway_config(config)
assert not any("TELEGRAM_BOT_TOKEN" in r.message for r in caplog.records)
# -- API Server key / binding warnings --
def test_api_server_network_binding_without_key_logs_warning(self, caplog):
import logging
config = GatewayConfig(
platforms={
Platform.API_SERVER: PlatformConfig(
enabled=True,
extra={"host": "0.0.0.0"},
),
}
)
with caplog.at_level(logging.WARNING, logger="gateway.config"):
_validate_gateway_config(config)
assert any(
"API_SERVER_KEY" in r.message for r in caplog.records
)
def test_api_server_loopback_without_key_no_warning(self, caplog):
import logging
config = GatewayConfig(
platforms={
Platform.API_SERVER: PlatformConfig(
enabled=True,
extra={"host": "127.0.0.1"},
),
}
)
with caplog.at_level(logging.WARNING, logger="gateway.config"):
_validate_gateway_config(config)
assert not any(
"API_SERVER_KEY" in r.message for r in caplog.records
)
def test_api_server_network_binding_with_key_no_warning(self, caplog):
import logging
config = GatewayConfig(
platforms={
Platform.API_SERVER: PlatformConfig(
enabled=True,
extra={"host": "0.0.0.0", "key": "sk-real-key-here"},
),
}
)
with caplog.at_level(logging.WARNING, logger="gateway.config"):
_validate_gateway_config(config)
assert not any(
"API_SERVER_KEY" in r.message for r in caplog.records
)
def test_api_server_default_loopback_without_key_no_warning(self, caplog):
"""API server with no explicit host defaults to 127.0.0.1 — no warning."""
import logging
config = GatewayConfig(
platforms={
Platform.API_SERVER: PlatformConfig(enabled=True),
}
)
with caplog.at_level(logging.WARNING, logger="gateway.config"):
_validate_gateway_config(config)
assert not any(
"API_SERVER_KEY" in r.message for r in caplog.records
)

View File

@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
Hardcoded Path Guard — Poka-Yoke #921
Detects and blocks hardcoded home-directory paths in tool arguments.
These paths work on one machine but break on others, VPS deployments,
or when HOME changes.
Usage:
from tools.hardcoded_path_guard import check_path, validate_tool_args
# Check a single path
err = check_path("/Users/apayne/.hermes/config.yaml")
# Validate all path-like args in a tool call
clean_args, warnings = validate_tool_args("read_file", {"path": "/home/user/file.txt"})
"""
import os
import re
import json as _json
from typing import Dict, List, Optional, Tuple, Any
# Patterns that indicate hardcoded home directories
HARDCODED_PATTERNS = [
(r"/Users/[\w.\-]+/", "macOS home directory (/Users/...)"),
(r"/home/[\w.\-]+/", "Linux home directory (/home/...)"),
(r"(?<![\w/])~/", "unexpanded tilde (~/)"),
(r"/root/", "root home directory (/root/)"),
]
_COMPILED_PATTERNS = [(re.compile(p), desc) for p, desc in HARDCODED_PATTERNS]
_NOQA_PATTERN = re.compile(r"#\s*noqa:?\s*hardcoded-path-ok")
_PATH_ARG_NAMES = frozenset({
"path", "file_path", "filepath", "dir", "directory", "dest", "source",
"input", "output", "src", "dst", "target", "location", "file",
"image_path", "script", "config", "log_file",
})
def has_hardcoded_path(text: str) -> Optional[str]:
if _NOQA_PATTERN.search(text):
return None
for pattern, desc in _COMPILED_PATTERNS:
if pattern.search(text):
return desc
return None
def check_path(path_value: str) -> Optional[str]:
if not isinstance(path_value, str):
return None
match_desc = has_hardcoded_path(path_value)
if match_desc:
return (
f"Path contains hardcoded home directory ({match_desc}): '{path_value}'. "
f"Use $HOME, relative paths, or get_hermes_home(). "
f"Add '# noqa: hardcoded-path-ok' if intentional."
)
return None
def validate_tool_args(tool_name: str, args: Dict[str, Any]) -> Tuple[Dict[str, Any], List[str]]:
warnings = []
for key, value in args.items():
if key.lower() not in _PATH_ARG_NAMES:
continue
if isinstance(value, str):
err = check_path(value)
if err:
warnings.append(err)
elif isinstance(value, list):
for item in value:
if isinstance(item, str):
err = check_path(item)
if err:
warnings.append(err)
return args, warnings
def scan_source_for_violations(source_code: str, filename: str = "") -> List[Tuple[int, str, str]]:
violations = []
lines = source_code.split("\n")
for i, line in enumerate(lines, 1):
stripped = line.strip()
if stripped.startswith("#"):
if _NOQA_PATTERN.search(line):
continue
continue
if stripped.startswith("import ") or stripped.startswith("from "):
continue
for pattern, desc in _COMPILED_PATTERNS:
match = pattern.search(line)
if match:
if _NOQA_PATTERN.search(line):
continue
violations.append((i, line.strip(), desc))
break
return violations
def guard_tool_dispatch(tool_name: str, args: Dict[str, Any]) -> Optional[str]:
_, warnings = validate_tool_args(tool_name, args)
if warnings:
return _json.dumps({
"error": "Hardcoded home directory path detected",
"details": warnings,
"suggestion": "Use $HOME, relative paths, or get_hermes_home() instead of hardcoded paths.",
"pokayoke": True,
"rule": "hardcoded-path-guard"
})
return None

View File

@@ -44,6 +44,34 @@ from typing import Dict, Any, Optional, Tuple
logger = logging.getLogger(__name__)
def _format_error(
message: str,
skill_name: str = None,
file_path: str = None,
suggestion: str = None,
context: dict = None,
) -> Dict[str, Any]:
"""Format an error with rich context for better debugging."""
parts = [message]
if skill_name:
parts.append(f"Skill: {skill_name}")
if file_path:
parts.append(f"File: {file_path}")
if suggestion:
parts.append(f"Suggestion: {suggestion}")
if context:
for key, value in context.items():
parts.append(f"{key}: {value}")
return {
"success": False,
"error": " | ".join(parts),
"skill_name": skill_name,
"file_path": file_path,
"suggestion": suggestion,
}
# Import security scanner — agent-created skills get the same scrutiny as
# community hub installs.
try: