Compare commits
4 Commits
fix/839-17
...
fix/921-ha
| Author | SHA1 | Date | |
|---|---|---|---|
| 4cdda8701d | |||
| a80d30b342 | |||
| f098cf8c4a | |||
| c6f2855745 |
78
.githooks/pre-commit-hardcoded-path.py
Normal file
78
.githooks/pre-commit-hardcoded-path.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pre-commit hook: Reject hardcoded home-directory paths.
|
||||
|
||||
Install:
|
||||
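    cp .githooks/pre-commit-hardcoded-path.py .git/hooks/pre-commit
    chmod +x .git/hooks/pre-commit

(Git only runs a hook whose file name is exactly the hook name, e.g.
"pre-commit". If a pre-commit hook already exists, call this script from it
instead of overwriting it.)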
|
||||
|
||||
Or add to .pre-commit-config.yaml
|
||||
"""
|
||||
|
||||
import sys
|
||||
import subprocess
|
||||
import re
|
||||
|
||||
# (regex, description) pairs. A line is reported with the description of the
# first pattern that matches it, so existing entries keep their precedence.
PATTERNS = [
    (r"/Users/[\w.\-]+/", "macOS home directory"),
    (r"/home/[\w.\-]+/", "Linux home directory"),
    (r"(?<![\w/])~/", "unexpanded tilde"),
    # root's home directory has no user component. The lookbehind keeps
    # ordinary directories that merely happen to be called "root"
    # (e.g. "src/root/") from being flagged.
    (r"(?<![\w.\-])/root/", "root home directory"),
]

# Per-line opt-out marker: "# noqa: hardcoded-path-ok" (the colon is optional).
NOQA = re.compile(r"#\s*noqa:?\s*hardcoded-path-ok")
|
||||
|
||||
def get_staged_files():
    """Return the paths of staged ``.py`` files (added, copied or modified).

    ``-z`` makes git print NUL-terminated, unquoted path names. Without it,
    paths containing unusual or non-ASCII characters are C-quoted (wrapped in
    double quotes with octal escapes), so they no longer end in ``.py`` and
    would silently escape the check.

    Returns:
        A list of path strings; empty when nothing relevant is staged or the
        git command produced no output.
    """
    result = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACM", "-z"],
        capture_output=True, text=True,
    )
    return [f for f in result.stdout.split("\0") if f.endswith(".py")]
|
||||
|
||||
def check_file(filepath):
    """Scan the staged (index) version of *filepath* for hardcoded home paths.

    Comment lines, import lines and lines carrying the ``NOQA`` marker are
    skipped. Every other line is reported at most once, with the description
    of the first entry in ``PATTERNS`` that matches it.

    Args:
        filepath: Path of the staged file, relative to the repository root.

    Returns:
        A list of ``(filepath, line_number, stripped_line, description)``
        tuples; empty when the staged content cannot be read.
    """
    try:
        result = subprocess.run(
            ["git", "show", f":{filepath}"],
            capture_output=True,
            text=True,
            # Decode leniently: a strict decode raises on files that are not
            # valid in the locale encoding, and the old blanket ``except``
            # turned that into "no violations", skipping the file entirely.
            encoding="utf-8",
            errors="replace",
        )
    except (OSError, ValueError):
        # git is missing/not executable, or the path cannot be passed as an
        # argument; stay best-effort like the original hook.
        return []

    if result.returncode != 0:
        # Nothing staged under that path (or git failed): nothing to scan.
        return []

    violations = []
    for line_no, line in enumerate(result.stdout.split("\n"), 1):
        stripped = line.strip()
        if stripped.startswith(("#", "import ", "from ")):
            continue
        if NOQA.search(line):
            continue
        for pattern, desc in PATTERNS:
            if re.search(pattern, line):
                violations.append((filepath, line_no, stripped, desc))
                break  # one report per line is enough
    return violations
|
||||
|
||||
def main():
    """Run the hook: exit 0 when the staged files are clean, 1 otherwise.

    Collects the violations of every staged Python file, prints a report with
    a fix hint and the override marker, and terminates the process through
    ``sys.exit``.
    """
    staged = get_staged_files()
    if not staged:
        sys.exit(0)

    problems = [hit for path in staged for hit in check_file(path)]
    if not problems:
        sys.exit(0)

    print("ERROR: Hardcoded home directory paths detected:")
    print()
    for filepath, line_no, line, desc in problems:
        print(f" {filepath}:{line_no}: {desc}")
        # Long source lines are truncated to keep the report readable.
        print(f" {line[:100]}")
    print()
    print("Fix: Use $HOME, relative paths, or get_hermes_home().")
    print("Override: Add '# noqa: hardcoded-path-ok' to the line.")
    sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -28,6 +28,7 @@ from typing import Dict, Any, List, Optional, Tuple
|
||||
|
||||
from tools.registry import discover_builtin_tools, registry
|
||||
from tools.tool_pokayoke import validate_tool_call, reset_circuit_breaker, get_hallucination_stats
|
||||
from tools.hardcoded_path_guard import guard_tool_dispatch as _guard_hardcoded_paths
|
||||
from toolsets import resolve_toolset, validate_toolset
|
||||
from agent.tool_orchestrator import orchestrator
|
||||
|
||||
@@ -501,6 +502,12 @@ def handle_function_call(
|
||||
# Prefer the caller-provided list so subagents can't overwrite
|
||||
# the parent's tool set via the process-global.
|
||||
sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names
|
||||
# Poka-yoke #921: guard against hardcoded home-directory paths
|
||||
_hardcoded_err = _guard_hardcoded_paths(function_name, function_args)
|
||||
if _hardcoded_err:
|
||||
logger.warning(f"Hardcoded path blocked: {function_name}")
|
||||
return _hardcoded_err
|
||||
|
||||
# Poka-yoke: validate tool call before dispatch
|
||||
is_valid, corrected_name, corrected_params, pokayoke_messages = validate_tool_call(function_name, function_args)
|
||||
if not is_valid:
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
"""
|
||||
Tests for credential redaction
|
||||
|
||||
Issue: #839
|
||||
"""
|
||||
|
||||
import unittest
|
||||
from tools.credential_redact import (
|
||||
CredentialRedactor,
|
||||
redact_credentials,
|
||||
redact_tool_output,
|
||||
should_mask_file,
|
||||
mask_sensitive_file,
|
||||
)
|
||||
|
||||
|
||||
class TestCredentialRedaction(unittest.TestCase):
    """Checks that ``redact_credentials`` replaces the known credential shapes."""

    def _assert_redacted(self, raw):
        """Redact *raw*, require at least one hit and a REDACTED marker."""
        scrubbed, hits = redact_credentials(raw)
        self.assertGreater(hits, 0)
        self.assertIn("REDACTED", scrubbed)
        return scrubbed

    def test_openai_key(self):
        scrubbed = self._assert_redacted("api_key=sk-abc123def456ghi789jkl012mno")
        # The key material itself must be gone, not just annotated.
        self.assertNotIn("sk-abc123", scrubbed)

    def test_github_token(self):
        self._assert_redacted("token: ghp_1234567890abcdef1234567890abcdef12345678")

    def test_bearer_token(self):
        self._assert_redacted("Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9")

    def test_password(self):
        self._assert_redacted("password: mySecretPassword123")

    def test_aws_key(self):
        self._assert_redacted("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE")

    def test_database_url(self):
        self._assert_redacted("DATABASE_URL=postgres://user:pass@localhost/db")

    def test_clean_text_unchanged(self):
        harmless = "Hello world, this is a normal message"
        scrubbed, hits = redact_credentials(harmless)
        self.assertEqual(hits, 0)
        self.assertEqual(scrubbed, harmless)

    def test_multiple_credentials(self):
        mixed = "key1=sk-abc123def456ghi789jkl012mno and token: ghp_1234567890abcdef1234567890abcdef12345678"
        _, hits = redact_credentials(mixed)
        self.assertGreaterEqual(hits, 2)
|
||||
|
||||
|
||||
class TestToolOutputRedaction(unittest.TestCase):
    """Checks the notice string returned alongside redacted tool output."""

    def test_redaction_notice(self):
        _, notice = redact_tool_output(
            "terminal", "Running with key sk-abc123def456ghi789jkl012mno"
        )
        # The notice must flag the redaction and name the tool it came from.
        for expected in ("REDACTED", "terminal"):
            self.assertIn(expected, notice)

    def test_no_notice_when_clean(self):
        _, notice = redact_tool_output("terminal", "Hello world")
        self.assertEqual(notice, "")
|
||||
|
||||
|
||||
class TestSensitiveFileMasking(unittest.TestCase):
    """Checks sensitive-file detection and ``key=value`` masking."""

    def test_env_file_detected(self):
        for candidate in ("/path/to/.env", "/path/to/.env.local", "/path/to/config.yaml"):
            self.assertTrue(should_mask_file(candidate))

    def test_normal_file_not_detected(self):
        for candidate in ("/path/to/readme.md", "/path/to/code.py"):
            self.assertFalse(should_mask_file(candidate))

    def test_mask_env_file(self):
        env_text = "API_KEY=sk-abc123\nDATABASE_URL=postgres://u:p@h/d\nNORMAL=value"
        result = mask_sensitive_file(env_text, ".env")
        self.assertIn("[REDACTED]", result)
        # Non-sensitive keys must pass through untouched.
        self.assertIn("NORMAL=value", result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,183 +0,0 @@
|
||||
"""
|
||||
Credential Redaction — Block silent credential exposure in tool outputs
|
||||
|
||||
Poka-yoke: Prevent API keys, tokens, passwords from leaking into context.
|
||||
|
||||
Issue: #839
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)

# Audit records live under "<user home>/.hermes/audit".
HERMES_HOME = Path.home().joinpath(".hermes")
AUDIT_DIR = HERMES_HOME.joinpath("audit")
|
||||
|
||||
# Credential patterns to detect and redact, as (regex, replacement) pairs.
# They are applied in list order by CredentialRedactor.redact().
#
# Quote characters inside the character classes are written as \" so the
# double quote does not terminate the raw-string literal; an unescaped '"'
# there is a SyntaxError.
CREDENTIAL_PATTERNS = [
    # API keys
    (r"sk-[a-zA-Z0-9]{20,}", "[REDACTED: OpenAI API key]"),
    (r"sk-ant-[a-zA-Z0-9-]{20,}", "[REDACTED: Anthropic API key]"),
    (r"ghp_[a-zA-Z0-9]{36}", "[REDACTED: GitHub token]"),
    (r"gho_[a-zA-Z0-9]{36}", "[REDACTED: GitHub OAuth token]"),
    (r"glpat-[a-zA-Z0-9-]{20,}", "[REDACTED: GitLab token]"),

    # Bearer tokens
    (r"Bearer\s+[a-zA-Z0-9._-]{20,}", "[REDACTED: Bearer token]"),
    (r"bearer\s+[a-zA-Z0-9._-]{20,}", "[REDACTED: Bearer token]"),

    # Generic tokens/passwords ("name: value" or "name=value", optionally quoted)
    (r"(?:token|TOKEN|Token)[:=]\s*[\"']?[a-zA-Z0-9._-]{20,}[\"']?", "[REDACTED: Token]"),
    (r"(?:password|PASSWORD|Password)[:=]\s*[\"']?[^\s\"']{8,}[\"']?", "[REDACTED: Password]"),
    (r"(?:secret|SECRET|Secret)[:=]\s*[\"']?[a-zA-Z0-9._-]{20,}[\"']?", "[REDACTED: Secret]"),
    (r"(?:api_key|API_KEY|apiKey|ApiKey)[:=]\s*[\"']?[a-zA-Z0-9._-]{20,}[\"']?", "[REDACTED: API key]"),

    # AWS keys
    (r"AKIA[0-9A-Z]{16}", "[REDACTED: AWS access key]"),
    (r"(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)[:=]\s*[\"']?[a-zA-Z0-9/+=]{40}[\"']?", "[REDACTED: AWS secret]"),

    # Private keys (only the PEM header line is matched)
    (r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----", "[REDACTED: Private key header]"),

    # Connection strings with embedded user:password
    (r"(?:postgres|mysql|mongodb|redis)://[^:]+:[^@]+@[^\s]+", "[REDACTED: Database connection string]"),
]
|
||||
|
||||
# Regexes searched against the lower-cased file path; any hit marks the file
# for auto-masking.
SENSITIVE_FILE_PATTERNS = [
    # dotenv files: ".env" itself and variants such as ".env.local"
    r"\.env$", r"\.env\.",
    # names that announce secrets
    r"\.secret", r"credentials", r"\.token",
    # generic config files
    r"config\.yaml$", r"config\.yml$", r"config\.json$",
    # credential stores read by network/database clients
    r"\.netrc$", r"\.pgpass$",
]
|
||||
|
||||
|
||||
class CredentialRedactor:
    """Redact credentials from text and mask secrets in sensitive files.

    Keeps a running total of redactions in ``_redaction_count`` and, when
    ``audit_log`` is true, appends one JSON line per redacting call to
    ``AUDIT_DIR / "redactions.jsonl"``.
    """

    # Substrings that mark the key of a ``key=value`` line as secret-bearing.
    # Compared against the lower-cased key; defined once instead of per line.
    _SENSITIVE_KEY_HINTS = ("password", "secret", "token", "key", "api", "credential")

    def __init__(self, audit_log: bool = True):
        """Create a redactor; ``audit_log`` toggles the JSONL audit trail."""
        self.audit_log = audit_log
        self._redaction_count = 0

    def redact(self, text: str) -> Tuple[str, int]:
        """
        Redact credentials from text.

        Every entry of ``CREDENTIAL_PATTERNS`` is applied in order,
        case-insensitively, to the output of the previous one.

        Returns:
            Tuple of (redacted_text, number_of_redactions). Falsy input is
            returned unchanged with a count of 0.
        """
        if not text:
            return text, 0

        redacted = text
        count = 0

        for pattern, replacement in CREDENTIAL_PATTERNS:
            # subn substitutes and counts in a single scan (previously a
            # findall followed by a second, identical sub pass).
            redacted, hits = re.subn(pattern, replacement, redacted, flags=re.IGNORECASE)
            count += hits

        if count > 0:
            self._redaction_count += count
            if self.audit_log:
                self._log_redaction(count, text[:100])

        return redacted, count

    def redact_tool_output(self, tool_name: str, output: str) -> Tuple[str, str]:
        """
        Redact tool output and return notice if redactions occurred.

        Returns:
            Tuple of (redacted_output, notice_or_empty)
        """
        redacted, count = self.redact(output)

        if count > 0:
            notice = f"[REDACTED: {count} credential pattern{'s' if count > 1 else ''} found in {tool_name} output]"
            return redacted, notice

        return redacted, ""

    def should_mask_file(self, file_path: str) -> bool:
        """Check if file should have credentials auto-masked.

        The path is lower-cased and searched with every regex in
        ``SENSITIVE_FILE_PATTERNS``; one hit is enough.
        """
        path_lower = file_path.lower()
        return any(re.search(p, path_lower) for p in SENSITIVE_FILE_PATTERNS)

    def mask_file_content(self, content: str, file_path: str) -> str:
        """Mask credentials in file content while preserving structure.

        Content of files that ``should_mask_file`` rejects is returned as-is.
        Otherwise each non-comment ``key=value`` line whose key contains one
        of the sensitive hints becomes ``key=[REDACTED]``; all other lines
        are kept verbatim.
        """
        if not self.should_mask_file(file_path):
            return content

        masked_lines = []

        for line in content.split("\n"):
            # Preserve key=value structure but mask values
            if "=" in line and not line.strip().startswith("#"):
                key = line.partition("=")[0]
                key_lower = key.strip().lower()
                if any(hint in key_lower for hint in self._SENSITIVE_KEY_HINTS):
                    masked_lines.append(f"{key}=[REDACTED]")
                    continue
            masked_lines.append(line)

        return "\n".join(masked_lines)

    def _log_redaction(self, count: int, preview: str):
        """Log redaction event to audit trail.

        Best-effort: any failure is logged at debug level and swallowed so
        that auditing can never break redaction itself.
        """
        try:
            AUDIT_DIR.mkdir(parents=True, exist_ok=True)
            audit_file = AUDIT_DIR / "redactions.jsonl"

            entry = {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "redactions": count,
                # NOTE(review): str hashes are salted per process unless
                # PYTHONHASHSEED is fixed, so this value can only be compared
                # within one run. Kept as-is so the raw preview (which may
                # contain the secret) is never stored in a guessable form.
                "preview_hash": hash(preview),
            }

            # Explicit encoding keeps the log format independent of the locale.
            with open(audit_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(entry) + "\n")

        except Exception as e:
            logger.debug("Audit log failed: %s", e)
|
||||
|
||||
|
||||
# Module-level redactor
|
||||
# Built with the constructor default, so audit logging is enabled for every
# module-level function that delegates to this shared instance.
_redactor = CredentialRedactor()
|
||||
|
||||
|
||||
def redact_credentials(text: str) -> Tuple[str, int]:
    """Run *text* through the shared redactor; return (redacted_text, count)."""
    outcome = _redactor.redact(text)
    return outcome
|
||||
|
||||
|
||||
def redact_tool_output(tool_name: str, output: str) -> Tuple[str, str]:
    """Redact *output* via the shared redactor; return (redacted_output, notice)."""
    outcome = _redactor.redact_tool_output(tool_name, output)
    return outcome
|
||||
|
||||
|
||||
def should_mask_file(file_path: str) -> bool:
    """Ask the shared redactor whether *file_path* names a sensitive file."""
    verdict = _redactor.should_mask_file(file_path)
    return verdict
|
||||
|
||||
|
||||
def mask_sensitive_file(content: str, file_path: str) -> str:
    """Mask secret values in *content* using the shared redactor."""
    masked = _redactor.mask_file_content(content, file_path)
    return masked
|
||||
113
tools/hardcoded_path_guard.py
Normal file
113
tools/hardcoded_path_guard.py
Normal file
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Hardcoded Path Guard — Poka-Yoke #921
|
||||
|
||||
Detects and blocks hardcoded home-directory paths in tool arguments.
|
||||
These paths work on one machine but break on others, VPS deployments,
|
||||
or when HOME changes.
|
||||
|
||||
Usage:
|
||||
from tools.hardcoded_path_guard import check_path, validate_tool_args
|
||||
|
||||
# Check a single path
|
||||
err = check_path("/Users/apayne/.hermes/config.yaml")
|
||||
|
||||
# Validate all path-like args in a tool call
|
||||
clean_args, warnings = validate_tool_args("read_file", {"path": "/home/user/file.txt"})
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import json as _json
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
|
||||
# Patterns that indicate hardcoded home directories, as (regex, description)
# pairs. The first pattern that matches decides the reported description.
HARDCODED_PATTERNS = [
    (r"/Users/[\w.\-]+/", "macOS home directory (/Users/...)"),
    (r"/home/[\w.\-]+/", "Linux home directory (/home/...)"),
    (r"(?<![\w/])~/", "unexpanded tilde (~/)"),
    # The lookbehind requires "/root/" to start a path. A bare r"/root/" also
    # matched any directory that is merely named "root" (e.g. "src/root/x"),
    # which blocked legitimate relative paths.
    (r"(?<![\w.\-])/root/", "root home directory (/root/)"),
]

_COMPILED_PATTERNS = [(re.compile(p), desc) for p, desc in HARDCODED_PATTERNS]

# Opt-out marker: "# noqa: hardcoded-path-ok" (the colon is optional).
_NOQA_PATTERN = re.compile(r"#\s*noqa:?\s*hardcoded-path-ok")

# Argument names (compared lower-cased) whose values are treated as paths.
_PATH_ARG_NAMES = frozenset({
    "path", "file_path", "filepath", "dir", "directory", "dest", "source",
    "input", "output", "src", "dst", "target", "location", "file",
    "image_path", "script", "config", "log_file",
})
|
||||
|
||||
|
||||
def has_hardcoded_path(text: str) -> Optional[str]:
    """Return the description of the first hardcoded-path pattern found in *text*.

    Returns ``None`` when nothing matches or when *text* carries the
    ``noqa: hardcoded-path-ok`` marker.
    """
    if _NOQA_PATTERN.search(text) is not None:
        return None
    return next(
        (label for regex, label in _COMPILED_PATTERNS if regex.search(text)),
        None,
    )
|
||||
|
||||
|
||||
def check_path(path_value: str) -> Optional[str]:
    """Return an error message if *path_value* holds a hardcoded home path.

    Non-string values and clean paths yield ``None``.
    """
    if not isinstance(path_value, str):
        return None

    reason = has_hardcoded_path(path_value)
    if not reason:
        return None

    headline = f"Path contains hardcoded home directory ({reason}): '{path_value}'. "
    advice = "Use $HOME, relative paths, or get_hermes_home(). "
    escape_hatch = "Add '# noqa: hardcoded-path-ok' if intentional."
    return headline + advice + escape_hatch
|
||||
|
||||
|
||||
def validate_tool_args(tool_name: str, args: Dict[str, Any]) -> Tuple[Dict[str, Any], List[str]]:
    """Collect hardcoded-path warnings for the path-like arguments of a tool call.

    Only arguments whose lower-cased name is in ``_PATH_ARG_NAMES`` are
    inspected. A value may be a single string or a list/tuple of strings;
    anything else is ignored.

    Args:
        tool_name: Name of the tool being called (not used for the check).
        args: The tool-call arguments. Anything that is not a dict (for
            example ``None`` from a malformed call) is tolerated and produces
            no warnings, so the guard never crashes dispatch.

    Returns:
        ``(args, warnings)``: the arguments object unchanged, plus one
        message per offending path.
    """
    warnings: List[str] = []
    if not isinstance(args, dict):
        return args, warnings

    for key, value in args.items():
        # Non-string keys cannot name a path argument (and have no .lower()).
        if not isinstance(key, str) or key.lower() not in _PATH_ARG_NAMES:
            continue
        candidates = value if isinstance(value, (list, tuple)) else (value,)
        for candidate in candidates:
            # check_path returns None for non-string values.
            err = check_path(candidate)
            if err:
                warnings.append(err)
    return args, warnings
|
||||
|
||||
|
||||
def scan_source_for_violations(source_code: str, filename: str = "") -> List[Tuple[int, str, str]]:
    """Scan source text line by line for hardcoded home-directory paths.

    Comment lines, import lines and lines carrying the
    ``noqa: hardcoded-path-ok`` marker are skipped. ``filename`` is accepted
    but not used by the scan.

    Returns:
        A list of ``(line_number, stripped_line, description)`` tuples, one
        per offending line, with 1-based line numbers.
    """
    found = []
    for line_no, raw in enumerate(source_code.split("\n"), 1):
        text = raw.strip()
        if text.startswith(("#", "import ", "from ")):
            continue
        if _NOQA_PATTERN.search(raw):
            continue
        label = next((desc for regex, desc in _COMPILED_PATTERNS if regex.search(raw)), None)
        if label is not None:
            found.append((line_no, text, label))
    return found
|
||||
|
||||
|
||||
def guard_tool_dispatch(tool_name: str, args: Dict[str, Any]) -> Optional[str]:
    """Return a JSON error string if the call's path arguments are hardcoded.

    Returns ``None`` when ``validate_tool_args`` reports no warnings, which
    lets the dispatch proceed.
    """
    problems = validate_tool_args(tool_name, args)[1]
    if not problems:
        return None

    payload = {
        "error": "Hardcoded home directory path detected",
        "details": problems,
        "suggestion": "Use $HOME, relative paths, or get_hermes_home() instead of hardcoded paths.",
        "pokayoke": True,
        "rule": "hardcoded-path-guard",
    }
    return _json.dumps(payload)
|
||||
@@ -44,6 +44,34 @@ from typing import Dict, Any, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _format_error(
    message: str,
    skill_name: Optional[str] = None,
    file_path: Optional[str] = None,
    suggestion: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Format an error with rich context for better debugging.

    Args:
        message: The core error message; always the first part of the text.
        skill_name: Appended as ``"Skill: ..."`` when truthy.
        file_path: Appended as ``"File: ..."`` when truthy.
        suggestion: Appended as ``"Suggestion: ..."`` when truthy.
        context: Extra ``key: value`` pairs appended in iteration order.
            They appear only in the error text, not as separate result keys.

    Returns:
        A dict with ``success`` set to False, the parts joined by ``" | "``
        under ``error``, and the raw ``skill_name``, ``file_path`` and
        ``suggestion`` values.
    """
    parts = [message]
    if skill_name:
        parts.append(f"Skill: {skill_name}")
    if file_path:
        parts.append(f"File: {file_path}")
    if suggestion:
        parts.append(f"Suggestion: {suggestion}")
    if context:
        parts.extend(f"{key}: {value}" for key, value in context.items())
    return {
        "success": False,
        "error": " | ".join(parts),
        "skill_name": skill_name,
        "file_path": file_path,
        "suggestion": suggestion,
    }
|
||||
|
||||
|
||||
# Import security scanner — agent-created skills get the same scrutiny as
|
||||
# community hub installs.
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user