Files
hermes-agent/tools/credential_redact.py
Alexander Whitestone 3a7e0e7db4
All checks were successful
Lint / lint (pull_request) Successful in 23s
fix: migrate hardcoded ~/.hermes paths to HERMES_HOME resolution (#835)
- tools/session_templates.py: use get_hermes_home() for template dir and state.db
- tools/credential_redact.py: use get_hermes_home() for HERMES_HOME base
- agent/context_budget.py: use get_hermes_home() for checkpoints dir
- tools/crisis_tool.py: use HERMES_HOME env var with fallback for crisis log path
- tools/hardcoded_path_guard.py: add noqa to example docstring lines
- scripts/lint_hardcoded_paths.py: exclude lines already referencing HERMES_HOME

Also fixes a pre-existing SyntaxError in credential_redact.py caused by
raw strings with escaped quotes inside double-quoted literals.
2026-04-22 02:45:05 -04:00

186 lines
6.0 KiB
Python

"""
Credential Redaction — Block silent credential exposure in tool outputs
Poka-yoke: Prevent API keys, tokens, passwords from leaking into context.
Issue: #839
"""
import json
import logging
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Tuple
from hermes_constants import get_hermes_home
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Base directory as returned by hermes_constants.get_hermes_home(); resolved once at import time.
HERMES_HOME = get_hermes_home()
# Directory the redaction audit trail is written under.
# NOTE(review): assumes HERMES_HOME supports "/" joining (e.g. pathlib.Path) — confirm.
AUDIT_DIR = HERMES_HOME / "audit"
# Credential patterns to detect and redact.
#
# Each entry is ``(regex, replacement_label)``. CredentialRedactor.redact applies
# the entries in list order with ``re.IGNORECASE``, so earlier entries win when
# two patterns could cover the same text. Patterns must use only non-capturing
# groups and must never match the empty string.
#
# Patterns that contain both quote characters are written as ordinary
# (non-raw) string literals with doubled backslashes, because a raw string
# cannot contain an escaped copy of its own delimiter.
CREDENTIAL_PATTERNS = [
    # API keys
    (r"sk-[a-zA-Z0-9]{20,}", "[REDACTED: OpenAI API key]"),
    # Project-scoped OpenAI keys ("sk-proj-...") contain "-" and "_", which the
    # classic pattern above does not accept.
    (r"sk-proj-[a-zA-Z0-9_-]{20,}", "[REDACTED: OpenAI API key]"),
    # "_" is allowed so a key is not cut short (or missed) at its first underscore.
    (r"sk-ant-[a-zA-Z0-9_-]{20,}", "[REDACTED: Anthropic API key]"),
    (r"ghp_[a-zA-Z0-9]{36}", "[REDACTED: GitHub token]"),
    (r"gho_[a-zA-Z0-9]{36}", "[REDACTED: GitHub OAuth token]"),
    (r"glpat-[a-zA-Z0-9-]{20,}", "[REDACTED: GitLab token]"),
    # Bearer tokens (both spellings kept for callers that match case-sensitively)
    (r"Bearer\s+[a-zA-Z0-9._-]{20,}", "[REDACTED: Bearer token]"),
    (r"bearer\s+[a-zA-Z0-9._-]{20,}", "[REDACTED: Bearer token]"),
    # Generic tokens/passwords.
    # ``['\"]?\s*`` before the separator lets these match JSON/YAML style
    # ``"password": "value"`` and ``token = value`` as well as ``token=value``.
    (
        "(?:token|TOKEN|Token)['\"]?\\s*[:=]\\s*['\"]?[a-zA-Z0-9._-]{20,}['\"]?",
        "[REDACTED: Token]",
    ),
    (
        "(?:password|PASSWORD|Password)['\"]?\\s*[:=]\\s*['\"]?[^\\s\"']{8,}['\"]?",
        "[REDACTED: Password]",
    ),
    (
        "(?:secret|SECRET|Secret)['\"]?\\s*[:=]\\s*['\"]?[a-zA-Z0-9._-]{20,}['\"]?",
        "[REDACTED: Secret]",
    ),
    (
        "(?:api_key|API_KEY|apiKey|ApiKey)['\"]?\\s*[:=]\\s*['\"]?[a-zA-Z0-9._-]{20,}['\"]?",
        "[REDACTED: API key]",
    ),
    # AWS keys
    (r"AKIA[0-9A-Z]{16}", "[REDACTED: AWS access key]"),
    (
        "(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)['\"]?\\s*[:=]\\s*['\"]?[a-zA-Z0-9/+=]{40}['\"]?",
        "[REDACTED: AWS secret]",
    ),
    # Private keys.
    # Redact the whole PEM block (header, key material, footer) when the END
    # marker is present; redacting only the header would leave the key itself
    # in the output. A truncated block with no END marker still has its header
    # redacted. The label text is unchanged for compatibility with consumers.
    (
        r"-----BEGIN (?:[A-Z0-9]+ )?PRIVATE KEY(?: BLOCK)?-----"
        r"(?:[\s\S]*?-----END (?:[A-Z0-9]+ )?PRIVATE KEY(?: BLOCK)?-----)?",
        "[REDACTED: Private key header]",
    ),
    # Connection strings with embedded credentials.
    # - user may be empty (``redis://:password@host``)
    # - neither user nor password may contain whitespace, so the match cannot
    #   run on from a credential-free URL to an unrelated "@" later in the text
    # - common scheme variants (postgresql, mongodb+srv, rediss) are covered
    (
        r"(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|rediss?)://[^:@/\s]*:[^@\s]+@\S+",
        "[REDACTED: Database connection string]",
    ),
]
# Path regexes that mark a file as sensitive, i.e. one whose content should be
# auto-masked. They are searched (not anchored at the start) against the
# lower-cased file path.
SENSITIVE_FILE_PATTERNS = [
    # dotenv files: ".env" itself and variants such as ".env.local"
    r"\.env$",
    r"\.env\.",
    # anything that names itself a secret / credential / token store
    r"\.secret",
    r"credentials",
    r"\.token",
    # generic application config files
    r"config\.yaml$",
    r"config\.yml$",
    r"config\.json$",
    # tool-specific credential files
    r"\.netrc$",
    r"\.pgpass$",
]
class CredentialRedactor:
    """Redact credentials from text and mask secret values in sensitive files.

    Args:
        audit_log: When true, every call to :meth:`redact` that redacts at
            least one credential appends an entry to the audit trail.
    """

    # Substrings that mark a config key as secret-bearing; compared against the
    # lower-cased key, so matching is case-insensitive.
    _SENSITIVE_KEY_FRAGMENTS = ("password", "secret", "token", "key", "api", "credential")

    def __init__(self, audit_log: bool = True):
        self.audit_log = audit_log
        # Running total of redactions performed by this instance.
        self._redaction_count = 0

    def redact(self, text: str) -> Tuple[str, int]:
        """
        Redact credentials from text.

        Every entry of CREDENTIAL_PATTERNS is applied in order,
        case-insensitively, to the output of the previous one.

        Returns:
            Tuple of (redacted_text, number_of_redactions). Empty or None
            input is returned unchanged with a count of 0.
        """
        if not text:
            return text, 0
        redacted = text
        count = 0
        for pattern, replacement in CREDENTIAL_PATTERNS:
            # subn substitutes and counts in a single scan of the text.
            redacted, hits = re.subn(pattern, replacement, redacted, flags=re.IGNORECASE)
            count += hits
        if count > 0:
            self._redaction_count += count
            if self.audit_log:
                # Fingerprint the *redacted* preview so nothing derived from
                # the secret itself ends up in the audit file.
                self._log_redaction(count, redacted[:100])
        return redacted, count

    def redact_tool_output(self, tool_name: str, output: str) -> Tuple[str, str]:
        """
        Redact tool output and return notice if redactions occurred.

        Returns:
            Tuple of (redacted_output, notice_or_empty)
        """
        redacted, count = self.redact(output)
        if count > 0:
            notice = f"[REDACTED: {count} credential pattern{'s' if count > 1 else ''} found in {tool_name} output]"
            return redacted, notice
        return redacted, ""

    def should_mask_file(self, file_path: str) -> bool:
        """Check if file should have credentials auto-masked.

        The path is lower-cased and searched against SENSITIVE_FILE_PATTERNS.
        """
        path_lower = file_path.lower()
        return any(re.search(p, path_lower) for p in SENSITIVE_FILE_PATTERNS)

    def mask_file_content(self, content: str, file_path: str) -> str:
        """Mask credentials in file content while preserving structure.

        Content of files that :meth:`should_mask_file` does not flag is
        returned unchanged. Otherwise each line is examined on its own and
        the value of any ``key=value`` or ``key: value`` line whose key looks
        sensitive is replaced with ``[REDACTED]``; all other lines, including
        ``#`` comments, are kept as they are.
        """
        if not self.should_mask_file(file_path):
            return content
        return "\n".join(self._mask_line(line) for line in content.split("\n"))

    @classmethod
    def _mask_line(cls, line: str) -> str:
        """Return *line* with its value masked if its key looks sensitive."""
        if line.strip().startswith("#"):
            return line
        # Split on whichever separator comes first. Splitting on "=" alone
        # would skip YAML/JSON lines entirely and would cut "key: BASE64=" at
        # the padding character, leaving the secret in the "key" half.
        positions = [i for i in (line.find("="), line.find(":")) if i != -1]
        if not positions:
            return line
        sep_at = min(positions)
        key, sep, value = line[:sep_at], line[sep_at], line[sep_at + 1:]
        key_lower = key.strip().lower()
        if not any(fragment in key_lower for fragment in cls._SENSITIVE_KEY_FRAGMENTS):
            return line
        if sep == "=":
            return f"{key}=[REDACTED]"
        # A "key:" with no inline value (or only an opening bracket) introduces
        # a nested YAML/JSON structure; there is no value on this line to mask.
        if value.strip() in ("", "{", "["):
            return line
        return f"{key}: [REDACTED]"

    def _log_redaction(self, count: int, preview: str) -> None:
        """Log redaction event to audit trail.

        Appends one JSON line to ``redactions.jsonl`` under AUDIT_DIR holding
        a UTC timestamp, the redaction count and a SHA-256 hex digest of
        *preview*. Logging is best-effort: any failure is reported at debug
        level and never propagates to the caller.
        """
        # Local import keeps this method self-contained.
        import hashlib

        try:
            AUDIT_DIR.mkdir(parents=True, exist_ok=True)
            audit_file = AUDIT_DIR / "redactions.jsonl"
            # The built-in hash() of a str is salted per process, so it cannot
            # be compared across runs; a SHA-256 digest is stable.
            digest = hashlib.sha256(preview.encode("utf-8", "replace")).hexdigest()
            entry = {
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "redactions": count,
                "preview_hash": digest,
            }
            with open(audit_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(entry) + "\n")
        except Exception as e:
            # Deliberately broad: auditing must never break redaction.
            logger.debug("Audit log failed: %s", e)
# Module-level redactor: a single shared CredentialRedactor, created at import
# time with its default settings (audit logging enabled).
_redactor = CredentialRedactor()
def redact_credentials(text: str) -> Tuple[str, int]:
    """Run the shared module-level redactor over *text*.

    Returns:
        Tuple of (redacted_text, number_of_redactions).
    """
    outcome = _redactor.redact(text)
    return outcome
def redact_tool_output(tool_name: str, output: str) -> Tuple[str, str]:
    """Redact a tool's output using the shared module-level redactor.

    Returns:
        Tuple of (redacted_output, notice); the notice is an empty string
        when nothing was redacted.
    """
    outcome = _redactor.redact_tool_output(tool_name, output)
    return outcome
def should_mask_file(file_path: str) -> bool:
    """Report whether the shared module-level redactor treats *file_path* as sensitive."""
    is_sensitive = _redactor.should_mask_file(file_path)
    return is_sensitive
def mask_sensitive_file(content: str, file_path: str) -> str:
    """Mask credential values in *content* via the shared module-level redactor.

    *file_path* is passed along so the redactor can decide whether the file
    is one that needs masking at all.
    """
    masked = _redactor.mask_file_content(content, file_path)
    return masked