""" Credential Redaction — Block silent credential exposure in tool outputs Poka-yoke: Prevent API keys, tokens, passwords from leaking into context. Issue: #839 """ import json import logging import re from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Tuple logger = logging.getLogger(__name__) HERMES_HOME = Path.home() / ".hermes" AUDIT_DIR = HERMES_HOME / "audit" # Credential patterns to detect and redact CREDENTIAL_PATTERNS = [ # API keys (r"sk-[a-zA-Z0-9]{20,}", "[REDACTED: OpenAI API key]"), (r"sk-ant-[a-zA-Z0-9-]{20,}", "[REDACTED: Anthropic API key]"), (r"ghp_[a-zA-Z0-9]{36}", "[REDACTED: GitHub token]"), (r"gho_[a-zA-Z0-9]{36}", "[REDACTED: GitHub OAuth token]"), (r"glpat-[a-zA-Z0-9-]{20,}", "[REDACTED: GitLab token]"), # Bearer tokens (r"Bearer\s+[a-zA-Z0-9._-]{20,}", "[REDACTED: Bearer token]"), (r"bearer\s+[a-zA-Z0-9._-]{20,}", "[REDACTED: Bearer token]"), # Generic tokens/passwords (r"(?:token|TOKEN|Token)[:=]\s*["']?[a-zA-Z0-9._-]{20,}["']?", "[REDACTED: Token]"), (r"(?:password|PASSWORD|Password)[:=]\s*["']?[^\s"']{8,}["']?", "[REDACTED: Password]"), (r"(?:secret|SECRET|Secret)[:=]\s*["']?[a-zA-Z0-9._-]{20,}["']?", "[REDACTED: Secret]"), (r"(?:api_key|API_KEY|apiKey|ApiKey)[:=]\s*["']?[a-zA-Z0-9._-]{20,}["']?", "[REDACTED: API key]"), # AWS keys (r"AKIA[0-9A-Z]{16}", "[REDACTED: AWS access key]"), (r"(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)[:=]\s*["']?[a-zA-Z0-9/+=]{40}["']?", "[REDACTED: AWS secret]"), # Private keys (r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----", "[REDACTED: Private key header]"), # Connection strings (r"(?:postgres|mysql|mongodb|redis)://[^:]+:[^@]+@[^\s]+", "[REDACTED: Database connection string]"), ] # Files that should trigger auto-masking SENSITIVE_FILE_PATTERNS = [ r"\.env$", r"\.env\.", r"\.secret", r"credentials", r"\.token", r"config\.yaml$", r"config\.yml$", r"config\.json$", r"\.netrc$", r"\.pgpass$", ] class CredentialRedactor: """Redact credentials from text.""" def __init__(self, audit_log: bool = True): self.audit_log = audit_log self._redaction_count = 0 def redact(self, text: str) -> Tuple[str, int]: """ Redact credentials from text. Returns: Tuple of (redacted_text, number_of_redactions) """ if not text: return text, 0 redacted = text count = 0 for pattern, replacement in CREDENTIAL_PATTERNS: matches = re.findall(pattern, redacted, re.IGNORECASE) if matches: redacted = re.sub(pattern, replacement, redacted, flags=re.IGNORECASE) count += len(matches) if count > 0: self._redaction_count += count if self.audit_log: self._log_redaction(count, text[:100]) return redacted, count def redact_tool_output(self, tool_name: str, output: str) -> Tuple[str, str]: """ Redact tool output and return notice if redactions occurred. Returns: Tuple of (redacted_output, notice_or_empty) """ redacted, count = self.redact(output) if count > 0: notice = f"[REDACTED: {count} credential pattern{'s' if count > 1 else ''} found in {tool_name} output]" return redacted, notice return redacted, "" def should_mask_file(self, file_path: str) -> bool: """Check if file should have credentials auto-masked.""" path_lower = file_path.lower() return any(re.search(p, path_lower) for p in SENSITIVE_FILE_PATTERNS) def mask_file_content(self, content: str, file_path: str) -> str: """Mask credentials in file content while preserving structure.""" if not self.should_mask_file(file_path): return content lines = content.split("\n") masked_lines = [] for line in lines: # Preserve key=value structure but mask values if "=" in line and not line.strip().startswith("#"): key, _, value = line.partition("=") key_lower = key.strip().lower() sensitive_keys = ["password", "secret", "token", "key", "api", "credential"] if any(sk in key_lower for sk in sensitive_keys): masked_lines.append(f"{key}=[REDACTED]") else: masked_lines.append(line) else: masked_lines.append(line) return "\n".join(masked_lines) def _log_redaction(self, count: int, preview: str): """Log redaction event to audit trail.""" try: AUDIT_DIR.mkdir(parents=True, exist_ok=True) audit_file = AUDIT_DIR / "redactions.jsonl" entry = { "timestamp": datetime.now(timezone.utc).isoformat(), "redactions": count, "preview_hash": hash(preview), } with open(audit_file, "a") as f: f.write(json.dumps(entry) + "\n") except Exception as e: logger.debug("Audit log failed: %s", e) # Module-level redactor _redactor = CredentialRedactor() def redact_credentials(text: str) -> Tuple[str, int]: """Redact credentials from text.""" return _redactor.redact(text) def redact_tool_output(tool_name: str, output: str) -> Tuple[str, str]: """Redact tool output and return notice.""" return _redactor.redact_tool_output(tool_name, output) def should_mask_file(file_path: str) -> bool: """Check if file should be masked.""" return _redactor.should_mask_file(file_path) def mask_sensitive_file(content: str, file_path: str) -> str: """Mask credentials in sensitive file.""" return _redactor.mask_file_content(content, file_path)