fix: credential redaction for tool outputs (closes #839)

2026-04-15 22:11:13 -04:00
2 changed files with 391 additions and 0 deletions
--- a/tests/test_credential_redaction.py
+++ b/tests/test_credential_redaction.py
@@ -0,0 +1,122 @@
+"""Tests for credential redaction — Issue #839."""
+import json
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from tools.credential_redaction import (
+    redact_credentials, should_auto_mask, mask_config_values,
+    redact_tool_output, RedactionResult
+)
+
+
+class TestRedactCredentials:
+    """Behavioural tests for ``redact_credentials``."""
+
+    def test_openai_key(self):
+        """An OpenAI-style ``sk-`` key is replaced by a redaction marker."""
+        text = "API key: sk-abc123def456ghi789jkl012mno345pqr678stu901vwx"
+        result = redact_credentials(text)
+        assert result.was_redacted
+        assert "sk-abc" not in result.text
+        assert "[REDACTED" in result.text
+
+    def test_github_pat(self):
+        """A classic GitHub personal access token is removed."""
+        text = "token: ghp_1234567890abcdefghijklmnopqrstuvwxyz"
+        result = redact_credentials(text)
+        assert result.was_redacted
+        assert "ghp_" not in result.text
+
+    def test_bearer_token(self):
+        """A bearer token in an Authorization header is removed."""
+        text = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
+        result = redact_credentials(text)
+        assert result.was_redacted
+        assert "Bearer eyJ" not in result.text
+
+    def test_password_assignment(self):
+        """A quoted password assignment is redacted, not merely detected."""
+        text = 'password: "supersecret123"'
+        result = redact_credentials(text)
+        assert result.was_redacted
+        # was_redacted alone would still pass if the secret survived in the text.
+        assert "supersecret123" not in result.text
+
+    def test_clean_text(self):
+        """Text without credentials comes back unchanged."""
+        text = "Hello world, no credentials here"
+        result = redact_credentials(text)
+        assert not result.was_redacted
+        assert result.text == text
+
+    def test_empty_text(self):
+        """The empty string is returned as-is with no redactions recorded."""
+        result = redact_credentials("")
+        assert not result.was_redacted
+        assert result.text == ""
+
+
+class TestShouldAutoMask:
+    """``should_auto_mask`` must return a real bool for sensitive-looking paths."""
+
+    def test_env_file(self):
+        assert should_auto_mask(".env") is True
+
+    def test_config_file(self):
+        assert should_auto_mask("config.yaml") is True
+
+    def test_token_file(self):
+        assert should_auto_mask("gitea_token") is True
+
+    def test_normal_file(self):
+        assert should_auto_mask("readme.md") is False
+
+
+class TestMaskConfigValues:
+    """``mask_config_values`` hides credential values but keeps file layout."""
+
+    def test_env_api_key(self):
+        """A ``KEY=VALUE`` line with a credential-like key is masked."""
+        text = "API_KEY=sk-abc123def456"
+        result = mask_config_values(text)
+        assert "sk-abc" not in result
+        assert "[REDACTED]" in result
+
+    def test_yaml_token(self):
+        """A YAML-style ``key: "value"`` line is masked."""
+        text = 'token: "ghp_1234567890"'
+        result = mask_config_values(text)
+        assert "ghp_" not in result
+        assert "[REDACTED]" in result
+
+    def test_preserves_structure(self):
+        """Only the credential line changes; its neighbours are untouched."""
+        text = "API_KEY=secret\nOTHER=value"
+        result = mask_config_values(text)
+        assert "OTHER=value" in result  # Non-credential preserved
+        # The credential line must keep its key and lose its value.
+        assert "API_KEY=[REDACTED]" in result
+        assert "secret" not in result
+
+
+class TestRedactToolOutput:
+    """Exercise ``redact_tool_output`` with string and dict payloads."""
+
+    def test_string_output(self):
+        """A key inside a plain string is scrubbed and a notice is returned."""
+        raw = "Result: sk-abc123def456ghi789jkl012mno345pqr678stu901vwx"
+        cleaned, message = redact_tool_output("file_read", raw)
+        assert message is not None
+        assert "sk-abc123" not in cleaned
+
+    def test_dict_output(self):
+        """String values inside a dict payload are scrubbed."""
+        payload = {"content": "token: ghp_1234567890abcdefghijklmnopqrstuvwxyz"}
+        cleaned, _ = redact_tool_output("file_read", payload)
+        assert "ghp_" not in cleaned["content"]
+
+    def test_clean_output(self):
+        """Credential-free output is returned unchanged with no notice."""
+        raw = "No credentials here"
+        cleaned, message = redact_tool_output("file_read", raw)
+        assert message is None
+        assert cleaned == raw
+
+
+class TestRedactionResult:
+    """Unit tests for the ``RedactionResult`` value object."""
+
+    def test_notice_singular(self):
+        """One redaction yields the singular wording."""
+        result = RedactionResult("redacted", "original", [{"pattern_name": "test"}])
+        assert "1 credential pattern" in result.notice()
+
+    def test_notice_plural(self):
+        """More than one redaction yields the plural wording."""
+        result = RedactionResult("redacted", "original", [
+            {"pattern_name": "test1"},
+            {"pattern_name": "test2"},
+        ])
+        assert "2 credential patterns" in result.notice()
+
+    def test_to_dict(self):
+        """``to_dict`` exposes the flag, count, notice and pattern names."""
+        result = RedactionResult("redacted", "original", [{"pattern_name": "test"}])
+        d = result.to_dict()
+        assert d["redacted"] is True
+        assert d["count"] == 1
+        assert d["notice"] == result.notice()
+        assert d["patterns"] == ["test"]
+
+
+if __name__ == "__main__":
+    import pytest
+
+    # Propagate pytest's exit code so running this file directly fails
+    # (non-zero status) when any test fails.
+    raise SystemExit(pytest.main([__file__, "-v"]))
--- a/tools/credential_redaction.py
+++ b/tools/credential_redaction.py
@@ -0,0 +1,269 @@
+"""Credential Redaction — Poka-yoke for tool outputs.
+
+Blocks silent credential exposure by redacting API keys, tokens, and
+passwords from tool outputs before they enter agent context.
+
+Issue #839: Poka-yoke: Block silent credential exposure in tool outputs
+"""
+
+import json
+import logging
+import re
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Audit trail location: one JSON object per line, under the user's home.
+_AUDIT_DIR = Path.home().joinpath(".hermes", "audit")
+_AUDIT_LOG = _AUDIT_DIR.joinpath("redactions.jsonl")
+
+# Credential patterns — order matters (most specific first).
+#
+# Each entry is (regex, replacement). redact_credentials applies the entries
+# in sequence with re.IGNORECASE, each one to the output of the previous one,
+# so a narrow pattern must come before any broader pattern that could claim
+# the same text. Entries whose replacement is None are skipped.
+_CREDENTIAL_PATTERNS = [
+    # API keys. Prefixed "sk-" variants come before the bare "sk-" form.
+    # "_" is included so keys containing underscores are redacted whole.
+    (r'sk-ant-[a-zA-Z0-9_-]{20,}', '[REDACTED: Anthropic API key]'),
+    # Project / service-account / admin keys contain "-" and "_", which the
+    # bare alphanumeric pattern below cannot match.
+    (r'sk-(?:proj|svcacct|admin)-[a-zA-Z0-9_-]{20,}', '[REDACTED: OpenAI-style API key]'),
+    (r'sk-[a-zA-Z0-9]{20,}', '[REDACTED: OpenAI-style API key]'),
+    (r'ghp_[a-zA-Z0-9]{36}', '[REDACTED: GitHub PAT]'),
+    (r'gho_[a-zA-Z0-9]{36}', '[REDACTED: GitHub OAuth token]'),
+    (r'github_pat_[a-zA-Z0-9_]{82}', '[REDACTED: GitHub fine-grained PAT]'),
+    (r'glpat-[a-zA-Z0-9-]{20,}', '[REDACTED: GitLab PAT]'),
+    (r'syt_[a-zA-Z0-9_-]{40,}', '[REDACTED: Matrix access token]'),
+    # Slack tokens may carry several dash-separated segments after the first
+    # numeric one, so "-" is allowed in the tail.
+    (r'xoxb-[0-9]{10,}-[a-zA-Z0-9-]{20,}', '[REDACTED: Slack bot token]'),
+    (r'xoxp-[0-9]{10,}-[a-zA-Z0-9-]{20,}', '[REDACTED: Slack user token]'),
+
+    # Bearer tokens
+    (r'Bearer\s+[a-zA-Z0-9_.-]{20,}', '[REDACTED: Bearer token]'),
+
+    # Generic tokens/passwords in assignments (api_key, api-key and apikey)
+    (r'(?:token|api[_-]?key|secret|password|passwd|pwd)\s*[:=]\s*["\']?([a-zA-Z0-9_.-]{8,})["\']?', '[REDACTED: credential]'),
+
+    # Environment variable assignments
+    (r'(?:export\s+)?(?:TOKEN|KEY|SECRET|PASSWORD|API_KEY)\s*=\s*["\']?([a-zA-Z0-9_.-]{8,})["\']?', '[REDACTED: env credential]'),
+
+    # Base64 encoded credentials (high entropy strings)
+    (r'(?:authorization|auth)\s*[:=]\s*(?:basic|bearer)\s+[a-zA-Z0-9+/=]{20,}', '[REDACTED: auth header]'),
+
+    # AWS credentials
+    (r'AKIA[0-9A-Z]{16}', '[REDACTED: AWS access key]'),
+    (r'(?<![A-Z0-9])[A-Za-z0-9/+=]{40}(?![A-Z0-9])', None),  # Only match near context
+
+    # Private keys: redact the whole PEM block, not just its header. If the
+    # END marker is missing (truncated output), redact through end of text.
+    # [\s\S] is used instead of "." because the caller does not pass DOTALL.
+    (r'-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----[\s\S]*?(?:-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----|\Z)', '[REDACTED: private key block]'),
+]
+
+
+class RedactionResult:
+    """Outcome of one redaction pass.
+
+    Attributes are set directly from the constructor arguments:
+    ``text`` (the redacted text), ``original`` (the input text) and
+    ``redactions`` (one dict per match; ``to_dict`` reads each dict's
+    ``"pattern_name"`` key).
+    """
+
+    def __init__(self, text: str, original: str, redactions: List[Dict[str, Any]]):
+        self.text = text
+        self.original = original
+        self.redactions = redactions
+
+    @property
+    def count(self) -> int:
+        """Number of recorded redactions."""
+        return len(self.redactions)
+
+    @property
+    def was_redacted(self) -> bool:
+        """True when at least one redaction was recorded."""
+        return len(self.redactions) > 0
+
+    def notice(self) -> str:
+        """Return a one-line summary, or ``""`` when nothing was redacted."""
+        if not self.was_redacted:
+            return ""
+        total = self.count
+        plural = "s" if total > 1 else ""
+        return f"[REDACTED: {total} credential pattern{plural} found]"
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a summary dict: redacted flag, count, notice, pattern names."""
+        summary: Dict[str, Any] = {}
+        summary["redacted"] = self.was_redacted
+        summary["count"] = self.count
+        summary["notice"] = self.notice()
+        summary["patterns"] = [item["pattern_name"] for item in self.redactions]
+        return summary
+
+
+def redact_credentials(text: str, source: str = "unknown") -> RedactionResult:
+    """Replace every credential match in *text* with its redaction label.
+
+    Patterns from ``_CREDENTIAL_PATTERNS`` are applied one after another,
+    case-insensitively, each to the output of the previous one. Entries
+    with a ``None`` replacement are skipped.
+
+    Args:
+        text: Text to redact. Falsy input is returned untouched.
+        source: Source identifier stored on each redaction record and
+            passed to the audit log.
+
+    Returns:
+        RedactionResult holding the redacted text, the original text and
+        one record per match. A record's ``position`` is the match offset
+        in the text as it stood when that pattern ran, i.e. after earlier
+        patterns had already been substituted.
+    """
+    if not text:
+        return RedactionResult(text, text, [])
+
+    records = []
+    scrubbed = text
+
+    for regex, label in _CREDENTIAL_PATTERNS:
+        if label is not None:
+            compiled = re.compile(regex, re.IGNORECASE)
+            # Record every hit before substituting so offsets refer to the
+            # text this pattern actually saw.
+            records.extend(
+                {
+                    "pattern_name": label,
+                    "position": hit.start(),
+                    "length": len(hit.group()),
+                    "source": source,
+                    "timestamp": time.time(),
+                }
+                for hit in compiled.finditer(scrubbed)
+            )
+            scrubbed = compiled.sub(label, scrubbed)
+
+    outcome = RedactionResult(scrubbed, text, records)
+
+    # Only runs that changed something are written to the audit trail.
+    if outcome.was_redacted:
+        _log_redaction(outcome, source)
+
+    return outcome
+
+
+def _log_redaction(result: RedactionResult, source: str) -> None:
+    """Append one JSON line describing *result* to the audit log.
+
+    The line records the current time, *source*, the redaction count and
+    the pattern names; no text is written. Best-effort: any failure is
+    logged at debug level and swallowed.
+    """
+    try:
+        _AUDIT_DIR.mkdir(parents=True, exist_ok=True)
+        record = {
+            "timestamp": time.time(),
+            "source": source,
+            "count": result.count,
+            "patterns": [item["pattern_name"] for item in result.redactions],
+        }
+        with open(_AUDIT_LOG, "a") as handle:
+            serialized = json.dumps(record)
+            handle.write(serialized + "\n")
+    except Exception as e:
+        logger.debug(f"Failed to log redaction: {e}")
+
+
+def should_auto_mask(file_path: str) -> bool:
+    """Return True when *file_path* contains a sensitive-looking fragment.
+
+    The check is a case-insensitive substring test against a fixed list of
+    fragments, applied to the whole path string.
+    """
+    lowered = file_path.lower()
+    for fragment in (
+        ".env", "config", "token", "secret", "credential",
+        "key", "auth", "password", ".pem", ".key",
+    ):
+        if fragment in lowered:
+            return True
+    return False
+
+
+def mask_config_values(text: str) -> str:
+    """Mask credential values in config/env files while preserving structure.
+
+    Transforms:
+        API_KEY=sk-abc123  →  API_KEY=[REDACTED]
+        token: "ghp_xyz"   →  token: "[REDACTED]"
+
+    Each line is checked first as ``KEY=VALUE`` (optionally prefixed with
+    ``export``), then as YAML-style ``key: value``. Key names may be any
+    case and may contain ``_``, ``-`` and ``.``. A line is masked when its
+    key contains a credential hint and its value is non-empty and not
+    already ``[REDACTED]``. All other lines are returned unchanged.
+
+    Args:
+        text: File content; lines are separated by ``"\\n"``.
+
+    Returns:
+        The content with credential values replaced by ``[REDACTED]``.
+        ``KEY=VALUE`` lines keep their original quote style; YAML lines are
+        always written with double quotes.
+    """
+    # Substrings that mark a key as credential-like (compared lower-case).
+    hints = ("key", "token", "secret", "password", "passwd", "credential", "auth")
+
+    def _is_sensitive(name: str) -> bool:
+        lowered = name.lower()
+        return any(hint in lowered for hint in hints)
+
+    def _needs_mask(value: str) -> bool:
+        return bool(value) and not value.startswith("[REDACTED]")
+
+    masked = []
+
+    for line in text.split("\n"):
+        # KEY=VALUE (env / ini / properties style); group 1 keeps the leading
+        # indent and any "export " prefix so the line layout survives.
+        env = re.match(r'^(\s*(?:export\s+)?[A-Za-z_][A-Za-z0-9_.-]*)\s*=\s*(.*)', line)
+        if env and _is_sensitive(env.group(1)):
+            value = env.group(2).strip()
+            if _needs_mask(value):
+                # Preserve a matching pair of surrounding quotes.
+                first = value[0]
+                quote = first if first in "\"'" and value.endswith(first) else ""
+                masked.append(f"{env.group(1)}={quote}[REDACTED]{quote}")
+                continue
+
+        # YAML-style key: value, with optional quotes around the value.
+        yaml_like = re.match(
+            r'^(\s*[A-Za-z_][A-Za-z0-9_.-]*)\s*:\s*["\']?(.*?)["\']?\s*$', line
+        )
+        if yaml_like and _is_sensitive(yaml_like.group(1)):
+            if _needs_mask(yaml_like.group(2).strip()):
+                masked.append(f'{yaml_like.group(1)}: "[REDACTED]"')
+                continue
+
+        masked.append(line)
+
+    return "\n".join(masked)
+
+
+def redact_tool_output(
+    tool_name: str,
+    output: Any,
+    source: Optional[str] = None,
+) -> Tuple[Any, Optional[str]]:
+    """Redact credentials from tool output.
+
+    Strings are redacted directly. Dicts, lists and plain tuples are walked
+    recursively, so credentials nested at any depth are redacted too;
+    containers are rebuilt rather than mutated. Every other value is
+    returned unchanged.
+
+    Args:
+        tool_name: Name of the tool
+        output: Tool output (string, dict, list, tuple or anything else)
+        source: Source identifier (defaults to tool_name). Nested values
+            get ``source.key`` / ``source[index]`` as their identifier.
+
+    Returns:
+        Tuple of (redacted_output, notice). ``notice`` is None when nothing
+        was redacted; for containers it is the "; "-joined notices of the
+        elements that were redacted.
+    """
+    source = source or tool_name
+
+    if isinstance(output, str):
+        result = redact_credentials(output, source)
+        if result.was_redacted:
+            return result.text, result.notice()
+        return output, None
+
+    if isinstance(output, dict):
+        redacted = {}
+        notices = []
+        for key, value in output.items():
+            # Recurse for every value: non-container, non-string values come
+            # back unchanged, nested containers are scrubbed.
+            redacted[key], note = redact_tool_output(tool_name, value, f"{source}.{key}")
+            if note:
+                notices.append(note)
+        return redacted, ("; ".join(notices) if notices else None)
+
+    # Only list and exact tuple are rebuilt; subclasses such as namedtuples
+    # cannot be reconstructed generically and are passed through.
+    if isinstance(output, list) or type(output) is tuple:
+        items = []
+        notices = []
+        for index, item in enumerate(output):
+            cleaned, note = redact_tool_output(tool_name, item, f"{source}[{index}]")
+            items.append(cleaned)
+            if note:
+                notices.append(note)
+        rebuilt = tuple(items) if type(output) is tuple else items
+        return rebuilt, ("; ".join(notices) if notices else None)
+
+    # Anything else: pass through
+    return output, None
+
+
+def get_redaction_stats() -> Dict[str, Any]:
+    """Get redaction statistics from audit log.
+
+    Returns:
+        Dict with ``total_redactions`` (sum of every entry's ``count``),
+        ``by_source`` (number of log entries per source) and ``by_pattern``
+        (number of occurrences per pattern name). All zero/empty when the
+        log does not exist or cannot be read.
+
+    Blank, malformed or wrongly-shaped lines are skipped one at a time, so a
+    single bad line does not discard the entries that follow it.
+    """
+    stats = {
+        "total_redactions": 0,
+        "by_source": {},
+        "by_pattern": {},
+    }
+
+    try:
+        with open(_AUDIT_LOG, "r") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+
+                try:
+                    entry = json.loads(line)
+                except ValueError:
+                    logger.debug("Skipping malformed audit log line")
+                    continue
+                if not isinstance(entry, dict):
+                    continue
+
+                # Validate the whole entry before touching stats so a bad
+                # entry cannot leave the counters half-updated.
+                count = entry.get("count", 0)
+                source = entry.get("source", "unknown")
+                patterns = entry.get("patterns", [])
+                if not (
+                    isinstance(count, int)
+                    and isinstance(source, str)
+                    and isinstance(patterns, list)
+                    and all(isinstance(p, str) for p in patterns)
+                ):
+                    continue
+
+                stats["total_redactions"] += count
+                stats["by_source"][source] = stats["by_source"].get(source, 0) + 1
+                for pattern in patterns:
+                    stats["by_pattern"][pattern] = stats["by_pattern"].get(pattern, 0) + 1
+    except FileNotFoundError:
+        # No audit log yet: nothing has been redacted.
+        pass
+    except (OSError, UnicodeDecodeError) as e:
+        logger.debug("Failed to read redaction audit log: %s", e)
+
+    return stats