2026-02-28 21:56:05 -08:00
|
|
|
"""Regex-based secret redaction for logs and tool output.
|
|
|
|
|
|
|
|
|
|
Applies pattern matching to mask API keys, tokens, and credentials
|
|
|
|
|
before they reach log files, verbose output, or gateway logs.
|
|
|
|
|
|
|
|
|
|
Short tokens (< 18 chars) are fully masked. Longer tokens preserve
|
|
|
|
|
the first 6 and last 4 characters for debuggability.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import logging
|
2026-03-09 01:04:33 -07:00
|
|
|
import os
|
2026-02-28 21:56:05 -08:00
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
# Known API key prefixes -- match the prefix + contiguous token chars
|
|
|
|
|
_PREFIX_PATTERNS = [
|
2026-03-09 01:28:27 -07:00
|
|
|
r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter / Anthropic (sk-ant-*)
|
2026-02-28 21:56:05 -08:00
|
|
|
r"ghp_[A-Za-z0-9]{10,}", # GitHub PAT (classic)
|
|
|
|
|
r"github_pat_[A-Za-z0-9_]{10,}", # GitHub PAT (fine-grained)
|
|
|
|
|
r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack tokens
|
|
|
|
|
r"AIza[A-Za-z0-9_-]{30,}", # Google API keys
|
|
|
|
|
r"pplx-[A-Za-z0-9]{10,}", # Perplexity
|
|
|
|
|
r"fal_[A-Za-z0-9_-]{10,}", # Fal.ai
|
|
|
|
|
r"fc-[A-Za-z0-9]{10,}", # Firecrawl
|
|
|
|
|
r"bb_live_[A-Za-z0-9_-]{10,}", # BrowserBase
|
|
|
|
|
r"gAAAA[A-Za-z0-9_=-]{20,}", # Codex encrypted tokens
|
2026-03-09 01:28:27 -07:00
|
|
|
r"AKIA[A-Z0-9]{16}", # AWS Access Key ID
|
|
|
|
|
r"sk_live_[A-Za-z0-9]{10,}", # Stripe secret key (live)
|
|
|
|
|
r"sk_test_[A-Za-z0-9]{10,}", # Stripe secret key (test)
|
|
|
|
|
r"rk_live_[A-Za-z0-9]{10,}", # Stripe restricted key
|
|
|
|
|
r"SG\.[A-Za-z0-9_-]{10,}", # SendGrid API key
|
|
|
|
|
r"hf_[A-Za-z0-9]{10,}", # HuggingFace token
|
|
|
|
|
r"r8_[A-Za-z0-9]{10,}", # Replicate API token
|
|
|
|
|
r"npm_[A-Za-z0-9]{10,}", # npm access token
|
|
|
|
|
r"pypi-[A-Za-z0-9_-]{10,}", # PyPI API token
|
|
|
|
|
r"dop_v1_[A-Za-z0-9]{10,}", # DigitalOcean PAT
|
|
|
|
|
r"doo_v1_[A-Za-z0-9]{10,}", # DigitalOcean OAuth
|
|
|
|
|
r"am_[A-Za-z0-9_-]{10,}", # AgentMail API key
|
2026-02-28 21:56:05 -08:00
|
|
|
]
|
|
|
|
|
|
|
|
|
|
# ENV assignment patterns: KEY=value where KEY contains a secret-like name
|
|
|
|
|
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
|
|
|
|
|
_ENV_ASSIGN_RE = re.compile(
|
|
|
|
|
rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
|
|
|
|
|
re.IGNORECASE,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# JSON field patterns: "apiKey": "value", "token": "value", etc.
|
|
|
|
|
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)"
|
|
|
|
|
_JSON_FIELD_RE = re.compile(
|
|
|
|
|
rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"',
|
|
|
|
|
re.IGNORECASE,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Authorization headers
|
|
|
|
|
_AUTH_HEADER_RE = re.compile(
|
|
|
|
|
r"(Authorization:\s*Bearer\s+)(\S+)",
|
|
|
|
|
re.IGNORECASE,
|
|
|
|
|
)
|
|
|
|
|
|
2026-03-08 22:33:06 +02:00
|
|
|
# Telegram bot tokens: bot<digits>:<token> or <digits>:<token>,
|
|
|
|
|
# where token part is restricted to [-A-Za-z0-9_] and length >= 30
|
2026-02-28 21:56:05 -08:00
|
|
|
_TELEGRAM_RE = re.compile(
|
|
|
|
|
r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
|
|
|
|
|
)
|
|
|
|
|
|
2026-03-09 01:28:27 -07:00
|
|
|
# Private key blocks: -----BEGIN RSA PRIVATE KEY----- ... -----END RSA PRIVATE KEY-----
|
|
|
|
|
_PRIVATE_KEY_RE = re.compile(
|
|
|
|
|
r"-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Database connection strings: protocol://user:PASSWORD@host
|
|
|
|
|
# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
|
|
|
|
|
_DB_CONNSTR_RE = re.compile(
|
|
|
|
|
r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
|
|
|
|
|
re.IGNORECASE,
|
|
|
|
|
)
|
|
|
|
|
|
feat: add Signal messenger gateway platform (#405)
Complete Signal adapter using signal-cli daemon HTTP API.
Based on PR #268 by ibhagwan, rebuilt on current main with bug fixes.
Architecture:
- SSE streaming for inbound messages with exponential backoff (2s→60s)
- JSON-RPC 2.0 for outbound (send, typing, attachments, contacts)
- Health monitor detects stale SSE connections (120s threshold)
- Phone number redaction in all logs and global redact.py
Features:
- DM and group message support with separate access policies
- DM policies: pairing (default), allowlist, open
- Group policies: disabled (default), allowlist, open
- Attachment download with magic-byte type detection
- Typing indicators (8s refresh interval)
- 100MB attachment size limit, 8000 char message limit
- E.164 phone + UUID allowlist support
Integration:
- Platform.SIGNAL enum in gateway/config.py
- Signal in _is_user_authorized() allowlist maps (gateway/run.py)
- Adapter factory in _create_adapter() (gateway/run.py)
- user_id_alt/chat_id_alt fields in SessionSource for UUIDs
- send_message tool support via httpx JSON-RPC (not aiohttp)
- Interactive setup wizard in 'hermes gateway setup'
- Connectivity testing during setup (pings /api/v1/check)
- signal-cli detection and install guidance
Bug fixes from PR #268:
- Timestamp reads from envelope_data (not outer wrapper)
- Uses httpx consistently (not aiohttp in send_message tool)
- SIGNAL_DEBUG scoped to signal logger (not root)
- extract_images regex NOT modified (preserves group numbering)
- pairing.py NOT modified (no cross-platform side effects)
- No dual authorization (adapter defers to run.py for user auth)
- Wildcard uses set membership ('*' in set, not list equality)
- .zip default for PK magic bytes (not .docx)
No new Python dependencies — uses httpx (already core).
External requirement: signal-cli daemon (user-installed).
Tests: 30 new tests covering config, init, helpers, session source,
phone redaction, authorization, and send_message integration.
Co-authored-by: ibhagwan <ibhagwan@users.noreply.github.com>
2026-03-08 20:20:35 -07:00
|
|
|
# E.164 phone numbers: +<country><number>, 7-15 digits
|
|
|
|
|
# Negative lookahead prevents matching hex strings or identifiers
|
|
|
|
|
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
|
|
|
|
|
|
2026-02-28 21:56:05 -08:00
|
|
|
# All known-prefix fragments folded into one alternation (group 1).  The
# lookbehind/lookahead pair requires that the key is not glued to another
# token character ([A-Za-z0-9_-]) on either side.
_PREFIX_RE = re.compile(
    rf"(?<![A-Za-z0-9_-])({'|'.join(_PREFIX_PATTERNS)})(?![A-Za-z0-9_-])"
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _mask_token(token: str) -> str:
|
|
|
|
|
"""Mask a token, preserving prefix for long tokens."""
|
|
|
|
|
if len(token) < 18:
|
|
|
|
|
return "***"
|
|
|
|
|
return f"{token[:6]}...{token[-4:]}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def redact_sensitive_text(text: str) -> str:
    """Return *text* with every recognised secret masked.

    The redaction passes run in a fixed order, each one on the output of the
    previous: known key prefixes, ENV-style assignments, JSON secret fields,
    ``Authorization: Bearer`` headers, Telegram bot tokens, private key
    blocks, database connection-string passwords, and E.164 phone numbers.
    Text that matches none of them comes back unchanged.

    Falsy input (empty string, ``None``) is returned as-is.  Redaction is
    skipped entirely when the ``HERMES_REDACT_SECRETS`` environment variable
    is set to ``0``, ``false``, ``no`` or ``off`` (case-insensitive).
    NOTE(review): presumably that variable mirrors ``security.redact_secrets``
    in config.yaml -- confirm against the config loader.
    """
    if not text:
        return text

    # Opt-out switch, read on every call so changes take effect immediately.
    opt_out = os.getenv("HERMES_REDACT_SECRETS", "").lower()
    if opt_out in ("0", "false", "no", "off"):
        return text

    def _env_repl(match):
        # NAME=value; the surrounding quote (if any) is kept, the value masked.
        var_name, quote_char, secret = match.group(1, 2, 3)
        return f"{var_name}={quote_char}{_mask_token(secret)}{quote_char}"

    def _json_repl(match):
        # "key": "value"; the quoted key is kept verbatim.
        field, secret = match.group(1, 2)
        return f'{field}: "{_mask_token(secret)}"'

    def _bearer_repl(match):
        # Keep the "Authorization: Bearer " lead-in, mask the credential.
        return match.group(1) + _mask_token(match.group(2))

    def _telegram_repl(match):
        # Keep the optional "bot" prefix and numeric id; drop the secret part.
        bot_prefix = match.group(1) or ""
        return f"{bot_prefix}{match.group(2)}:***"

    def _phone_repl(match):
        # Show fewer characters at each end for the shortest numbers.
        number = match.group(1)
        keep = 2 if len(number) <= 8 else 4
        return number[:keep] + "****" + number[-keep:]

    # (pattern, replacement) pairs, applied strictly in this order.
    passes = (
        (_PREFIX_RE, lambda m: _mask_token(m.group(1))),
        (_ENV_ASSIGN_RE, _env_repl),
        (_JSON_FIELD_RE, _json_repl),
        (_AUTH_HEADER_RE, _bearer_repl),
        (_TELEGRAM_RE, _telegram_repl),
        (_PRIVATE_KEY_RE, "[REDACTED PRIVATE KEY]"),
        (_DB_CONNSTR_RE, lambda m: f"{m.group(1)}***{m.group(3)}"),
        (_SIGNAL_PHONE_RE, _phone_repl),
    )
    for pattern, replacement in passes:
        text = pattern.sub(replacement, text)

    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RedactingFormatter(logging.Formatter):
    """A ``logging.Formatter`` whose output is passed through secret redaction.

    Construction arguments are handed to ``logging.Formatter`` unchanged; the
    only difference is that the formatted string is run through
    ``redact_sensitive_text`` before it is returned.
    """

    def __init__(self, fmt=None, datefmt=None, style='%', **kwargs):
        """Initialise the underlying formatter with the given settings."""
        super().__init__(fmt=fmt, datefmt=datefmt, style=style, **kwargs)

    def format(self, record: logging.LogRecord) -> str:
        """Format *record* as the base class does, then mask any secrets."""
        return redact_sensitive_text(super().format(record))
|