Files
hermes-agent/agent/mtls.py
Alexander Whitestone 4214082fb6
All checks were successful
Lint / lint (pull_request) Successful in 8s
feat: A2A auth — mutual TLS between fleet agents
Implements mTLS for securing agent-to-agent communication in the Hermes
fleet. Fixes #806.

Changes:
- scripts/gen_fleet_ca.sh: generate a self-signed Fleet CA (4096-bit RSA,
  10-year validity) that signs all agent certificates
- scripts/gen_agent_cert.sh: generate per-agent certs (Timmy, Allegro,
  Ezra) signed by the fleet CA with SAN entries and clientAuth/serverAuth
  extended key usage
- agent/mtls.py: new module providing:
  - build_server_ssl_context() — TLS_SERVER context with CERT_REQUIRED,
    enforces client cert against Fleet CA
  - build_client_ssl_context() — TLS_CLIENT context for outbound A2A calls
  - MTLSMiddleware — ASGI middleware that rejects unauthenticated requests
    to A2A routes (/.well-known/agent-card*, /api/agent-card, /a2a/) with
    HTTP 403 when mTLS is enabled
  - is_mtls_configured() — checks HERMES_MTLS_CERT/KEY/CA env vars
- hermes_cli/web_server.py: wire MTLSMiddleware into the FastAPI app;
  pass SSL context to uvicorn when HERMES_MTLS_* env vars are set so
  the server runs TLS with mandatory client cert verification
- ansible/roles/hermes_mtls/: Ansible role to distribute Fleet CA cert,
  agent cert, and agent key to fleet nodes; writes an env file with
  HERMES_MTLS_* vars and restarts the hermes-gateway service
- ansible/fleet_mtls.yml: fleet-wide playbook referencing the role for
  Timmy, Allegro, and Ezra nodes
- tests/test_mtls.py: 15 tests covering is_mtls_configured, SSL context
  creation with real cryptography-generated certs, and MTLSMiddleware
  (unauthorized agent rejected → 403, authorized agent accepted → 200)

mTLS is opt-in: set HERMES_MTLS_CERT, HERMES_MTLS_KEY, and HERMES_MTLS_CA
to enable. When unset, the server behaves exactly as before.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 18:04:00 -04:00

185 lines
6.4 KiB
Python

"""
agent/mtls.py — Mutual TLS support for Hermes A2A communication.
Provides:
- build_server_ssl_context() — SSL context for uvicorn that requires client certs
- build_client_ssl_context() — SSL context for httpx/aiohttp A2A clients
- MTLSMiddleware — FastAPI middleware that enforces client cert on A2A routes
- is_mtls_configured() — Check if env vars are set
Configuration (environment variables):
HERMES_MTLS_CERT Path to this agent's TLS certificate (PEM)
HERMES_MTLS_KEY Path to this agent's TLS private key (PEM)
HERMES_MTLS_CA Path to the Fleet CA certificate (PEM) — used to verify peers
All three must be set to enable mTLS. If any is missing, mTLS is disabled and
the server falls back to plain HTTP (or regular TLS without client auth).
"""
import logging
import os
import ssl
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# A2A routes that require a valid client certificate when mTLS is enabled.
_A2A_PATH_PREFIXES = (
"/.well-known/agent-card",
"/agent-card",
"/api/agent-card",
"/a2a/",
)
def _get_env(key: str) -> Optional[str]:
val = os.environ.get(key, "").strip()
return val or None
def is_mtls_configured() -> bool:
"""Return True if all three mTLS env vars are set and the files exist."""
cert = _get_env("HERMES_MTLS_CERT")
key = _get_env("HERMES_MTLS_KEY")
ca = _get_env("HERMES_MTLS_CA")
if not (cert and key and ca):
return False
for label, path in (("HERMES_MTLS_CERT", cert), ("HERMES_MTLS_KEY", key), ("HERMES_MTLS_CA", ca)):
if not Path(path).is_file():
logger.warning("mTLS disabled: %s file not found: %s", label, path)
return False
return True
def build_server_ssl_context() -> ssl.SSLContext:
"""
Build an SSL context for the A2A server that:
- presents its own certificate
- requires and verifies the client's certificate against the Fleet CA
Raises:
RuntimeError: if mTLS env vars are not set or files are missing
ssl.SSLError: if cert/key/CA files are invalid
"""
cert = _get_env("HERMES_MTLS_CERT")
key = _get_env("HERMES_MTLS_KEY")
ca = _get_env("HERMES_MTLS_CA")
if not (cert and key and ca):
raise RuntimeError(
"mTLS not configured. Set HERMES_MTLS_CERT, HERMES_MTLS_KEY, and HERMES_MTLS_CA."
)
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
ctx.minimum_version = ssl.TLSVersion.TLSv1_2
ctx.load_cert_chain(certfile=cert, keyfile=key)
ctx.load_verify_locations(cafile=ca)
# CERT_REQUIRED: reject connections without a valid client cert
ctx.verify_mode = ssl.CERT_REQUIRED
logger.info("mTLS server context built (cert=%s, CA=%s)", cert, ca)
return ctx
def build_client_ssl_context() -> ssl.SSLContext:
"""
Build an SSL context for outbound A2A connections that:
- presents this agent's certificate as a client cert
- verifies the remote server against the Fleet CA
Raises:
RuntimeError: if mTLS env vars are not set or files are missing
ssl.SSLError: if cert/key/CA files are invalid
"""
cert = _get_env("HERMES_MTLS_CERT")
key = _get_env("HERMES_MTLS_KEY")
ca = _get_env("HERMES_MTLS_CA")
if not (cert and key and ca):
raise RuntimeError(
"mTLS not configured. Set HERMES_MTLS_CERT, HERMES_MTLS_KEY, and HERMES_MTLS_CA."
)
ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
ctx.minimum_version = ssl.TLSVersion.TLSv1_2
ctx.load_cert_chain(certfile=cert, keyfile=key)
ctx.load_verify_locations(cafile=ca)
ctx.verify_mode = ssl.CERT_REQUIRED
ctx.check_hostname = True
logger.info("mTLS client context built (cert=%s, CA=%s)", cert, ca)
return ctx
def get_peer_cn(ssl_object) -> Optional[str]:
"""Extract the CN from the peer certificate's subject, or None."""
try:
peer_cert = ssl_object.getpeercert()
if not peer_cert:
return None
for rdn in peer_cert.get("subject", ()):
for attr, value in rdn:
if attr == "commonName":
return value
except Exception:
pass
return None
class MTLSMiddleware:
"""
ASGI middleware that enforces client certificate verification on A2A routes.
When mTLS is NOT configured (no env vars) or the route is not an A2A route,
the request passes through unchanged.
When mTLS IS configured and the route matches an A2A prefix, the middleware
checks that the request arrived over a TLS connection with a verified client
certificate. If not, it returns HTTP 403.
Note: This middleware only provides defence-in-depth at the app layer.
The primary enforcement is at the SSL context level (CERT_REQUIRED on the
server context). This middleware is useful when the server runs behind a
TLS-terminating proxy that forwards cert info via headers (not yet
implemented) or for test-time injection.
"""
def __init__(self, app):
self.app = app
self._enabled = is_mtls_configured()
if self._enabled:
logger.info("MTLSMiddleware enabled — A2A routes require client cert")
def _is_a2a_route(self, path: str) -> bool:
return any(path.startswith(prefix) for prefix in _A2A_PATH_PREFIXES)
async def __call__(self, scope, receive, send):
if scope["type"] == "http" and self._enabled and self._is_a2a_route(scope.get("path", "")):
# Check for client cert in the SSL connection
transport = scope.get("extensions", {}).get("tls", {})
peer_cert = transport.get("peer_cert")
if peer_cert is None:
# No client cert — reject
response = _forbidden_response("Client certificate required for A2A endpoints")
await response(scope, receive, send)
return
await self.app(scope, receive, send)
def _forbidden_response(message: str):
"""Return a minimal ASGI 403 response."""
body = message.encode()
async def respond(scope, receive, send):
await send({
"type": "http.response.start",
"status": 403,
"headers": [
(b"content-type", b"text/plain"),
(b"content-length", str(len(body)).encode()),
],
})
await send({"type": "http.response.body", "body": body})
return respond