97 lines
3.6 KiB
Python
97 lines
3.6 KiB
Python
|
|
"""URL safety checks — blocks requests to private/internal network addresses.
|
||
|
|
|
||
|
|
Prevents SSRF (Server-Side Request Forgery) where a malicious prompt or
|
||
|
|
skill could trick the agent into fetching internal resources like cloud
|
||
|
|
metadata endpoints (169.254.169.254), localhost services, or private
|
||
|
|
network hosts.
|
||
|
|
|
||
|
|
Limitations (documented, not fixable at pre-flight level):
|
||
|
|
- DNS rebinding (TOCTOU): an attacker-controlled DNS server with TTL=0
|
||
|
|
can return a public IP for the check, then a private IP for the actual
|
||
|
|
connection. Fixing this requires connection-level validation (e.g.
|
||
|
|
Python's Champion library or an egress proxy like Stripe's Smokescreen).
|
||
|
|
- Redirect-based bypass in vision_tools is mitigated by an httpx event
|
||
|
|
hook that re-validates each redirect target. Web tools use third-party
|
||
|
|
SDKs (Firecrawl/Tavily) where redirect handling is on their servers.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import ipaddress
|
||
|
|
import logging
|
||
|
|
import socket
|
||
|
|
from urllib.parse import urlparse
|
||
|
|
|
||
|
|
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
# Hostnames that should always be blocked regardless of IP resolution
|
||
|
|
_BLOCKED_HOSTNAMES = frozenset({
|
||
|
|
"metadata.google.internal",
|
||
|
|
"metadata.goog",
|
||
|
|
})
|
||
|
|
|
||
|
|
# 100.64.0.0/10 (CGNAT / Shared Address Space, RFC 6598) is NOT covered by
|
||
|
|
# ipaddress.is_private — it returns False for both is_private and is_global.
|
||
|
|
# Must be blocked explicitly. Used by carrier-grade NAT, Tailscale/WireGuard
|
||
|
|
# VPNs, and some cloud internal networks.
|
||
|
|
_CGNAT_NETWORK = ipaddress.ip_network("100.64.0.0/10")
|
||
|
|
|
||
|
|
|
||
|
|
def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
|
||
|
|
"""Return True if the IP should be blocked for SSRF protection."""
|
||
|
|
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||
|
|
return True
|
||
|
|
if ip.is_multicast or ip.is_unspecified:
|
||
|
|
return True
|
||
|
|
# CGNAT range not covered by is_private
|
||
|
|
if ip in _CGNAT_NETWORK:
|
||
|
|
return True
|
||
|
|
return False
|
||
|
|
|
||
|
|
|
||
|
|
def is_safe_url(url: str) -> bool:
|
||
|
|
"""Return True if the URL target is not a private/internal address.
|
||
|
|
|
||
|
|
Resolves the hostname to an IP and checks against private ranges.
|
||
|
|
Fails closed: DNS errors and unexpected exceptions block the request.
|
||
|
|
"""
|
||
|
|
try:
|
||
|
|
parsed = urlparse(url)
|
||
|
|
hostname = (parsed.hostname or "").strip().lower()
|
||
|
|
if not hostname:
|
||
|
|
return False
|
||
|
|
|
||
|
|
# Block known internal hostnames
|
||
|
|
if hostname in _BLOCKED_HOSTNAMES:
|
||
|
|
logger.warning("Blocked request to internal hostname: %s", hostname)
|
||
|
|
return False
|
||
|
|
|
||
|
|
# Try to resolve and check IP
|
||
|
|
try:
|
||
|
|
addr_info = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
|
||
|
|
except socket.gaierror:
|
||
|
|
# DNS resolution failed — fail closed. If DNS can't resolve it,
|
||
|
|
# the HTTP client will also fail, so blocking loses nothing.
|
||
|
|
logger.warning("Blocked request — DNS resolution failed for: %s", hostname)
|
||
|
|
return False
|
||
|
|
|
||
|
|
for family, _, _, _, sockaddr in addr_info:
|
||
|
|
ip_str = sockaddr[0]
|
||
|
|
try:
|
||
|
|
ip = ipaddress.ip_address(ip_str)
|
||
|
|
except ValueError:
|
||
|
|
continue
|
||
|
|
|
||
|
|
if _is_blocked_ip(ip):
|
||
|
|
logger.warning(
|
||
|
|
"Blocked request to private/internal address: %s -> %s",
|
||
|
|
hostname, ip_str,
|
||
|
|
)
|
||
|
|
return False
|
||
|
|
|
||
|
|
return True
|
||
|
|
|
||
|
|
except Exception as exc:
|
||
|
|
# Fail closed on unexpected errors — don't let parsing edge cases
|
||
|
|
# become SSRF bypass vectors
|
||
|
|
logger.warning("Blocked request — URL safety check error for %s: %s", url, exc)
|
||
|
|
return False
|