fix: allow self-hosted Firecrawl without API key + add self-hosting docs

On top of PR #460: self-hosted Firecrawl instances don't require an API
key (USE_DB_AUTHENTICATION=false), so don't force users to set a dummy
FIRECRAWL_API_KEY when FIRECRAWL_API_URL is set. Also adds a proper
self-hosting section to the configuration docs explaining what you get,
what you lose, and how to set it up (Docker stack, tradeoffs vs cloud).

Added 2 more tests (URL-only without key, neither-set raises).
This commit is contained in:
teknium1
2026-03-05 16:44:21 -08:00
parent a41ba57a7a
commit 363633e2ba
4 changed files with 78 additions and 19 deletions

View File

@@ -14,16 +14,15 @@ class TestFirecrawlClientConfig:
tools.web_tools._firecrawl_client = None
def _clear_firecrawl_env(self):
    """Drop the Firecrawl variables from the process environment.

    Removes ``FIRECRAWL_API_KEY`` and ``FIRECRAWL_API_URL`` from
    ``os.environ``. Variables that are already absent are ignored, so the
    call is safe to repeat.
    """
    os.environ.pop("FIRECRAWL_API_KEY", None)
    os.environ.pop("FIRECRAWL_API_URL", None)
def test_client_with_api_key_only(self):
"""Test client initialization with only API key (no custom URL)."""
env_vars = {"FIRECRAWL_API_KEY": "test-key"}
env_vars.pop("FIRECRAWL_API_URL", None)
with patch.dict(os.environ, env_vars, clear=False):
# Remove FIRECRAWL_API_URL from env if it exists
if "FIRECRAWL_API_URL" in os.environ:
del os.environ["FIRECRAWL_API_URL"]
"""Test client initialization with only API key (cloud mode)."""
self._clear_firecrawl_env()
with patch.dict(os.environ, {"FIRECRAWL_API_KEY": "test-key"}, clear=False):
with patch("tools.web_tools.Firecrawl") as mock_firecrawl:
from tools.web_tools import _get_firecrawl_client
@@ -32,6 +31,7 @@ class TestFirecrawlClientConfig:
def test_client_with_api_key_and_url(self):
"""Test client initialization with API key and custom URL."""
self._clear_firecrawl_env()
with patch.dict(
os.environ,
{
@@ -47,3 +47,28 @@ class TestFirecrawlClientConfig:
mock_firecrawl.assert_called_once_with(
api_key="test-key", api_url="http://localhost:3002"
)
def test_client_with_url_only_no_key(self):
    """Self-hosted mode: URL without API key should work.

    With only FIRECRAWL_API_URL set, the client must be constructed with
    ``api_url`` alone (no ``api_key`` keyword at all).
    """
    # Scrub the environment *inside* a patch.dict scope. patch.dict
    # snapshots os.environ on entry and restores it on exit, so any real
    # FIRECRAWL_* values on the developer's machine come back afterwards
    # instead of being removed for the rest of the test session.
    with patch.dict(os.environ, {}, clear=False):
        self._clear_firecrawl_env()
        os.environ["FIRECRAWL_API_URL"] = "http://localhost:3002"
        with patch("tools.web_tools.Firecrawl") as mock_firecrawl:
            from tools.web_tools import _get_firecrawl_client

            _get_firecrawl_client()
            mock_firecrawl.assert_called_once_with(
                api_url="http://localhost:3002"
            )
def test_no_key_no_url_raises(self):
    """Neither key nor URL set should raise a clear error.

    Expects ``ValueError`` whose message mentions FIRECRAWL_API_KEY.
    """
    # Remove the variables inside a patch.dict scope so the original
    # environment is restored when the test finishes; popping from the
    # bare os.environ would leak the removal into every later test.
    with patch.dict(os.environ, {}, clear=False):
        self._clear_firecrawl_env()
        with patch("tools.web_tools.Firecrawl"):
            from tools.web_tools import _get_firecrawl_client

            with pytest.raises(ValueError, match="FIRECRAWL_API_KEY"):
                _get_firecrawl_client()

View File

@@ -56,18 +56,29 @@ logger = logging.getLogger(__name__)
_firecrawl_client = None
def _get_firecrawl_client():
"""Get or create the Firecrawl client (lazy initialization)."""
"""Get or create the Firecrawl client (lazy initialization).
Uses the cloud API by default (requires FIRECRAWL_API_KEY).
Set FIRECRAWL_API_URL to point at a self-hosted instance instead —
in that case the API key is optional (set USE_DB_AUTHENTICATION=false
on your Firecrawl server to disable auth entirely).
"""
global _firecrawl_client
if _firecrawl_client is None:
api_key = os.getenv("FIRECRAWL_API_KEY")
if not api_key:
raise ValueError("FIRECRAWL_API_KEY environment variable not set")
api_url = os.getenv("FIRECRAWL_API_URL")
if not api_key and not api_url:
raise ValueError(
"FIRECRAWL_API_KEY environment variable not set. "
"Set it for cloud Firecrawl, or set FIRECRAWL_API_URL "
"to use a self-hosted instance."
)
kwargs = {}
if api_key:
kwargs["api_key"] = api_key
if api_url:
_firecrawl_client = Firecrawl(api_key=api_key, api_url=api_url)
else:
_firecrawl_client = Firecrawl(api_key=api_key)
kwargs["api_url"] = api_url
_firecrawl_client = Firecrawl(**kwargs)
return _firecrawl_client
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000

View File

@@ -163,8 +163,7 @@ Open `~/.hermes/.env` and add at minimum an LLM provider key:
OPENROUTER_API_KEY=sk-or-v1-your-key-here
# Optional — enable additional tools:
FIRECRAWL_API_KEY=fc-your-key # Web search & scraping
FIRECRAWL_API_URL=http://localhost:3002 # Self-hosted Firecrawl (optional)
FIRECRAWL_API_KEY=fc-your-key # Web search & scraping (or self-host, see docs)
FAL_KEY=your-fal-key # Image generation (FLUX)
```

View File

@@ -79,7 +79,6 @@ Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, we
| Feature | Provider | Env Variable |
|---------|----------|--------------|
| Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY` |
| Web scraping (self-hosted) | Firecrawl | `FIRECRAWL_API_URL` |
| Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
| Image generation | [FAL](https://fal.ai/) | `FAL_KEY` |
| Premium TTS voices | [ElevenLabs](https://elevenlabs.io/) | `ELEVENLABS_API_KEY` |
@@ -87,6 +86,31 @@ Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, we
| RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` |
| Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` |
### Self-Hosting Firecrawl
By default, Hermes uses the [Firecrawl cloud API](https://firecrawl.dev/) for web search and scraping. If you prefer to run Firecrawl locally, you can point Hermes at a self-hosted instance instead.
**What you get:** No API key required, no rate limits, no per-page costs, full data sovereignty.
**What you lose:** The cloud version uses Firecrawl's proprietary "Fire-engine" for advanced anti-bot bypassing (Cloudflare, CAPTCHAs, IP rotation). Self-hosted uses basic fetch + Playwright, so some protected sites may fail. Search uses DuckDuckGo instead of Google.
**Setup:**
1. Clone and start the Firecrawl Docker stack (5 containers: API, Playwright, Redis, RabbitMQ, PostgreSQL — requires ~4-8 GB RAM):
```bash
git clone https://github.com/mendableai/firecrawl
cd firecrawl
# In .env, set: USE_DB_AUTHENTICATION=false
docker compose up -d
```
2. Point Hermes at your instance (no API key needed):
```bash
hermes config set FIRECRAWL_API_URL http://localhost:3002
```
You can also set both `FIRECRAWL_API_KEY` and `FIRECRAWL_API_URL` if your self-hosted instance has authentication enabled.
## OpenRouter Provider Routing
When using OpenRouter, you can control how requests are routed across providers. Add a `provider_routing` section to `~/.hermes/config.yaml`: