fix: allow self-hosted Firecrawl without API key + add self-hosting docs
Builds on PR #460. Self-hosted Firecrawl instances don't require an API key when the server runs with USE_DB_AUTHENTICATION=false, so users are no longer forced to set a dummy FIRECRAWL_API_KEY when FIRECRAWL_API_URL is set. Also adds a proper self-hosting section to the configuration docs explaining what you get, what you lose, and how to set it up (Docker stack, tradeoffs vs. cloud), and adds two more tests (URL only without a key works; neither variable set raises).
This commit is contained in:
@@ -14,16 +14,15 @@ class TestFirecrawlClientConfig:
|
||||
|
||||
tools.web_tools._firecrawl_client = None
|
||||
|
||||
def _clear_firecrawl_env(self):
|
||||
"""Remove Firecrawl env vars so tests start clean."""
|
||||
for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"):
|
||||
os.environ.pop(key, None)
|
||||
|
||||
def test_client_with_api_key_only(self):
|
||||
"""Test client initialization with only API key (no custom URL)."""
|
||||
env_vars = {"FIRECRAWL_API_KEY": "test-key"}
|
||||
env_vars.pop("FIRECRAWL_API_URL", None)
|
||||
|
||||
with patch.dict(os.environ, env_vars, clear=False):
|
||||
# Remove FIRECRAWL_API_URL from env if it exists
|
||||
if "FIRECRAWL_API_URL" in os.environ:
|
||||
del os.environ["FIRECRAWL_API_URL"]
|
||||
|
||||
"""Test client initialization with only API key (cloud mode)."""
|
||||
self._clear_firecrawl_env()
|
||||
with patch.dict(os.environ, {"FIRECRAWL_API_KEY": "test-key"}, clear=False):
|
||||
with patch("tools.web_tools.Firecrawl") as mock_firecrawl:
|
||||
from tools.web_tools import _get_firecrawl_client
|
||||
|
||||
@@ -32,6 +31,7 @@ class TestFirecrawlClientConfig:
|
||||
|
||||
def test_client_with_api_key_and_url(self):
|
||||
"""Test client initialization with API key and custom URL."""
|
||||
self._clear_firecrawl_env()
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{
|
||||
@@ -47,3 +47,28 @@ class TestFirecrawlClientConfig:
|
||||
mock_firecrawl.assert_called_once_with(
|
||||
api_key="test-key", api_url="http://localhost:3002"
|
||||
)
|
||||
|
||||
def test_client_with_url_only_no_key(self):
|
||||
"""Self-hosted mode: URL without API key should work."""
|
||||
self._clear_firecrawl_env()
|
||||
with patch.dict(
|
||||
os.environ,
|
||||
{"FIRECRAWL_API_URL": "http://localhost:3002"},
|
||||
clear=False,
|
||||
):
|
||||
with patch("tools.web_tools.Firecrawl") as mock_firecrawl:
|
||||
from tools.web_tools import _get_firecrawl_client
|
||||
|
||||
_get_firecrawl_client()
|
||||
mock_firecrawl.assert_called_once_with(
|
||||
api_url="http://localhost:3002"
|
||||
)
|
||||
|
||||
def test_no_key_no_url_raises(self):
|
||||
"""Neither key nor URL set should raise a clear error."""
|
||||
self._clear_firecrawl_env()
|
||||
with patch("tools.web_tools.Firecrawl"):
|
||||
from tools.web_tools import _get_firecrawl_client
|
||||
|
||||
with pytest.raises(ValueError, match="FIRECRAWL_API_KEY"):
|
||||
_get_firecrawl_client()
|
||||
|
||||
@@ -56,18 +56,29 @@ logger = logging.getLogger(__name__)
|
||||
_firecrawl_client = None
|
||||
|
||||
def _get_firecrawl_client():
|
||||
"""Get or create the Firecrawl client (lazy initialization)."""
|
||||
"""Get or create the Firecrawl client (lazy initialization).
|
||||
|
||||
Uses the cloud API by default (requires FIRECRAWL_API_KEY).
|
||||
Set FIRECRAWL_API_URL to point at a self-hosted instance instead —
|
||||
in that case the API key is optional (set USE_DB_AUTHENTICATION=false
|
||||
on your Firecrawl server to disable auth entirely).
|
||||
"""
|
||||
global _firecrawl_client
|
||||
if _firecrawl_client is None:
|
||||
api_key = os.getenv("FIRECRAWL_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError("FIRECRAWL_API_KEY environment variable not set")
|
||||
|
||||
api_url = os.getenv("FIRECRAWL_API_URL")
|
||||
if not api_key and not api_url:
|
||||
raise ValueError(
|
||||
"FIRECRAWL_API_KEY environment variable not set. "
|
||||
"Set it for cloud Firecrawl, or set FIRECRAWL_API_URL "
|
||||
"to use a self-hosted instance."
|
||||
)
|
||||
kwargs = {}
|
||||
if api_key:
|
||||
kwargs["api_key"] = api_key
|
||||
if api_url:
|
||||
_firecrawl_client = Firecrawl(api_key=api_key, api_url=api_url)
|
||||
else:
|
||||
_firecrawl_client = Firecrawl(api_key=api_key)
|
||||
kwargs["api_url"] = api_url
|
||||
_firecrawl_client = Firecrawl(**kwargs)
|
||||
return _firecrawl_client
|
||||
|
||||
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
||||
|
||||
@@ -163,8 +163,7 @@ Open `~/.hermes/.env` and add at minimum an LLM provider key:
|
||||
OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
||||
|
||||
# Optional — enable additional tools:
|
||||
FIRECRAWL_API_KEY=fc-your-key # Web search & scraping
|
||||
FIRECRAWL_API_URL=http://localhost:3002 # Self-hosted Firecrawl (optional)
|
||||
FIRECRAWL_API_KEY=fc-your-key # Web search & scraping (or self-host, see docs)
|
||||
FAL_KEY=your-fal-key # Image generation (FLUX)
|
||||
```
|
||||
|
||||
|
||||
@@ -79,7 +79,6 @@ Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, we
|
||||
| Feature | Provider | Env Variable |
|
||||
|---------|----------|--------------|
|
||||
| Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY` |
|
||||
| Web scraping (self-hosted) | Firecrawl | `FIRECRAWL_API_URL` |
|
||||
| Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
|
||||
| Image generation | [FAL](https://fal.ai/) | `FAL_KEY` |
|
||||
| Premium TTS voices | [ElevenLabs](https://elevenlabs.io/) | `ELEVENLABS_API_KEY` |
|
||||
@@ -87,6 +86,31 @@ Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, we
|
||||
| RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` |
|
||||
| Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` |
|
||||
|
||||
### Self-Hosting Firecrawl
|
||||
|
||||
By default, Hermes uses the [Firecrawl cloud API](https://firecrawl.dev/) for web search and scraping. If you prefer to run Firecrawl locally, you can point Hermes at a self-hosted instance instead.
|
||||
|
||||
**What you get:** No API key required, no rate limits, no per-page costs, full data sovereignty.
|
||||
|
||||
**What you lose:** The cloud version uses Firecrawl's proprietary "Fire-engine" for advanced anti-bot bypassing (Cloudflare, CAPTCHAs, IP rotation). The self-hosted version uses basic fetch + Playwright, so some protected sites may fail to scrape. Self-hosted search uses DuckDuckGo instead of Google.
|
||||
|
||||
**Setup:**
|
||||
|
||||
1. Clone and start the Firecrawl Docker stack (5 containers: API, Playwright, Redis, RabbitMQ, PostgreSQL — requires ~4-8 GB RAM):
|
||||
```bash
|
||||
git clone https://github.com/mendableai/firecrawl
|
||||
cd firecrawl
|
||||
# In .env, set: USE_DB_AUTHENTICATION=false
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
2. Point Hermes at your instance (no API key needed):
|
||||
```bash
|
||||
hermes config set FIRECRAWL_API_URL http://localhost:3002
|
||||
```
|
||||
|
||||
You can also set both `FIRECRAWL_API_KEY` and `FIRECRAWL_API_URL` if your self-hosted instance has authentication enabled.
|
||||
|
||||
## OpenRouter Provider Routing
|
||||
|
||||
When using OpenRouter, you can control how requests are routed across providers. Add a `provider_routing` section to `~/.hermes/config.yaml`:
|
||||
|
||||
Reference in New Issue
Block a user