diff --git a/tests/tools/test_web_tools_config.py b/tests/tools/test_web_tools_config.py
index 8486d08c3..f1da27823 100644
--- a/tests/tools/test_web_tools_config.py
+++ b/tests/tools/test_web_tools_config.py
@@ -14,16 +14,20 @@ class TestFirecrawlClientConfig:
 
         tools.web_tools._firecrawl_client = None
 
+    def _clear_firecrawl_env(self):
+        """Remove Firecrawl env vars so a test starts from a clean slate.
+
+        Call this only inside ``patch.dict(os.environ)``: the pops mutate the
+        real process environment, and patch.dict is what restores it on exit.
+        """
+        for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"):
+            os.environ.pop(key, None)
+
     def test_client_with_api_key_only(self):
-        """Test client initialization with only API key (no custom URL)."""
-        env_vars = {"FIRECRAWL_API_KEY": "test-key"}
-        env_vars.pop("FIRECRAWL_API_URL", None)
-
-        with patch.dict(os.environ, env_vars, clear=False):
-            # Remove FIRECRAWL_API_URL from env if it exists
-            if "FIRECRAWL_API_URL" in os.environ:
-                del os.environ["FIRECRAWL_API_URL"]
-
+        """Test client initialization with only API key (cloud mode)."""
+        with patch.dict(os.environ):
+            self._clear_firecrawl_env()
+            os.environ["FIRECRAWL_API_KEY"] = "test-key"
             with patch("tools.web_tools.Firecrawl") as mock_firecrawl:
                 from tools.web_tools import _get_firecrawl_client
 
@@ -47,3 +51,26 @@ class TestFirecrawlClientConfig:
                 mock_firecrawl.assert_called_once_with(
                     api_key="test-key", api_url="http://localhost:3002"
                 )
+
+    def test_client_with_url_only_no_key(self):
+        """Self-hosted mode: URL without API key should work."""
+        with patch.dict(os.environ):
+            self._clear_firecrawl_env()
+            os.environ["FIRECRAWL_API_URL"] = "http://localhost:3002"
+            with patch("tools.web_tools.Firecrawl") as mock_firecrawl:
+                from tools.web_tools import _get_firecrawl_client
+
+                _get_firecrawl_client()
+                mock_firecrawl.assert_called_once_with(
+                    api_url="http://localhost:3002"
+                )
+
+    def test_no_key_no_url_raises(self):
+        """Neither key nor URL set should raise a clear error."""
+        with patch.dict(os.environ):
+            self._clear_firecrawl_env()
+            with patch("tools.web_tools.Firecrawl"):
+                from tools.web_tools import _get_firecrawl_client
+
+                with pytest.raises(ValueError, match="FIRECRAWL_API_KEY"):
+                    _get_firecrawl_client()
diff --git a/tools/web_tools.py b/tools/web_tools.py
index d0e6e0d74..5bf223425 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -56,18 +56,29 @@ logger = logging.getLogger(__name__)
 _firecrawl_client = None
 
 def _get_firecrawl_client():
-    """Get or create the Firecrawl client (lazy initialization)."""
+    """Get or create the Firecrawl client (lazy initialization).
+
+    Uses the cloud API by default (requires FIRECRAWL_API_KEY).
+    Set FIRECRAWL_API_URL to point at a self-hosted instance instead —
+    in that case the API key is optional (set USE_DB_AUTHENTICATION=false
+    on your Firecrawl server to disable auth entirely).
+    """
     global _firecrawl_client
     if _firecrawl_client is None:
         api_key = os.getenv("FIRECRAWL_API_KEY")
-        if not api_key:
-            raise ValueError("FIRECRAWL_API_KEY environment variable not set")
-
         api_url = os.getenv("FIRECRAWL_API_URL")
+        if not api_key and not api_url:
+            raise ValueError(
+                "FIRECRAWL_API_KEY environment variable not set. "
+                "Set it for cloud Firecrawl, or set FIRECRAWL_API_URL "
+                "to use a self-hosted instance."
+            )
+        kwargs = {}
+        if api_key:
+            kwargs["api_key"] = api_key
         if api_url:
-            _firecrawl_client = Firecrawl(api_key=api_key, api_url=api_url)
-        else:
-            _firecrawl_client = Firecrawl(api_key=api_key)
+            kwargs["api_url"] = api_url
+        _firecrawl_client = Firecrawl(**kwargs)
     return _firecrawl_client
 
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index 21a4e0092..d74822022 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -163,8 +163,7 @@ Open `~/.hermes/.env` and add at minimum an LLM provider key:
 OPENROUTER_API_KEY=sk-or-v1-your-key-here
 
 # Optional — enable additional tools:
-FIRECRAWL_API_KEY=fc-your-key          # Web search & scraping
-FIRECRAWL_API_URL=http://localhost:3002  # Self-hosted Firecrawl (optional)
+FIRECRAWL_API_KEY=fc-your-key          # Web search & scraping (or self-host, see docs)
 FAL_KEY=your-fal-key                   # Image generation (FLUX)
 ```
 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index d8a0d7a48..6c38a3e81 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -79,7 +79,6 @@ Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, we
 | Feature | Provider | Env Variable |
 |---------|----------|--------------|
 | Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY` |
-| Web scraping (self-hosted) | Firecrawl | `FIRECRAWL_API_URL` |
 | Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
 | Image generation | [FAL](https://fal.ai/) | `FAL_KEY` |
 | Premium TTS voices | [ElevenLabs](https://elevenlabs.io/) | `ELEVENLABS_API_KEY` |
@@ -87,6 +86,31 @@ Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, we
 | RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` |
 | Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` |
 
+### Self-Hosting Firecrawl
+
+By default, Hermes uses the [Firecrawl cloud API](https://firecrawl.dev/) for web search and scraping. If you prefer to run Firecrawl locally, you can point Hermes at a self-hosted instance instead.
+
+**What you get:** No API key required, no rate limits, no per-page costs, full data sovereignty.
+
+**What you lose:** The cloud version uses Firecrawl's proprietary "Fire-engine" for advanced anti-bot bypassing (Cloudflare, CAPTCHAs, IP rotation). Self-hosted uses basic fetch + Playwright, so some protected sites may fail. Search uses DuckDuckGo instead of Google.
+
+**Setup:**
+
+1. Clone and start the Firecrawl Docker stack (5 containers: API, Playwright, Redis, RabbitMQ, PostgreSQL — requires ~4-8 GB RAM):
+   ```bash
+   git clone https://github.com/mendableai/firecrawl
+   cd firecrawl
+   # In .env, set: USE_DB_AUTHENTICATION=false
+   docker compose up -d
+   ```
+
+2. Point Hermes at your instance (no API key needed):
+   ```bash
+   hermes config set FIRECRAWL_API_URL http://localhost:3002
+   ```
+
+You can also set both `FIRECRAWL_API_KEY` and `FIRECRAWL_API_URL` if your self-hosted instance has authentication enabled.
+
 ## OpenRouter Provider Routing
 
 When using OpenRouter, you can control how requests are routed across providers. Add a `provider_routing` section to `~/.hermes/config.yaml`: