Files
Timmy-time-dashboard/tests/timmy/test_tools_search.py
Claude (Opus 4.6) a7ccfbddc9
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled
[claude] feat: SearXNG + Crawl4AI self-hosted search backend (#1282) (#1299)
2026-03-24 01:52:51 +00:00

309 lines
12 KiB
Python

"""Unit tests for web_search and scrape_url tools (SearXNG + Crawl4AI).
All tests use mocked HTTP — no live services required.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from timmy.tools.search import _extract_crawl_content, scrape_url, web_search
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _mock_requests(json_response=None, status_code=200, raise_exc=None):
"""Build a mock requests module whose .get/.post return controlled responses."""
mock_req = MagicMock()
# Exception hierarchy
class Timeout(Exception):
pass
class HTTPError(Exception):
def __init__(self, *a, response=None, **kw):
super().__init__(*a, **kw)
self.response = response
class RequestException(Exception):
pass
exc_mod = MagicMock()
exc_mod.Timeout = Timeout
exc_mod.HTTPError = HTTPError
exc_mod.RequestException = RequestException
mock_req.exceptions = exc_mod
if raise_exc is not None:
mock_req.get.side_effect = raise_exc
mock_req.post.side_effect = raise_exc
else:
mock_resp = MagicMock()
mock_resp.status_code = status_code
mock_resp.json.return_value = json_response or {}
if status_code >= 400:
mock_resp.raise_for_status.side_effect = HTTPError(
response=MagicMock(status_code=status_code)
)
mock_req.get.return_value = mock_resp
mock_req.post.return_value = mock_resp
return mock_req
# ---------------------------------------------------------------------------
# web_search tests
# ---------------------------------------------------------------------------
class TestWebSearch:
    """Tests for ``web_search`` with the ``requests`` module and settings mocked."""

    @staticmethod
    def _search(requests_module, query, **kwargs):
        """Call ``web_search`` with ``requests`` replaced and SearXNG configured.

        ``requests_module`` is installed as ``sys.modules["requests"]`` for the
        duration of the call; passing ``None`` makes ``import requests`` fail.
        Extra keyword arguments are forwarded to ``web_search``.
        """
        with patch.dict("sys.modules", {"requests": requests_module}):
            with patch("timmy.tools.search.settings") as mock_settings:
                mock_settings.timmy_search_backend = "searxng"
                mock_settings.search_url = "http://localhost:8888"
                return web_search(query, **kwargs)

    def test_backend_none_short_circuits(self):
        """TIMMY_SEARCH_BACKEND=none returns disabled message immediately."""
        with patch("timmy.tools.search.settings") as mock_settings:
            mock_settings.timmy_search_backend = "none"
            result = web_search("anything")
        assert "disabled" in result

    def test_missing_requests_package(self):
        """Graceful error when requests is not installed."""
        result = self._search(None, "test query")
        assert "requests" in result and "not installed" in result

    def test_successful_search(self):
        """Happy path: returns formatted result list."""
        mock_data = {
            "results": [
                {"title": "Foo Bar", "url": "https://example.com/foo", "content": "Foo is great"},
                {"title": "Baz", "url": "https://example.com/baz", "content": "Baz rules"},
            ]
        }
        result = self._search(_mock_requests(json_response=mock_data), "foo bar")
        assert "Foo Bar" in result
        assert "https://example.com/foo" in result
        assert "Baz" in result
        assert "foo bar" in result

    def test_no_results(self):
        """Empty results list returns a helpful no-results message."""
        result = self._search(_mock_requests(json_response={"results": []}), "xyzzy")
        assert "No results" in result

    def test_num_results_respected(self):
        """Only up to num_results entries are returned."""
        mock_data = {
            "results": [
                {"title": f"Result {i}", "url": f"https://example.com/{i}", "content": "x"}
                for i in range(10)
            ]
        }
        result = self._search(_mock_requests(json_response=mock_data), "test", num_results=3)
        # Only 3 numbered entries should appear
        assert "1." in result
        assert "3." in result
        assert "4." not in result
        # Title checks do not depend on how the entries are numbered.
        assert "Result 2" in result
        assert "Result 3" not in result

    def test_service_unavailable(self):
        """Connection error degrades gracefully."""
        # Built via the shared helper so requests.exceptions holds real
        # exception classes rather than MagicMock attributes.
        mock_req = _mock_requests(raise_exc=OSError("connection refused"))
        result = self._search(mock_req, "test")
        assert "not reachable" in result or "unavailable" in result

    def test_catalog_entry_exists(self):
        """web_search must appear in the tool catalog."""
        from timmy.tools import get_all_available_tools

        catalog = get_all_available_tools()
        assert "web_search" in catalog
        assert "orchestrator" in catalog["web_search"]["available_in"]
        assert "echo" in catalog["web_search"]["available_in"]
# ---------------------------------------------------------------------------
# scrape_url tests
# ---------------------------------------------------------------------------
class TestScrapeUrl:
    """Tests for ``scrape_url`` with the ``requests`` module and settings mocked."""

    @staticmethod
    def _scrape(requests_module, url="https://example.com"):
        """Call ``scrape_url`` with ``requests`` replaced and a crawl URL configured.

        ``requests_module`` is installed as ``sys.modules["requests"]`` for the
        duration of the call; passing ``None`` makes ``import requests`` fail.
        """
        with patch.dict("sys.modules", {"requests": requests_module}):
            with patch("timmy.tools.search.settings") as mock_settings:
                mock_settings.timmy_search_backend = "searxng"
                mock_settings.crawl_url = "http://localhost:11235"
                return scrape_url(url)

    @staticmethod
    def _async_requests(poll_payload):
        """Build a mock ``requests`` for the async flow.

        ``post`` returns a response whose JSON is ``{"task_id": "abc123"}``;
        ``get`` returns a response whose JSON is ``poll_payload``.
        """
        submit_response = MagicMock()
        submit_response.json.return_value = {"task_id": "abc123"}
        submit_response.raise_for_status.return_value = None

        poll_response = MagicMock()
        poll_response.json.return_value = poll_payload
        poll_response.raise_for_status.return_value = None

        # Start from the shared helper so requests.exceptions holds real
        # exception classes, then give POST and GET distinct responses.
        mock_req = _mock_requests()
        mock_req.post.return_value = submit_response
        mock_req.get.return_value = poll_response
        return mock_req

    def test_invalid_url_no_scheme(self):
        """URLs without http(s) scheme are rejected before any HTTP call."""
        result = scrape_url("example.com/page")
        assert "Error: invalid URL" in result

    def test_invalid_url_empty(self):
        """An empty URL is rejected as invalid."""
        result = scrape_url("")
        assert "Error: invalid URL" in result

    def test_backend_none_short_circuits(self):
        """Backend set to "none" returns the disabled message."""
        with patch("timmy.tools.search.settings") as mock_settings:
            mock_settings.timmy_search_backend = "none"
            result = scrape_url("https://example.com")
        assert "disabled" in result

    def test_missing_requests_package(self):
        """Graceful error when requests is not installed."""
        result = self._scrape(None)
        assert "requests" in result and "not installed" in result

    def test_sync_result_returned_immediately(self):
        """If Crawl4AI returns results in the POST response, use them directly."""
        mock_data = {
            "results": [{"markdown": "# Hello\n\nThis is the page content."}]
        }
        result = self._scrape(_mock_requests(json_response=mock_data))
        assert "Hello" in result
        assert "page content" in result

    def test_async_poll_completed(self):
        """Async task_id flow: polls until completed and returns content."""
        mock_req = self._async_requests(
            {
                "status": "completed",
                "results": [{"markdown": "# Async content"}],
            }
        )
        # Replacing the module's ``time`` makes ``time.sleep`` a no-op mock,
        # so polling never actually waits.
        with patch("timmy.tools.search.time"):
            result = self._scrape(mock_req)
        assert "Async content" in result

    def test_async_poll_failed_task(self):
        """Crawl4AI task failure is reported clearly."""
        mock_req = self._async_requests({"status": "failed", "error": "site blocked"})
        # Replacing the module's ``time`` makes ``time.sleep`` a no-op mock,
        # so polling never actually waits.
        with patch("timmy.tools.search.time"):
            result = self._scrape(mock_req)
        assert "failed" in result and "site blocked" in result

    def test_service_unavailable(self):
        """Connection error degrades gracefully."""
        mock_req = _mock_requests(raise_exc=OSError("connection refused"))
        result = self._scrape(mock_req)
        assert "not reachable" in result or "unavailable" in result

    def test_content_truncation(self):
        """Content longer than ~4000 tokens is truncated."""
        long_content = "x" * 20000
        mock_data = {"results": [{"markdown": long_content}]}
        result = self._scrape(_mock_requests(json_response=mock_data))
        assert "[…truncated" in result
        assert len(result) < 17000

    def test_catalog_entry_exists(self):
        """scrape_url must appear in the tool catalog."""
        from timmy.tools import get_all_available_tools

        catalog = get_all_available_tools()
        assert "scrape_url" in catalog
        assert "orchestrator" in catalog["scrape_url"]["available_in"]
# ---------------------------------------------------------------------------
# _extract_crawl_content helper
# ---------------------------------------------------------------------------
class TestExtractCrawlContent:
    """Direct tests of the ``_extract_crawl_content`` helper."""

    def test_empty_results(self):
        """An empty result list yields the "No content" message."""
        output = _extract_crawl_content([], "https://example.com")
        assert "No content" in output

    def test_markdown_field_preferred(self):
        """Markdown text appears when both ``markdown`` and ``content`` exist."""
        entry = {"markdown": "# Title", "content": "fallback"}
        output = _extract_crawl_content([entry], "https://example.com")
        assert "Title" in output

    def test_fallback_to_content_field(self):
        """With no ``markdown`` key, the ``content`` text is returned."""
        entry = {"content": "plain text content"}
        output = _extract_crawl_content([entry], "https://example.com")
        assert "plain text content" in output

    def test_no_content_fields(self):
        """An entry with neither field yields the "No readable content" message."""
        entry = {"url": "https://example.com"}
        output = _extract_crawl_content([entry], "https://example.com")
        assert "No readable content" in output