From 5b74df2bfc4f5632c9a0a8a22d0bb4301f900e0d Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 02:19:57 -0700 Subject: [PATCH 01/25] fix: OAuth flag stale after refresh/fallback, memory nudge never fires, dead code - Update _is_anthropic_oauth in _try_refresh_anthropic_client_credentials() when token type changes during credential refresh - Set _is_anthropic_oauth in _try_activate_fallback() Anthropic path - Move _turns_since_memory and _iters_since_skill init to __init__ so nudge counters accumulate across run_conversation() calls in CLI mode - Remove unreachable retry_count >= max_retries block after raise Adds 7 regression tests. Salvaged from PR #1797 by @0xbyt4. --- run_agent.py | 17 +++--- tests/test_run_agent.py | 132 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 7 deletions(-) diff --git a/run_agent.py b/run_agent.py index 210ab2d2..0ce3919f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -738,6 +738,8 @@ class AIAgent: self._user_profile_enabled = False self._memory_nudge_interval = 10 self._memory_flush_min_turns = 6 + self._turns_since_memory = 0 + self._iters_since_skill = 0 if not skip_memory: try: from hermes_cli.config import load_config as _load_mem_config @@ -2951,6 +2953,9 @@ class AIAgent: return False self._anthropic_api_key = new_token + # Update OAuth flag — token type may have changed (API key ↔ OAuth) + from agent.anthropic_adapter import _is_oauth_token + self._is_anthropic_oauth = _is_oauth_token(new_token) return True def _anthropic_messages_create(self, api_kwargs: dict): @@ -3342,11 +3347,12 @@ class AIAgent: if fb_api_mode == "anthropic_messages": # Build native Anthropic client instead of using OpenAI client - from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token + from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token effective_key = fb_client.api_key or resolve_anthropic_token() or "" self._anthropic_api_key = 
effective_key self._anthropic_base_url = getattr(fb_client, "base_url", None) self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url) + self._is_anthropic_oauth = _is_oauth_token(effective_key) self.client = None self._client_kwargs = {} else: @@ -4831,8 +4837,9 @@ class AIAgent: self._incomplete_scratchpad_retries = 0 self._codex_incomplete_retries = 0 self._last_content_with_tools = None - self._turns_since_memory = 0 - self._iters_since_skill = 0 + # NOTE: _turns_since_memory and _iters_since_skill are NOT reset here. + # They are initialized in __init__ and must persist across run_conversation + # calls so that nudge logic accumulates correctly in CLI mode. self.iteration_budget = IterationBudget(self.max_iterations) # Initialize conversation (copy to avoid mutating the caller's list) @@ -5850,10 +5857,6 @@ class AIAgent: self._client_log_context(), api_error, ) - if retry_count >= max_retries: - self._vprint(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}") - self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...") - # Sleep in small increments so we can respond to interrupts quickly # instead of blocking the entire wait_time in one sleep() call sleep_end = time.time() + wait_time diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index cfe8bab2..ec9b26f3 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -2717,3 +2717,135 @@ class TestNormalizeCodexDictArguments: msg, _ = agent._normalize_codex_response(response) tc = msg.tool_calls[0] assert tc.function.arguments == args_str + + +# --------------------------------------------------------------------------- +# OAuth flag and nudge counter fixes (salvaged from PR #1797) +# --------------------------------------------------------------------------- + + +class TestOAuthFlagAfterCredentialRefresh: + """_is_anthropic_oauth must update when token type changes during refresh.""" + + def 
test_oauth_flag_updates_api_key_to_oauth(self, agent): + """Refreshing from API key to OAuth token must set flag to True.""" + agent.api_mode = "anthropic_messages" + agent._anthropic_api_key = "sk-ant-api-old" + agent._anthropic_client = MagicMock() + agent._is_anthropic_oauth = False + + with ( + patch("agent.anthropic_adapter.resolve_anthropic_token", + return_value="sk-ant-setup-oauth-token"), + patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()), + ): + result = agent._try_refresh_anthropic_client_credentials() + + assert result is True + assert agent._is_anthropic_oauth is True + + def test_oauth_flag_updates_oauth_to_api_key(self, agent): + """Refreshing from OAuth to API key must set flag to False.""" + agent.api_mode = "anthropic_messages" + agent._anthropic_api_key = "sk-ant-setup-old" + agent._anthropic_client = MagicMock() + agent._is_anthropic_oauth = True + + with ( + patch("agent.anthropic_adapter.resolve_anthropic_token", + return_value="sk-ant-api03-new-key"), + patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()), + ): + result = agent._try_refresh_anthropic_client_credentials() + + assert result is True + assert agent._is_anthropic_oauth is False + + +class TestFallbackSetsOAuthFlag: + """_try_activate_fallback must set _is_anthropic_oauth for Anthropic fallbacks.""" + + def test_fallback_to_anthropic_oauth_sets_flag(self, agent): + agent._fallback_activated = False + agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"} + + mock_client = MagicMock() + mock_client.base_url = "https://api.anthropic.com/v1" + mock_client.api_key = "sk-ant-setup-oauth-token" + + with ( + patch("agent.auxiliary_client.resolve_provider_client", + return_value=(mock_client, None)), + patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()), + patch("agent.anthropic_adapter.resolve_anthropic_token", + return_value=None), + ): + result = 
agent._try_activate_fallback() + + assert result is True + assert agent._is_anthropic_oauth is True + + def test_fallback_to_anthropic_api_key_clears_flag(self, agent): + agent._fallback_activated = False + agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"} + + mock_client = MagicMock() + mock_client.base_url = "https://api.anthropic.com/v1" + mock_client.api_key = "sk-ant-api03-regular-key" + + with ( + patch("agent.auxiliary_client.resolve_provider_client", + return_value=(mock_client, None)), + patch("agent.anthropic_adapter.build_anthropic_client", + return_value=MagicMock()), + patch("agent.anthropic_adapter.resolve_anthropic_token", + return_value=None), + ): + result = agent._try_activate_fallback() + + assert result is True + assert agent._is_anthropic_oauth is False + + +class TestMemoryNudgeCounterPersistence: + """_turns_since_memory must persist across run_conversation calls.""" + + def test_counters_initialized_in_init(self): + """Counters must exist on the agent after __init__.""" + with patch("run_agent.get_tool_definitions", return_value=[]): + a = AIAgent( + model="test", api_key="test-key", provider="openrouter", + skip_context_files=True, skip_memory=True, + ) + assert hasattr(a, "_turns_since_memory") + assert hasattr(a, "_iters_since_skill") + assert a._turns_since_memory == 0 + assert a._iters_since_skill == 0 + + def test_counters_not_reset_in_preamble(self): + """The run_conversation preamble must not zero the nudge counters.""" + import inspect + src = inspect.getsource(AIAgent.run_conversation) + # The preamble resets many fields (retry counts, budget, etc.) + # before the main loop. Find that reset block and verify our + # counters aren't in it. The reset block ends at iteration_budget. 
+ preamble_end = src.index("self.iteration_budget = IterationBudget") + preamble = src[:preamble_end] + assert "self._turns_since_memory = 0" not in preamble + assert "self._iters_since_skill = 0" not in preamble + + +class TestDeadRetryCode: + """Unreachable retry_count >= max_retries after raise must not exist.""" + + def test_no_unreachable_max_retries_after_backoff(self): + import inspect + source = inspect.getsource(AIAgent.run_conversation) + occurrences = source.count("if retry_count >= max_retries:") + assert occurrences == 2, ( + f"Expected 2 occurrences of 'if retry_count >= max_retries:' " + f"but found {occurrences}" + ) From 6fc4e36625fefd6a2e0848546644c4541d0a7031 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 18 Mar 2026 02:21:29 -0700 Subject: [PATCH 02/25] fix: search all sources by default in session_search (#1892) * fix: include ACP sessions in default search sources * fix: remove hardcoded source allowlist from session search The default source_filter was a hardcoded list that silently excluded any platform not explicitly listed. Instead of maintaining an ever-growing allowlist, remove it entirely so all sources are searched by default. Callers can still pass source_filter explicitly to narrow results. Follow-up to cherry-picked PR #1817. 
--------- Co-authored-by: someoneexistsontheinternet <154079416+someoneexistsontheinternet@users.noreply.github.com> Co-authored-by: Test --- hermes_state.py | 10 ++++------ tests/test_hermes_state.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/hermes_state.py b/hermes_state.py index e87997ec..b35723d1 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -757,16 +757,14 @@ class SessionDB: if not query: return [] - if source_filter is None: - source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"] - # Build WHERE clauses dynamically where_clauses = ["messages_fts MATCH ?"] params: list = [query] - source_placeholders = ",".join("?" for _ in source_filter) - where_clauses.append(f"s.source IN ({source_placeholders})") - params.extend(source_filter) + if source_filter is not None: + source_placeholders = ",".join("?" for _ in source_filter) + where_clauses.append(f"s.source IN ({source_placeholders})") + params.extend(source_filter) if role_filter: role_placeholders = ",".join("?" 
for _ in role_filter) diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index f9155d3f..c731ccf3 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -210,6 +210,25 @@ class TestFTS5Search: sources = [r["source"] for r in results] assert all(s == "telegram" for s in sources) + def test_search_default_sources_include_acp(self, db): + db.create_session(session_id="s1", source="acp") + db.append_message("s1", role="user", content="ACP question about Python") + + results = db.search_messages("Python") + sources = [r["source"] for r in results] + assert "acp" in sources + + def test_search_default_includes_all_platforms(self, db): + """Default search (no source_filter) should find sessions from any platform.""" + for src in ("cli", "telegram", "signal", "homeassistant", "acp", "matrix"): + sid = f"s-{src}" + db.create_session(session_id=sid, source=src) + db.append_message(sid, role="user", content=f"universal search test from {src}") + + results = db.search_messages("universal search test") + found_sources = {r["source"] for r in results} + assert found_sources == {"cli", "telegram", "signal", "homeassistant", "acp", "matrix"} + def test_search_with_role_filter(self, db): db.create_session(session_id="s1", source="cli") db.append_message("s1", role="user", content="What is FastAPI?") From ba7248c6696b77adc92993b26ae50d474b385d0c Mon Sep 17 00:00:00 2001 From: Bartok9 Date: Wed, 18 Mar 2026 03:32:26 -0400 Subject: [PATCH 03/25] fix(delegate): move _saved_tool_names save/restore to _run_single_child scope Fixes #1802 The v0.3.0 refactor split child agent construction (_build_child_agent) and execution (_run_single_child) into separate functions. This created a scope bug where _saved_tool_names was defined in _build_child_agent but referenced in _run_single_child's finally block, causing a NameError on every delegate_task call. 
Solution: Move the save/restore logic entirely into _run_single_child, keeping the save and restore in the same scope as the try/finally block. This is cleaner than passing the variable through and removes the dead save from _build_child_agent. --- tools/delegate_tool.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 2a0e5b13..e6907d45 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -173,10 +173,6 @@ def _build_child_agent( from run_agent import AIAgent import model_tools - # Save the parent's resolved tool names before the child agent can - # overwrite the process-global via get_tool_definitions(). - _saved_tool_names = list(model_tools._last_resolved_tool_names) - # When no explicit toolsets given, inherit from parent's enabled toolsets # so disabled tools (e.g. web) don't leak to subagents. if toolsets: @@ -263,6 +259,13 @@ def _run_single_child( # Get the progress callback from the child agent child_progress_cb = getattr(child, 'tool_progress_callback', None) + # Save the parent's resolved tool names before the child agent can + # overwrite the process-global via get_tool_definitions(). + # This must be in _run_single_child (not _build_child_agent) so the + # save/restore happens in the same scope as the try/finally. + import model_tools + _saved_tool_names = list(model_tools._last_resolved_tool_names) + try: result = child.run_conversation(user_message=goal) From 66f71c18362dd9434d2bbac8a523233ca78d1c34 Mon Sep 17 00:00:00 2001 From: Bartok9 Date: Wed, 18 Mar 2026 03:33:04 -0400 Subject: [PATCH 04/25] fix(matrix): use correct reply_to_message_id parameter name Fixes #1842 The MessageEvent dataclass expects 'reply_to_message_id' but the Matrix connector was passing 'reply_to'. This caused replies to fail with: MessageEvent.__init__() got an unexpected keyword argument 'reply_to' Changed the parameter name to match the dataclass definition. 
--- gateway/platforms/matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index a4f5531d..77a2f240 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -635,7 +635,7 @@ class MatrixAdapter(BasePlatformAdapter): source=source, raw_message=getattr(event, "source", {}), message_id=event.event_id, - reply_to=reply_to, + reply_to_message_id=reply_to, ) await self.handle_message(msg_event) From e4043633fcf852c604311d5e09bc32a0ed3150cc Mon Sep 17 00:00:00 2001 From: octo-patch Date: Wed, 18 Mar 2026 02:42:58 -0700 Subject: [PATCH 05/25] feat: upgrade MiniMax default to M2.7 + add new OpenRouter models MiniMax: Add M2.7 and M2.7-highspeed as new defaults across provider model lists, auxiliary client, metadata, setup wizard, RL training tool, fallback tests, and docs. Retain M2.5/M2.1 as alternatives. OpenRouter: Add grok-4.20-beta, nemotron-3-super-120b-a12b:free, trinity-large-preview:free, glm-5-turbo, and hunter-alpha to the model catalog. MiniMax changes based on PR #1882 by @octo-patch (applied manually due to stale conflicts in refactored pricing module). 
--- agent/auxiliary_client.py | 4 ++-- agent/model_metadata.py | 2 ++ hermes_cli/models.py | 11 ++++++++++- hermes_cli/setup.py | 4 ++-- tests/test_fallback_model.py | 8 ++++---- tools/rl_training_tool.py | 4 ++-- website/docs/user-guide/configuration.md | 4 ++-- website/docs/user-guide/features/rl-training.md | 2 +- 8 files changed, 25 insertions(+), 14 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 94be9d6f..c1049083 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -55,8 +55,8 @@ logger = logging.getLogger(__name__) _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "zai": "glm-4.5-flash", "kimi-coding": "kimi-k2-turbo-preview", - "minimax": "MiniMax-M2.5-highspeed", - "minimax-cn": "MiniMax-M2.5-highspeed", + "minimax": "MiniMax-M2.7-highspeed", + "minimax-cn": "MiniMax-M2.7-highspeed", "anthropic": "claude-haiku-4-5-20251001", "ai-gateway": "google/gemini-3-flash", "opencode-zen": "gemini-3-flash", diff --git a/agent/model_metadata.py b/agent/model_metadata.py index cd847aa8..fb0d3846 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -77,6 +77,8 @@ DEFAULT_CONTEXT_LENGTHS = { "kimi-k2-thinking-turbo": 262144, "kimi-k2-turbo-preview": 262144, "kimi-k2-0905-preview": 131072, + "MiniMax-M2.7": 204800, + "MiniMax-M2.7-highspeed": 204800, "MiniMax-M2.5": 204800, "MiniMax-M2.5-highspeed": 204800, "MiniMax-M2.1": 204800, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 174aa947..5a3f871f 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -28,7 +28,12 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("z-ai/glm-5", ""), ("moonshotai/kimi-k2.5", ""), - ("minimax/minimax-m2.5", ""), + ("minimax/minimax-m2.7", ""), + ("x-ai/grok-4.20-beta", ""), + ("nvidia/nemotron-3-super-120b-a12b:free", "free"), + ("arcee-ai/trinity-large-preview:free", "free"), + ("z-ai/glm-5-turbo", ""), + ("openrouter/hunter-alpha", ""), ] _PROVIDER_MODELS: 
dict[str, list[str]] = { @@ -61,11 +66,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-0905-preview", ], "minimax": [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1", ], "minimax-cn": [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index e3b5ed7d..46c7eea9 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -57,8 +57,8 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None: _DEFAULT_PROVIDER_MODELS = { "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], - "minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], - "minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], + "minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], + "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], } diff --git a/tests/test_fallback_model.py b/tests/test_fallback_model.py index 9e34bf74..df2bc9cb 100644 --- a/tests/test_fallback_model.py +++ b/tests/test_fallback_model.py @@ -131,7 +131,7 @@ class TestTryActivateFallback: def test_activates_minimax_fallback(self): agent = _make_agent( - fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, + fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"}, ) mock_client = _mock_resolve( api_key="sk-mm-key", @@ -139,10 +139,10 @@ class TestTryActivateFallback: ) with patch( "agent.auxiliary_client.resolve_provider_client", - 
return_value=(mock_client, "MiniMax-M2.5"), + return_value=(mock_client, "MiniMax-M2.7"), ): assert agent._try_activate_fallback() is True - assert agent.model == "MiniMax-M2.5" + assert agent.model == "MiniMax-M2.7" assert agent.provider == "minimax" assert agent.client is mock_client @@ -165,7 +165,7 @@ class TestTryActivateFallback: def test_returns_false_when_no_api_key(self): """Fallback should fail gracefully when the API key env var is unset.""" agent = _make_agent( - fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, + fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"}, ) with patch( "agent.auxiliary_client.resolve_provider_client", diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py index 6d32bd53..566a2fb3 100644 --- a/tools/rl_training_tool.py +++ b/tools/rl_training_tool.py @@ -1009,7 +1009,7 @@ async def rl_list_runs() -> str: TEST_MODELS = [ {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"}, {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"}, - {"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"}, + {"id": "minimax/minimax-m2.7", "name": "MiniMax M2.7", "scale": "large"}, ] # Default test parameters - quick but representative @@ -1370,7 +1370,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}} RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. 
Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}} RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}} -RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}} +RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. 
Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.7"}}, "required": []}} _rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"] diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 8ee4d309..878982b2 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -131,11 +131,11 @@ hermes chat --provider kimi-coding --model moonshot-v1-auto # Requires: KIMI_API_KEY in ~/.hermes/.env # MiniMax (global endpoint) -hermes chat --provider minimax --model MiniMax-Text-01 +hermes chat --provider minimax --model MiniMax-M2.7 # Requires: MINIMAX_API_KEY in ~/.hermes/.env # MiniMax (China endpoint) -hermes chat --provider minimax-cn --model MiniMax-Text-01 +hermes chat --provider minimax-cn --model MiniMax-M2.7 # Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env # Alibaba Cloud / DashScope (Qwen models) diff --git a/website/docs/user-guide/features/rl-training.md b/website/docs/user-guide/features/rl-training.md index 440cc31b..9c5d7195 100644 --- a/website/docs/user-guide/features/rl-training.md +++ b/website/docs/user-guide/features/rl-training.md @@ -147,7 +147,7 @@ Default configuration: - Tests 3 models at different scales for robustness: - `qwen/qwen3-8b` (small) - `z-ai/glm-4.7-flash` (medium) - - `minimax/minimax-m2.5` (large) + - `minimax/minimax-m2.7` (large) - Total: ~144 rollouts This validates: From 24ac57704628e6938cd49e63bc0be3429283c3c9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 
18 Mar 2026 02:50:31 -0700 Subject: [PATCH 06/25] fix: respect model.default from config.yaml for openai-codex provider (#1896) When config.yaml had a non-default model (e.g. gpt-5.3-codex) and the provider was openai-codex, _normalize_model_for_provider() would replace it with the latest available codex model because _model_is_default only checked the CLI argument, not the config value. Now _model_is_default is False when config.yaml has a model that differs from the global fallback (anthropic/claude-opus-4.6), so the user's explicit config choice is preserved. Fixes #1887 Co-authored-by: Test --- cli.py | 10 +++++-- tests/test_cli_provider_resolution.py | 43 +++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index 19a0c972..18620142 100755 --- a/cli.py +++ b/cli.py @@ -1044,11 +1044,17 @@ class HermesCLI: # env vars would stomp each other. _model_config = CLI_CONFIG.get("model", {}) _config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "") - self.model = model or _config_model or "anthropic/claude-opus-4.6" + _FALLBACK_MODEL = "anthropic/claude-opus-4.6" + self.model = model or _config_model or _FALLBACK_MODEL # Track whether model was explicitly chosen by the user or fell back # to the global default. Provider-specific normalisation may override # the default silently but should warn when overriding an explicit choice. - self._model_is_default = not model + # A config model that matches the global fallback is NOT considered an + # explicit choice — the user just never changed it. But a config model + # like "gpt-5.3-codex" IS explicit and must be preserved. 
+ self._model_is_default = not model and ( + not _config_model or _config_model == _FALLBACK_MODEL + ) self._explicit_api_key = api_key self._explicit_base_url = base_url diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 99d8830f..48281101 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -312,6 +312,49 @@ def test_codex_provider_uses_config_model(monkeypatch): assert shell.model != "should-be-ignored" +def test_codex_config_model_not_replaced_by_normalization(monkeypatch): + """When the user sets model.default in config.yaml to a specific codex + model, _normalize_model_for_provider must NOT replace it with the latest + available model from the API. Regression test for #1887.""" + cli = _import_cli() + + monkeypatch.delenv("LLM_MODEL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + # User explicitly configured gpt-5.3-codex in config.yaml + monkeypatch.setitem(cli.CLI_CONFIG, "model", { + "default": "gpt-5.3-codex", + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + }) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "fake-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + # API returns a DIFFERENT model than what the user configured + monkeypatch.setattr( + "hermes_cli.codex_models.get_codex_model_ids", + lambda access_token=None: ["gpt-5.4", "gpt-5.3-codex"], + ) + + shell = cli.HermesCLI(compact=True, max_turns=1) + + # Config model is NOT the global default — user made a deliberate choice + assert shell._model_is_default is False + assert shell._ensure_runtime_credentials() is True + assert 
shell.provider == "openai-codex" + # Model must stay as user configured, not replaced by gpt-5.4 + assert shell.model == "gpt-5.3-codex" + + def test_codex_provider_preserves_explicit_codex_model(monkeypatch): """If the user explicitly passes a Codex-compatible model, it must be preserved even when the provider resolves to openai-codex.""" From ace2cc62575b39c58abffd2bf1d46e0e86114bd1 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 02:54:18 -0700 Subject: [PATCH 07/25] fix(gateway): PID-based wait with force-kill for gateway restart Add _wait_for_gateway_exit() that polls get_running_pid() to confirm the old gateway process has actually exited before starting a new one. If the process doesn't exit within 5s, sends SIGKILL to the specific PID. Uses the saved PID from gateway.pid (not launchd labels) so it works correctly with multiple gateway instances under separate HERMES_HOME directories. Applied to both launchd_restart() and the manual restart path (replaces the blind time.sleep(2)). Inspired by PR #1881 by @AzothZephyr (race condition diagnosis). Adds 4 tests. --- hermes_cli/gateway.py | 48 ++++++++++++++++-- tests/hermes_cli/test_gateway.py | 83 ++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 4 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 1c44e311..a7876bc4 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -849,6 +849,46 @@ def launchd_stop(): subprocess.run(["launchctl", "stop", "ai.hermes.gateway"], check=True) print("✓ Service stopped") +def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0): + """Wait for the gateway process (by saved PID) to exit. + + Uses the PID from the gateway.pid file — not launchd labels — so this + works correctly when multiple gateway instances run under separate + HERMES_HOME directories. + + Args: + timeout: Total seconds to wait before giving up. + force_after: Seconds of graceful waiting before sending SIGKILL. 
+ """ + import time + from gateway.status import get_running_pid + + deadline = time.monotonic() + timeout + force_deadline = time.monotonic() + force_after + force_sent = False + + while time.monotonic() < deadline: + pid = get_running_pid() + if pid is None: + return # Process exited cleanly. + + if not force_sent and time.monotonic() >= force_deadline: + # Grace period expired — force-kill the specific PID. + try: + os.kill(pid, signal.SIGKILL) + print(f"⚠ Gateway PID {pid} did not exit gracefully; sent SIGKILL") + except (ProcessLookupError, PermissionError): + return # Already gone or we can't touch it. + force_sent = True + + time.sleep(0.3) + + # Timed out even after SIGKILL. + remaining_pid = get_running_pid() + if remaining_pid is not None: + print(f"⚠ Gateway PID {remaining_pid} still running after {timeout}s — restart may fail") + + def launchd_restart(): try: launchd_stop() @@ -856,6 +896,7 @@ def launchd_restart(): if e.returncode != 3: raise print("↻ launchd job was unloaded; skipping stop") + _wait_for_gateway_exit() launchd_start() def launchd_status(deep: bool = False): @@ -1753,10 +1794,9 @@ def gateway_command(args): killed = kill_gateway_processes() if killed: print(f"✓ Stopped {killed} gateway process(es)") - - import time - time.sleep(2) - + + _wait_for_gateway_exit(timeout=10.0, force_after=5.0) + # Start fresh print("Starting gateway...") run_gateway(verbose=False) diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py index 52d43fd0..b92f385e 100644 --- a/tests/hermes_cli/test_gateway.py +++ b/tests/hermes_cli/test_gateway.py @@ -1,6 +1,8 @@ """Tests for hermes_cli.gateway.""" +import signal from types import SimpleNamespace +from unittest.mock import patch, call import hermes_cli.gateway as gateway @@ -169,3 +171,84 @@ def test_install_linux_gateway_from_setup_system_choice_as_root_installs(monkeyp assert (scope, did_install) == ("system", True) assert calls == [(True, True, "alice")] + + +# 
--------------------------------------------------------------------------- +# _wait_for_gateway_exit +# --------------------------------------------------------------------------- + + +class TestWaitForGatewayExit: + """PID-based wait with force-kill on timeout.""" + + def test_returns_immediately_when_no_pid(self, monkeypatch): + """If get_running_pid returns None, exit instantly.""" + monkeypatch.setattr("gateway.status.get_running_pid", lambda: None) + # Should return without sleeping at all. + gateway._wait_for_gateway_exit(timeout=1.0, force_after=0.5) + + def test_returns_when_process_exits_gracefully(self, monkeypatch): + """Process exits after a couple of polls — no SIGKILL needed.""" + poll_count = 0 + + def mock_get_running_pid(): + nonlocal poll_count + poll_count += 1 + return 12345 if poll_count <= 2 else None + + monkeypatch.setattr("gateway.status.get_running_pid", mock_get_running_pid) + monkeypatch.setattr("time.sleep", lambda _: None) + + gateway._wait_for_gateway_exit(timeout=10.0, force_after=999.0) + # Should have polled until None was returned. + assert poll_count == 3 + + def test_force_kills_after_grace_period(self, monkeypatch): + """When the process doesn't exit, SIGKILL the saved PID.""" + import time as _time + + # Simulate monotonic time advancing past force_after + call_num = 0 + def fake_monotonic(): + nonlocal call_num + call_num += 1 + # First two calls: initial deadline + force_deadline setup (time 0) + # Then each loop iteration advances time + return call_num * 2.0 # 2, 4, 6, 8, ... 
+ + kills = [] + def mock_kill(pid, sig): + kills.append((pid, sig)) + + # get_running_pid returns the PID until kill is sent, then None + def mock_get_running_pid(): + return None if kills else 42 + + monkeypatch.setattr("time.monotonic", fake_monotonic) + monkeypatch.setattr("time.sleep", lambda _: None) + monkeypatch.setattr("gateway.status.get_running_pid", mock_get_running_pid) + monkeypatch.setattr("os.kill", mock_kill) + + gateway._wait_for_gateway_exit(timeout=10.0, force_after=5.0) + assert (42, signal.SIGKILL) in kills + + def test_handles_process_already_gone_on_kill(self, monkeypatch): + """ProcessLookupError during SIGKILL is not fatal.""" + import time as _time + + call_num = 0 + def fake_monotonic(): + nonlocal call_num + call_num += 1 + return call_num * 3.0 # Jump past force_after quickly + + def mock_kill(pid, sig): + raise ProcessLookupError + + monkeypatch.setattr("time.monotonic", fake_monotonic) + monkeypatch.setattr("time.sleep", lambda _: None) + monkeypatch.setattr("gateway.status.get_running_pid", lambda: 99) + monkeypatch.setattr("os.kill", mock_kill) + + # Should not raise — ProcessLookupError means it's already gone. 
+ gateway._wait_for_gateway_exit(timeout=10.0, force_after=2.0) From 11f029c311d57dbee37ca94cf45bfb212f04b13e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 18 Mar 2026 02:55:30 -0700 Subject: [PATCH 08/25] fix(tts): document NeuTTS provider and align install guidance (#1903) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: charles-édouard <59705750+ccbbccbb@users.noreply.github.com> --- hermes_cli/setup.py | 2 +- tools/neutts_synth.py | 4 +-- tools/tts_tool.py | 4 +-- .../docs/guides/use-voice-mode-with-hermes.md | 32 +++++++++++++++++ website/docs/user-guide/configuration.md | 7 +++- website/docs/user-guide/features/tts.md | 13 +++++-- .../docs/user-guide/features/voice-mode.md | 34 ++++++++++++++----- 7 files changed, 79 insertions(+), 17 deletions(-) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 46c7eea9..dd06279f 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -1710,7 +1710,7 @@ def _install_neutts_deps() -> bool: return True except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: print_error(f"Failed to install neutts: {e}") - print_info("Try manually: pip install neutts[all]") + print_info("Try manually: python -m pip install -U neutts[all]") return False diff --git a/tools/neutts_synth.py b/tools/neutts_synth.py index b1a91451..ee2c84b2 100644 --- a/tools/neutts_synth.py +++ b/tools/neutts_synth.py @@ -8,7 +8,7 @@ Usage: python -m tools.neutts_synth --text "Hello" --out output.wav \ --ref-audio samples/jo.wav --ref-text samples/jo.txt -Requires: pip install neutts[all] +Requires: python -m pip install -U neutts[all] System: apt install espeak-ng (or brew install espeak-ng) """ @@ -75,7 +75,7 @@ def main(): try: from neutts import NeuTTS except ImportError: - print("Error: neutts not installed. Run: pip install neutts[all]", file=sys.stderr) + print("Error: neutts not installed. 
Run: python -m pip install -U neutts[all]", file=sys.stderr) sys.exit(1) tts = NeuTTS( diff --git a/tools/tts_tool.py b/tools/tts_tool.py index 66911371..e05d1efe 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -423,8 +423,8 @@ def text_to_speech_tool( if not _check_neutts_available(): return json.dumps({ "success": False, - "error": "NeuTTS provider selected but neutts_cli is not installed. " - "Install the NeuTTS skill and run the bootstrap helper first." + "error": "NeuTTS provider selected but neutts is not installed. " + "Run hermes setup and choose NeuTTS, or install espeak-ng and run python -m pip install -U neutts[all]." }, ensure_ascii=False) logger.info("Generating speech with NeuTTS (local)...") _generate_neutts(text, file_str, tts_config) diff --git a/website/docs/guides/use-voice-mode-with-hermes.md b/website/docs/guides/use-voice-mode-with-hermes.md index dc35dcc6..fe38b837 100644 --- a/website/docs/guides/use-voice-mode-with-hermes.md +++ b/website/docs/guides/use-voice-mode-with-hermes.md @@ -72,6 +72,12 @@ pip install hermes-agent[messaging] pip install hermes-agent[tts-premium] ``` +### Local NeuTTS (optional) + +```bash +python -m pip install -U neutts[all] +``` + ### Everything ```bash @@ -84,18 +90,21 @@ pip install hermes-agent[all] ```bash brew install portaudio ffmpeg opus +brew install espeak-ng ``` ### Ubuntu / Debian ```bash sudo apt install portaudio19-dev ffmpeg libopus0 +sudo apt install espeak-ng ``` Why these matter: - `portaudio` → microphone input / playback for CLI voice mode - `ffmpeg` → audio conversion for TTS and messaging delivery - `opus` → Discord voice codec support +- `espeak-ng` → phonemizer backend for NeuTTS ## Step 4: choose STT and TTS providers @@ -133,9 +142,20 @@ ELEVENLABS_API_KEY=*** #### Text-to-speech - `edge` → free and good enough for most users +- `neutts` → free local/on-device TTS - `elevenlabs` → best quality - `openai` → good middle ground +### If you use `hermes setup` + +If you choose NeuTTS 
in the setup wizard, Hermes checks whether `neutts` is already installed. If it is missing, the wizard tells you NeuTTS needs the Python package `neutts` and the system package `espeak-ng`, offers to install them for you, installs `espeak-ng` with your platform package manager, and then runs: + +```bash +python -m pip install -U neutts[all] +``` + +If you skip that install or it fails, the wizard falls back to Edge TTS. + ## Step 5: recommended config ```yaml @@ -159,6 +179,18 @@ tts: This is a good conservative default for most people. +If you want local TTS instead, switch the `tts` block to: + +```yaml +tts: + provider: "neutts" + neutts: + ref_audio: '' + ref_text: '' + model: neuphonic/neutts-air-q4-gguf + device: cpu +``` + ## Use case 1: CLI voice mode ## Turn it on diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 878982b2..aa770c9e 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -929,7 +929,7 @@ You can also change the reasoning effort at runtime with the `/reasoning` comman ```yaml tts: - provider: "edge" # "edge" | "elevenlabs" | "openai" + provider: "edge" # "edge" | "elevenlabs" | "openai" | "neutts" edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages elevenlabs: @@ -938,6 +938,11 @@ tts: openai: model: "gpt-4o-mini-tts" voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer + neutts: + ref_audio: '' + ref_text: '' + model: neuphonic/neutts-air-q4-gguf + device: cpu ``` This controls both the `text_to_speech` tool and spoken replies in voice mode (`/voice tts` in the CLI or messaging gateway). 
diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md index c6ba365a..930a1bbf 100644 --- a/website/docs/user-guide/features/tts.md +++ b/website/docs/user-guide/features/tts.md @@ -10,13 +10,14 @@ Hermes Agent supports both text-to-speech output and voice message transcription ## Text-to-Speech -Convert text to speech with three providers: +Convert text to speech with four providers: | Provider | Quality | Cost | API Key | |----------|---------|------|---------| | **Edge TTS** (default) | Good | Free | None needed | | **ElevenLabs** | Excellent | Paid | `ELEVENLABS_API_KEY` | | **OpenAI TTS** | Good | Paid | `VOICE_TOOLS_OPENAI_KEY` | +| **NeuTTS** | Good | Free | None needed | ### Platform Delivery @@ -32,7 +33,7 @@ Convert text to speech with three providers: ```yaml # In ~/.hermes/config.yaml tts: - provider: "edge" # "edge" | "elevenlabs" | "openai" + provider: "edge" # "edge" | "elevenlabs" | "openai" | "neutts" edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages elevenlabs: @@ -41,6 +42,11 @@ tts: openai: model: "gpt-4o-mini-tts" voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer + neutts: + ref_audio: '' + ref_text: '' + model: neuphonic/neutts-air-q4-gguf + device: cpu ``` ### Telegram Voice Bubbles & ffmpeg @@ -49,6 +55,7 @@ Telegram voice bubbles require Opus/OGG audio format: - **OpenAI and ElevenLabs** produce Opus natively — no extra setup - **Edge TTS** (default) outputs MP3 and needs **ffmpeg** to convert: +- **NeuTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles ```bash # Ubuntu/Debian @@ -61,7 +68,7 @@ brew install ffmpeg sudo dnf install ffmpeg ``` -Without ffmpeg, Edge TTS audio is sent as a regular audio file (playable, but shows as a rectangular player instead of a voice bubble). +Without ffmpeg, Edge TTS and NeuTTS audio are sent as regular audio files (playable, but shown as a rectangular player instead of a voice bubble). 
:::tip If you want voice bubbles without installing ffmpeg, switch to the OpenAI or ElevenLabs provider. diff --git a/website/docs/user-guide/features/voice-mode.md b/website/docs/user-guide/features/voice-mode.md index 3dfe0db4..b0f20355 100644 --- a/website/docs/user-guide/features/voice-mode.md +++ b/website/docs/user-guide/features/voice-mode.md @@ -44,6 +44,9 @@ pip install hermes-agent[messaging] # Premium TTS (ElevenLabs) pip install hermes-agent[tts-premium] +# Local TTS (NeuTTS, optional) +python -m pip install -U neutts[all] + # Everything at once pip install hermes-agent[all] ``` @@ -54,6 +57,8 @@ pip install hermes-agent[all] | `messaging` | `discord.py[voice]`, `python-telegram-bot`, `aiohttp` | Discord & Telegram bots | | `tts-premium` | `elevenlabs` | ElevenLabs TTS provider | +Optional local TTS provider: install `neutts` separately with `python -m pip install -U neutts[all]`. On first use it downloads the model automatically. + :::info `discord.py[voice]` installs **PyNaCl** (for voice encryption) and **opus bindings** automatically. This is required for Discord voice channel support. 
::: @@ -63,9 +68,11 @@ pip install hermes-agent[all] ```bash # macOS brew install portaudio ffmpeg opus +brew install espeak-ng # for NeuTTS # Ubuntu/Debian sudo apt install portaudio19-dev ffmpeg libopus0 +sudo apt install espeak-ng # for NeuTTS ``` | Dependency | Purpose | Required For | @@ -73,6 +80,7 @@ sudo apt install portaudio19-dev ffmpeg libopus0 | **PortAudio** | Microphone input and audio playback | CLI voice mode | | **ffmpeg** | Audio format conversion (MP3 → Opus, PCM → WAV) | All platforms | | **Opus** | Discord voice codec | Discord voice channels | +| **espeak-ng** | Phonemizer backend | Local NeuTTS provider | ### API Keys @@ -84,8 +92,9 @@ Add to `~/.hermes/.env`: GROQ_API_KEY=your-key # Groq Whisper — fast, free tier (cloud) VOICE_TOOLS_OPENAI_KEY=your-key # OpenAI Whisper — paid (cloud) -# Text-to-Speech (optional — Edge TTS works without any key) -ELEVENLABS_API_KEY=your-key # ElevenLabs — premium quality +# Text-to-Speech (optional — Edge TTS and NeuTTS work without any key) +ELEVENLABS_API_KEY=*** # ElevenLabs — premium quality +# VOICE_TOOLS_OPENAI_KEY above also enables OpenAI TTS ``` :::tip @@ -303,8 +312,9 @@ DISCORD_ALLOWED_USERS=your-user-id # STT — local provider needs no key (pip install faster-whisper) # GROQ_API_KEY=your-key # Alternative: cloud-based, fast, free tier -# TTS — optional, Edge TTS (free) is the default -# ELEVENLABS_API_KEY=your-key # Premium quality +# TTS — optional. Edge TTS and NeuTTS need no key. 
+# ELEVENLABS_API_KEY=*** # Premium quality +# VOICE_TOOLS_OPENAI_KEY=*** # OpenAI TTS / Whisper ``` ### Start the Gateway @@ -385,7 +395,7 @@ stt: # Text-to-Speech tts: - provider: "edge" # "edge" (free) | "elevenlabs" | "openai" + provider: "edge" # "edge" (free) | "elevenlabs" | "openai" | "neutts" edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages elevenlabs: @@ -394,6 +404,11 @@ tts: openai: model: "gpt-4o-mini-tts" voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer + neutts: + ref_audio: '' + ref_text: '' + model: neuphonic/neutts-air-q4-gguf + device: cpu ``` ### Environment Variables @@ -410,9 +425,9 @@ STT_OPENAI_MODEL=whisper-1 # Override default OpenAI STT model GROQ_BASE_URL=https://api.groq.com/openai/v1 # Custom Groq endpoint STT_OPENAI_BASE_URL=https://api.openai.com/v1 # Custom OpenAI STT endpoint -# Text-to-Speech providers (Edge TTS needs no key) -ELEVENLABS_API_KEY=... # ElevenLabs (premium quality) -# OpenAI TTS uses VOICE_TOOLS_OPENAI_KEY +# Text-to-Speech providers (Edge TTS and NeuTTS need no key) +ELEVENLABS_API_KEY=*** # ElevenLabs (premium quality) +# VOICE_TOOLS_OPENAI_KEY above also enables OpenAI TTS # Discord voice channel DISCORD_BOT_TOKEN=... @@ -440,6 +455,9 @@ Provider priority (automatic fallback): **local** > **groq** > **openai** | **Edge TTS** | Good | Free | ~1s | No | | **ElevenLabs** | Excellent | Paid | ~2s | Yes | | **OpenAI TTS** | Good | Paid | ~1.5s | Yes | +| **NeuTTS** | Good | Free | Depends on CPU/GPU | No | + +NeuTTS uses the `tts.neutts` config block above. 
--- From 9c0f3462581ffa5fa875d03dd54cdadcd281ae3a Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:01:41 -0700 Subject: [PATCH 09/25] fix: direct user message on STT failure + hermes-agent-setup skill When a user sends a voice message and STT isn't configured, the gateway now sends a clear message directly to the user explaining how to set up voice transcription, rather than relying on the agent to relay an injected context note (which often gets misinterpreted). Also adds a hermes-agent-setup bundled skill covering STT/TTS setup, tool configuration, dependency installation, and troubleshooting. --- gateway/run.py | 27 ++++ skills/dogfood/hermes-agent-setup/SKILL.md | 154 +++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 skills/dogfood/hermes-agent-setup/SKILL.md diff --git a/gateway/run.py b/gateway/run.py index ea9f2a28..668977ef 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1870,6 +1870,33 @@ class GatewayRunner: message_text = await self._enrich_message_with_transcription( message_text, audio_paths ) + # If STT failed, send a direct message to the user so they + # know voice isn't configured — don't rely on the agent to + # relay the error clearly. 
+ _stt_fail_markers = ( + "No STT provider", + "STT is disabled", + "can't listen", + "VOICE_TOOLS_OPENAI_KEY", + ) + if any(m in message_text for m in _stt_fail_markers): + _stt_adapter = self.adapters.get(source.platform) + _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None + if _stt_adapter: + try: + await _stt_adapter.send( + source.chat_id, + "🎤 I received your voice message but can't transcribe it — " + "no speech-to-text provider is configured.\n\n" + "To enable voice: install faster-whisper " + "(`pip install faster-whisper` in the Hermes venv) " + "and set `stt.enabled: true` in config.yaml, " + "then /restart the gateway.\n\n" + "For full setup instructions, type: `/skill hermes-agent-setup`", + metadata=_stt_meta, + ) + except Exception: + pass # ----------------------------------------------------------------- # Enrich document messages with context notes for the agent diff --git a/skills/dogfood/hermes-agent-setup/SKILL.md b/skills/dogfood/hermes-agent-setup/SKILL.md new file mode 100644 index 00000000..275c0686 --- /dev/null +++ b/skills/dogfood/hermes-agent-setup/SKILL.md @@ -0,0 +1,154 @@ +--- +name: hermes-agent-setup +description: Help users configure Hermes Agent — enable tools, set up voice/STT/TTS, install dependencies, and troubleshoot. Use when someone asks to enable features, configure voice, or when the system detects missing config. +version: 1.0.0 +author: Hermes Agent +tags: [setup, configuration, tools, stt, tts, voice, hermes] +--- + +# Hermes Agent Setup & Configuration + +Use this skill when a user asks to enable features, configure voice messages, set up tools, or troubleshoot configuration. + +## Key Paths + +- Config: `~/.hermes/config.yaml` +- API keys: `~/.hermes/.env` +- Skills: `~/.hermes/skills/` +- Hermes install: `~/.hermes/hermes-agent/` + +## Voice Messages (STT) + +Voice messages from Telegram/Discord/WhatsApp/Slack/Signal are auto-transcribed when an STT provider is available. 
+ +### Provider priority (auto-detected): +1. **Local faster-whisper** — free, no API key, runs on CPU/GPU +2. **Groq Whisper** — free tier, needs GROQ_API_KEY +3. **OpenAI Whisper** — paid, needs VOICE_TOOLS_OPENAI_KEY + +### Setup local STT (recommended): + +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate # or: source venv/bin/activate +pip install faster-whisper +``` + +Add to config.yaml under the `stt:` section: +```yaml +stt: + enabled: true + provider: local + local: + model: base # Options: tiny, base, small, medium, large-v3 +``` + +Model downloads automatically on first use (~150 MB for base). + +### Setup Groq STT (free cloud): + +1. Get free key from https://console.groq.com +2. Add GROQ_API_KEY to the env file +3. Set provider to groq in config.yaml stt section + +### Verify STT: + +After config changes, restart the gateway (send /restart in chat, or restart `hermes gateway run`). Then send a voice message. + +## Voice Replies (TTS) + +Hermes can reply with voice when users send voice messages. + +### TTS providers (set API key in env file): + +| Provider | Env var | Free? | +|----------|---------|-------| +| ElevenLabs | ELEVENLABS_API_KEY | Free tier | +| OpenAI | VOICE_TOOLS_OPENAI_KEY | Paid | +| Kokoro (local) | None needed | Free | +| Fish Audio | FISH_AUDIO_API_KEY | Free tier | + +### Voice commands (in any chat): +- `/voice on` — voice reply to voice messages only +- `/voice tts` — voice reply to all messages +- `/voice off` — text only (default) + +## Enabling/Disabling Tools + +### Interactive tool config (requires terminal): + +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate +python -m hermes_cli.main tools +``` + +This opens a curses UI to enable/disable toolsets per platform. + +### After changing tools: + +Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching). 
+ +### Common toolsets: + +| Toolset | What it provides | +|---------|-----------------| +| terminal | Shell command execution | +| file | File read/write/search/patch | +| web | Web search and extraction | +| browser | Browser automation | +| image_gen | AI image generation | +| mcp | MCP server connections | +| voice | Text-to-speech | +| cronjob | Scheduled tasks | + +## Installing Dependencies + +Some tools need extra packages: + +```bash +cd ~/.hermes/hermes-agent && source .venv/bin/activate + +pip install faster-whisper # Local STT +pip install browserbase # Browser automation +pip install mcp # MCP servers +``` + +## Setup Wizard + +For first-time setup or full reconfiguration: + +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate +python -m hermes_cli.main setup +``` + +## Gateway Commands + +| Command | What it does | +|---------|-------------| +| /reset or /new | Fresh session (picks up new tool config) | +| /help | Show all commands | +| /model [name] | Show or change model | +| /compact | Compress conversation to save context | +| /voice [mode] | Configure voice replies | +| /reasoning [effort] | Set reasoning level | +| /sethome | Set home channel for cron/notifications | +| /restart | Restart the gateway (picks up config changes) | + +## Troubleshooting + +### Voice messages not working +1. Check stt.enabled is true in config.yaml +2. Check a provider is available (faster-whisper installed, or API key set) +3. Restart gateway after config changes + +### Tool not available +1. Check if the toolset is enabled for your platform (run `hermes tools`) +2. Some tools need env vars — check the env file +3. 
Use /reset after enabling tools + +### Changes not taking effect +- Gateway: /reset for tool changes, /restart for config changes +- CLI: start a new session From a2440f72f63a1412c1254e2a1eba168b33abe5b1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 18 Mar 2026 03:04:07 -0700 Subject: [PATCH 10/25] feat: use endpoint metadata for custom model context and pricing (#1906) * perf: cache base_url.lower() via property, consolidate triple load_config(), hoist set constant run_agent.py: - Add base_url property that auto-caches _base_url_lower on every assignment, eliminating 12+ redundant .lower() calls per API cycle across __init__, _build_api_kwargs, _supports_reasoning_extra_body, and the main conversation loop - Consolidate three separate load_config() disk reads in __init__ (memory, skills, compression) into a single call, reusing the result dict for all three config sections model_tools.py: - Hoist _READ_SEARCH_TOOLS set to module level (was rebuilt inside handle_function_call on every tool invocation) * Use endpoint metadata for custom model context and pricing --------- Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com> --- agent/context_compressor.py | 4 +- agent/model_metadata.py | 227 +++++++++++++++++++++++++++-- agent/usage_pricing.py | 45 +++++- model_tools.py | 2 +- run_agent.py | 73 ++++++---- tests/agent/test_model_metadata.py | 49 +++++++ tests/agent/test_usage_pricing.py | 24 +++ 7 files changed, 375 insertions(+), 49 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 8ff43da5..11b5c5b8 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -45,16 +45,18 @@ class ContextCompressor: quiet_mode: bool = False, summary_model_override: str = None, base_url: str = "", + api_key: str = "", ): self.model = model self.base_url = base_url + self.api_key = api_key self.threshold_percent = threshold_percent self.protect_first_n = 
protect_first_n self.protect_last_n = protect_last_n self.summary_target_tokens = summary_target_tokens self.quiet_mode = quiet_mode - self.context_length = get_model_context_length(model, base_url=base_url) + self.context_length = get_model_context_length(model, base_url=base_url, api_key=api_key) self.threshold_tokens = int(self.context_length * threshold_percent) self.compression_count = 0 self._context_probed = False # True after a step-down from context error diff --git a/agent/model_metadata.py b/agent/model_metadata.py index fb0d3846..8283e8d3 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -10,6 +10,7 @@ import re import time from pathlib import Path from typing import Any, Dict, List, Optional +from urllib.parse import urlparse import requests import yaml @@ -21,6 +22,9 @@ logger = logging.getLogger(__name__) _model_metadata_cache: Dict[str, Dict[str, Any]] = {} _model_metadata_cache_time: float = 0 _MODEL_CACHE_TTL = 3600 +_endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {} +_endpoint_model_metadata_cache_time: Dict[str, float] = {} +_ENDPOINT_MODEL_CACHE_TTL = 300 # Descending tiers for context length probing when the model is unknown. # We start high and step down on context-length errors until one works. 
@@ -123,6 +127,128 @@ DEFAULT_CONTEXT_LENGTHS = { "qwen-vl-max": 32768, } +_CONTEXT_LENGTH_KEYS = ( + "context_length", + "context_window", + "max_context_length", + "max_position_embeddings", + "max_model_len", + "max_input_tokens", + "max_sequence_length", + "max_seq_len", +) + +_MAX_COMPLETION_KEYS = ( + "max_completion_tokens", + "max_output_tokens", + "max_tokens", +) + + +def _normalize_base_url(base_url: str) -> str: + return (base_url or "").strip().rstrip("/") + + +def _is_openrouter_base_url(base_url: str) -> bool: + return "openrouter.ai" in _normalize_base_url(base_url).lower() + + +def _is_custom_endpoint(base_url: str) -> bool: + normalized = _normalize_base_url(base_url) + return bool(normalized) and not _is_openrouter_base_url(normalized) + + +def _is_known_provider_base_url(base_url: str) -> bool: + normalized = _normalize_base_url(base_url) + if not normalized: + return False + parsed = urlparse(normalized if "://" in normalized else f"https://{normalized}") + host = parsed.netloc.lower() or parsed.path.lower() + known_hosts = ( + "api.openai.com", + "chatgpt.com", + "api.anthropic.com", + "api.z.ai", + "api.moonshot.ai", + "api.kimi.com", + "api.minimax", + ) + return any(known_host in host for known_host in known_hosts) + + +def _iter_nested_dicts(value: Any): + if isinstance(value, dict): + yield value + for nested in value.values(): + yield from _iter_nested_dicts(nested) + elif isinstance(value, list): + for item in value: + yield from _iter_nested_dicts(item) + + +def _coerce_reasonable_int(value: Any, minimum: int = 1024, maximum: int = 10_000_000) -> Optional[int]: + try: + if isinstance(value, bool): + return None + if isinstance(value, str): + value = value.strip().replace(",", "") + result = int(value) + except (TypeError, ValueError): + return None + if minimum <= result <= maximum: + return result + return None + + +def _extract_first_int(payload: Dict[str, Any], keys: tuple[str, ...]) -> Optional[int]: + keyset = {key.lower() for key 
in keys} + for mapping in _iter_nested_dicts(payload): + for key, value in mapping.items(): + if str(key).lower() not in keyset: + continue + coerced = _coerce_reasonable_int(value) + if coerced is not None: + return coerced + return None + + +def _extract_context_length(payload: Dict[str, Any]) -> Optional[int]: + return _extract_first_int(payload, _CONTEXT_LENGTH_KEYS) + + +def _extract_max_completion_tokens(payload: Dict[str, Any]) -> Optional[int]: + return _extract_first_int(payload, _MAX_COMPLETION_KEYS) + + +def _extract_pricing(payload: Dict[str, Any]) -> Dict[str, Any]: + alias_map = { + "prompt": ("prompt", "input", "input_cost_per_token", "prompt_token_cost"), + "completion": ("completion", "output", "output_cost_per_token", "completion_token_cost"), + "request": ("request", "request_cost"), + "cache_read": ("cache_read", "cached_prompt", "input_cache_read", "cache_read_cost_per_token"), + "cache_write": ("cache_write", "cache_creation", "input_cache_write", "cache_write_cost_per_token"), + } + for mapping in _iter_nested_dicts(payload): + normalized = {str(key).lower(): value for key, value in mapping.items()} + if not any(any(alias in normalized for alias in aliases) for aliases in alias_map.values()): + continue + pricing: Dict[str, Any] = {} + for target, aliases in alias_map.items(): + for alias in aliases: + if alias in normalized and normalized[alias] not in (None, ""): + pricing[target] = normalized[alias] + break + if pricing: + return pricing + return {} + + +def _add_model_aliases(cache: Dict[str, Dict[str, Any]], model_id: str, entry: Dict[str, Any]) -> None: + cache[model_id] = entry + if "/" in model_id: + bare_model = model_id.split("/", 1)[1] + cache.setdefault(bare_model, entry) + def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]: """Fetch model metadata from OpenRouter (cached for 1 hour).""" @@ -139,15 +265,16 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any cache = {} 
for model in data.get("data", []): model_id = model.get("id", "") - cache[model_id] = { + entry = { "context_length": model.get("context_length", 128000), "max_completion_tokens": model.get("top_provider", {}).get("max_completion_tokens", 4096), "name": model.get("name", model_id), "pricing": model.get("pricing", {}), } + _add_model_aliases(cache, model_id, entry) canonical = model.get("canonical_slug", "") if canonical and canonical != model_id: - cache[canonical] = cache[model_id] + _add_model_aliases(cache, canonical, entry) _model_metadata_cache = cache _model_metadata_cache_time = time.time() @@ -159,6 +286,75 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any return _model_metadata_cache or {} +def fetch_endpoint_model_metadata( + base_url: str, + api_key: str = "", + force_refresh: bool = False, +) -> Dict[str, Dict[str, Any]]: + """Fetch model metadata from an OpenAI-compatible ``/models`` endpoint. + + This is used for explicit custom endpoints where hardcoded global model-name + defaults are unreliable. Results are cached in memory per base URL. 
+ """ + normalized = _normalize_base_url(base_url) + if not normalized or _is_openrouter_base_url(normalized): + return {} + + if not force_refresh: + cached = _endpoint_model_metadata_cache.get(normalized) + cached_at = _endpoint_model_metadata_cache_time.get(normalized, 0) + if cached is not None and (time.time() - cached_at) < _ENDPOINT_MODEL_CACHE_TTL: + return cached + + candidates = [normalized] + if normalized.endswith("/v1"): + alternate = normalized[:-3].rstrip("/") + else: + alternate = normalized + "/v1" + if alternate and alternate not in candidates: + candidates.append(alternate) + + headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} + last_error: Optional[Exception] = None + + for candidate in candidates: + url = candidate.rstrip("/") + "/models" + try: + response = requests.get(url, headers=headers, timeout=10) + response.raise_for_status() + payload = response.json() + cache: Dict[str, Dict[str, Any]] = {} + for model in payload.get("data", []): + if not isinstance(model, dict): + continue + model_id = model.get("id") + if not model_id: + continue + entry: Dict[str, Any] = {"name": model.get("name", model_id)} + context_length = _extract_context_length(model) + if context_length is not None: + entry["context_length"] = context_length + max_completion_tokens = _extract_max_completion_tokens(model) + if max_completion_tokens is not None: + entry["max_completion_tokens"] = max_completion_tokens + pricing = _extract_pricing(model) + if pricing: + entry["pricing"] = pricing + _add_model_aliases(cache, model_id, entry) + + _endpoint_model_metadata_cache[normalized] = cache + _endpoint_model_metadata_cache_time[normalized] = time.time() + return cache + except Exception as exc: + last_error = exc + + if last_error: + logger.debug("Failed to fetch model metadata from %s/models: %s", normalized, last_error) + _endpoint_model_metadata_cache[normalized] = {} + _endpoint_model_metadata_cache_time[normalized] = time.time() + return {} + + def 
_get_context_cache_path() -> Path: """Return path to the persistent context length cache file.""" hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) @@ -243,14 +439,15 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]: return None -def get_model_context_length(model: str, base_url: str = "") -> int: +def get_model_context_length(model: str, base_url: str = "", api_key: str = "") -> int: """Get the context length for a model. Resolution order: 1. Persistent cache (previously discovered via probing) - 2. OpenRouter API metadata - 3. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match) - 4. First probe tier (2M) — will be narrowed on first context error + 2. Active endpoint metadata (/models for explicit custom endpoints) + 3. OpenRouter API metadata + 4. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match for hosted routes only) + 5. First probe tier (2M) — will be narrowed on first context error """ # 1. Check persistent cache (model+provider) if base_url: @@ -258,19 +455,31 @@ def get_model_context_length(model: str, base_url: str = "") -> int: if cached is not None: return cached - # 2. OpenRouter API metadata + # 2. Active endpoint metadata for explicit custom routes + if _is_custom_endpoint(base_url): + endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key) + if model in endpoint_metadata: + context_length = endpoint_metadata[model].get("context_length") + if isinstance(context_length, int): + return context_length + if not _is_known_provider_base_url(base_url): + # Explicit third-party endpoints should not borrow fuzzy global + # defaults from unrelated providers with similarly named models. + return CONTEXT_PROBE_TIERS[0] + + # 3. OpenRouter API metadata metadata = fetch_model_metadata() if model in metadata: return metadata[model].get("context_length", 128000) - # 3. Hardcoded defaults (fuzzy match — longest key first for specificity) + # 4. 
Hardcoded defaults (fuzzy match — longest key first for specificity) for default_model, length in sorted( DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True ): if default_model in model or model in default_model: return length - # 4. Unknown model — start at highest probe tier + # 5. Unknown model — start at highest probe tier return CONTEXT_PROBE_TIERS[0] diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py index 29e7df25..81c50026 100644 --- a/agent/usage_pricing.py +++ b/agent/usage_pricing.py @@ -5,7 +5,7 @@ from datetime import datetime, timezone from decimal import Decimal from typing import Any, Dict, Literal, Optional -from agent.model_metadata import fetch_model_metadata +from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata DEFAULT_PRICING = {"input": 0.0, "output": 0.0} @@ -335,8 +335,21 @@ def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry] def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]: - metadata = fetch_model_metadata() - model_id = route.model + return _pricing_entry_from_metadata( + fetch_model_metadata(), + route.model, + source_url="https://openrouter.ai/docs/api/api-reference/models/get-models", + pricing_version="openrouter-models-api", + ) + + +def _pricing_entry_from_metadata( + metadata: Dict[str, Dict[str, Any]], + model_id: str, + *, + source_url: str, + pricing_version: str, +) -> Optional[PricingEntry]: if model_id not in metadata: return None pricing = metadata[model_id].get("pricing") or {} @@ -355,6 +368,7 @@ def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]: ) if prompt is None and completion is None and request is None: return None + def _per_token_to_per_million(value: Optional[Decimal]) -> Optional[Decimal]: if value is None: return None @@ -367,8 +381,8 @@ def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]: 
cache_write_cost_per_million=_per_token_to_per_million(cache_write), request_cost=request, source="provider_models_api", - source_url="https://openrouter.ai/docs/api/api-reference/models/get-models", - pricing_version="openrouter-models-api", + source_url=source_url, + pricing_version=pricing_version, fetched_at=_UTC_NOW(), ) @@ -377,6 +391,7 @@ def get_pricing_entry( model_name: str, provider: Optional[str] = None, base_url: Optional[str] = None, + api_key: Optional[str] = None, ) -> Optional[PricingEntry]: route = resolve_billing_route(model_name, provider=provider, base_url=base_url) if route.billing_mode == "subscription_included": @@ -390,6 +405,15 @@ def get_pricing_entry( ) if route.provider == "openrouter": return _openrouter_pricing_entry(route) + if route.base_url: + entry = _pricing_entry_from_metadata( + fetch_endpoint_model_metadata(route.base_url, api_key=api_key or ""), + route.model, + source_url=f"{route.base_url.rstrip('/')}/models", + pricing_version="openai-compatible-models-api", + ) + if entry: + return entry return _lookup_official_docs_pricing(route) @@ -460,6 +484,7 @@ def estimate_usage_cost( *, provider: Optional[str] = None, base_url: Optional[str] = None, + api_key: Optional[str] = None, ) -> CostResult: route = resolve_billing_route(model_name, provider=provider, base_url=base_url) if route.billing_mode == "subscription_included": @@ -471,7 +496,7 @@ def estimate_usage_cost( pricing_version="included-route", ) - entry = get_pricing_entry(model_name, provider=provider, base_url=base_url) + entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key) if not entry: return CostResult(amount_usd=None, status="unknown", source="none", label="n/a") @@ -536,6 +561,7 @@ def has_known_pricing( model_name: str, provider: Optional[str] = None, base_url: Optional[str] = None, + api_key: Optional[str] = None, ) -> bool: """Check whether we have pricing data for this model+route. 
@@ -545,7 +571,7 @@ def has_known_pricing( route = resolve_billing_route(model_name, provider=provider, base_url=base_url) if route.billing_mode == "subscription_included": return True - entry = get_pricing_entry(model_name, provider=provider, base_url=base_url) + entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key) return entry is not None @@ -553,13 +579,14 @@ def get_pricing( model_name: str, provider: Optional[str] = None, base_url: Optional[str] = None, + api_key: Optional[str] = None, ) -> Dict[str, float]: """Backward-compatible thin wrapper for legacy callers. Returns only non-cache input/output fields when a pricing entry exists. Unknown routes return zeroes. """ - entry = get_pricing_entry(model_name, provider=provider, base_url=base_url) + entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key) if not entry: return {"input": 0.0, "output": 0.0} return { @@ -575,6 +602,7 @@ def estimate_cost_usd( *, provider: Optional[str] = None, base_url: Optional[str] = None, + api_key: Optional[str] = None, ) -> float: """Backward-compatible helper for legacy callers. @@ -586,6 +614,7 @@ def estimate_cost_usd( CanonicalUsage(input_tokens=input_tokens, output_tokens=output_tokens), provider=provider, base_url=base_url, + api_key=api_key, ) return float(result.amount_usd or _ZERO) diff --git a/model_tools.py b/model_tools.py index 87d52109..3d252f44 100644 --- a/model_tools.py +++ b/model_tools.py @@ -276,6 +276,7 @@ def get_tool_definitions( # The registry still holds their schemas; dispatch just returns a stub error # so if something slips through, the LLM sees a sensible message. 
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"} +_READ_SEARCH_TOOLS = {"read_file", "search_files"} def handle_function_call( @@ -305,7 +306,6 @@ def handle_function_call( """ # Notify the read-loop tracker when a non-read/search tool runs, # so the *consecutive* counter resets (reads after other work are fine). - _READ_SEARCH_TOOLS = {"read_file", "search_files"} if function_name not in _READ_SEARCH_TOOLS: try: from tools.file_tools import notify_other_tool_call diff --git a/run_agent.py b/run_agent.py index 0ce3919f..f0e8f25d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -263,11 +263,20 @@ def _inject_honcho_turn_context(content, turn_context: str): class AIAgent: """ AI Agent with tool calling capabilities. - + This class manages the conversation flow, tool execution, and response handling for AI models that support function calling. """ - + + @property + def base_url(self) -> str: + return self._base_url + + @base_url.setter + def base_url(self, value: str) -> None: + self._base_url = value + self._base_url_lower = value.lower() if value else "" + def __init__( self, base_url: str = None, @@ -383,10 +392,10 @@ class AIAgent: self.api_mode = api_mode elif self.provider == "openai-codex": self.api_mode = "codex_responses" - elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower(): + elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower: self.api_mode = "codex_responses" self.provider = "openai-codex" - elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self.base_url.lower()): + elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): self.api_mode = "anthropic_messages" self.provider = "anthropic" else: @@ -395,7 +404,7 @@ class AIAgent: # Pre-warm OpenRouter model metadata cache in a background thread. 
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking # HTTP request on the first API response when pricing is estimated. - if self.provider == "openrouter" or "openrouter" in self.base_url.lower(): + if self.provider == "openrouter" or "openrouter" in self._base_url_lower: threading.Thread( target=lambda: fetch_model_metadata(), daemon=True, @@ -439,7 +448,7 @@ class AIAgent: # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. # Reduces input costs by ~75% on multi-turn conversations by caching the # conversation prefix. Uses system_and_3 strategy (4 breakpoints). - is_openrouter = "openrouter" in self.base_url.lower() + is_openrouter = "openrouter" in self._base_url_lower is_claude = "claude" in self.model.lower() is_native_anthropic = self.api_mode == "anthropic_messages" self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic @@ -555,6 +564,7 @@ class AIAgent: if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token effective_key = api_key or resolve_anthropic_token() or "" + self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = base_url from agent.anthropic_adapter import _is_oauth_token as _is_oat @@ -609,6 +619,7 @@ class AIAgent: } self._client_kwargs = client_kwargs # stored for rebuilding after interrupt + self.api_key = client_kwargs.get("api_key", "") try: self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True) if not self.quiet_mode: @@ -732,6 +743,13 @@ class AIAgent: from tools.todo_tool import TodoStore self._todo_store = TodoStore() + # Load config once for memory, skills, and compression sections + try: + from hermes_cli.config import load_config as _load_agent_config + _agent_cfg = _load_agent_config() + except Exception: + _agent_cfg = {} + # Persistent memory (MEMORY.md + USER.md) -- loaded from disk self._memory_store = None 
self._memory_enabled = False @@ -742,8 +760,7 @@ class AIAgent: self._iters_since_skill = 0 if not skip_memory: try: - from hermes_cli.config import load_config as _load_mem_config - mem_config = _load_mem_config().get("memory", {}) + mem_config = _agent_cfg.get("memory", {}) self._memory_enabled = mem_config.get("memory_enabled", False) self._user_profile_enabled = mem_config.get("user_profile_enabled", False) self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10)) @@ -831,21 +848,16 @@ class AIAgent: # Skills config: nudge interval for skill creation reminders self._skill_nudge_interval = 10 try: - from hermes_cli.config import load_config as _load_skills_config - skills_config = _load_skills_config().get("skills", {}) + skills_config = _agent_cfg.get("skills", {}) self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15)) except Exception: pass - + # Initialize context compressor for automatic context management # Compresses conversation when approaching model's context limit # Configuration via config.yaml (compression section) - try: - from hermes_cli.config import load_config as _load_compression_config - _compression_cfg = _load_compression_config().get("compression", {}) - if not isinstance(_compression_cfg, dict): - _compression_cfg = {} - except ImportError: + _compression_cfg = _agent_cfg.get("compression", {}) + if not isinstance(_compression_cfg, dict): _compression_cfg = {} compression_threshold = float(_compression_cfg.get("threshold", 0.50)) compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes") @@ -860,6 +872,7 @@ class AIAgent: summary_model_override=compression_summary_model, quiet_mode=self.quiet_mode, base_url=self.base_url, + api_key=getattr(self, "api_key", ""), ) self.compression_enabled = compression_enabled self._user_turn_count = 0 @@ -915,8 +928,8 @@ class AIAgent: OpenAI models use 'max_tokens'. 
""" _is_direct_openai = ( - "api.openai.com" in self.base_url.lower() - and "openrouter" not in self.base_url.lower() + "api.openai.com" in self._base_url_lower + and "openrouter" not in self._base_url_lower ) if _is_direct_openai: return {"max_completion_tokens": value} @@ -3643,7 +3656,7 @@ class AIAgent: extra_body = {} - _is_openrouter = "openrouter" in self.base_url.lower() + _is_openrouter = "openrouter" in self._base_url_lower # Provider preferences (only, ignore, order, sort) are OpenRouter- # specific. Only send to OpenRouter-compatible endpoints. @@ -3651,7 +3664,7 @@ class AIAgent: # for _is_nous when their backend is updated. if provider_preferences and _is_openrouter: extra_body["provider"] = provider_preferences - _is_nous = "nousresearch" in self.base_url.lower() + _is_nous = "nousresearch" in self._base_url_lower if self._supports_reasoning_extra_body(): if self.reasoning_config is not None: @@ -3684,14 +3697,13 @@ class AIAgent: Some providers/routes reject `reasoning` with 400s, so gate it to known reasoning-capable model families and direct Nous Portal. 
""" - base_url = (self.base_url or "").lower() - if "nousresearch" in base_url: + if "nousresearch" in self._base_url_lower: return True - if "ai-gateway.vercel.sh" in base_url: + if "ai-gateway.vercel.sh" in self._base_url_lower: return True - if "openrouter" not in base_url: + if "openrouter" not in self._base_url_lower: return False - if "api.mistral.ai" in base_url: + if "api.mistral.ai" in self._base_url_lower: return False model = (self.model or "").lower() @@ -3877,7 +3889,7 @@ class AIAgent: try: # Build API messages for the flush call - _is_strict_api = "api.mistral.ai" in self.base_url.lower() + _is_strict_api = "api.mistral.ai" in self._base_url_lower api_messages = [] for msg in messages: api_msg = msg.copy() @@ -4653,7 +4665,7 @@ class AIAgent: try: # Build API messages, stripping internal-only fields # (finish_reason, reasoning) that strict APIs like Mistral reject with 422 - _is_strict_api = "api.mistral.ai" in self.base_url.lower() + _is_strict_api = "api.mistral.ai" in self._base_url_lower api_messages = [] for msg in messages: api_msg = msg.copy() @@ -4674,7 +4686,7 @@ class AIAgent: api_messages.insert(sys_offset + idx, pfm.copy()) summary_extra_body = {} - _is_nous = "nousresearch" in self.base_url.lower() + _is_nous = "nousresearch" in self._base_url_lower if self._supports_reasoning_extra_body(): if self.reasoning_config is not None: summary_extra_body["reasoning"] = self.reasoning_config @@ -5092,7 +5104,7 @@ class AIAgent: # strict providers like Mistral that reject unknown fields with 422. # Uses new dicts so the internal messages list retains the fields # for Codex Responses compatibility. 
- if "api.mistral.ai" in self.base_url.lower(): + if "api.mistral.ai" in self._base_url_lower: self._sanitize_tool_calls_for_strict_api(api_msg) # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context # The signature field helps maintain reasoning continuity @@ -5464,6 +5476,7 @@ class AIAgent: canonical_usage, provider=self.provider, base_url=self.base_url, + api_key=getattr(self, "api_key", ""), ) if cost_result.amount_usd is not None: self.session_estimated_cost_usd += float(cost_result.amount_usd) diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 75570e34..aa35be9b 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -188,6 +188,36 @@ class TestGetModelContextLength: result = get_model_context_length("custom/model") assert result == CONTEXT_PROBE_TIERS[0] + @patch("agent.model_metadata.fetch_model_metadata") + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_custom_endpoint_metadata_beats_fuzzy_default(self, mock_endpoint_fetch, mock_fetch): + mock_fetch.return_value = {} + mock_endpoint_fetch.return_value = { + "zai-org/GLM-5-TEE": {"context_length": 65536} + } + + result = get_model_context_length( + "zai-org/GLM-5-TEE", + base_url="https://llm.chutes.ai/v1", + api_key="test-key", + ) + + assert result == 65536 + + @patch("agent.model_metadata.fetch_model_metadata") + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_custom_endpoint_without_metadata_skips_name_based_default(self, mock_endpoint_fetch, mock_fetch): + mock_fetch.return_value = {} + mock_endpoint_fetch.return_value = {} + + result = get_model_context_length( + "zai-org/GLM-5-TEE", + base_url="https://llm.chutes.ai/v1", + api_key="test-key", + ) + + assert result == CONTEXT_PROBE_TIERS[0] + # ========================================================================= # fetch_model_metadata — caching, TTL, slugs, failures @@ -258,6 +288,25 @@ class 
TestFetchModelMetadata: assert "anthropic/claude-3.5-sonnet" in result assert result["anthropic/claude-3.5-sonnet"]["context_length"] == 200000 + @patch("agent.model_metadata.requests.get") + def test_provider_prefixed_models_get_bare_aliases(self, mock_get): + self._reset_cache() + mock_response = MagicMock() + mock_response.json.return_value = { + "data": [{ + "id": "provider/test-model", + "context_length": 123456, + "name": "Provider: Test Model", + }] + } + mock_response.raise_for_status = MagicMock() + mock_get.return_value = mock_response + + result = fetch_model_metadata(force_refresh=True) + + assert result["provider/test-model"]["context_length"] == 123456 + assert result["test-model"]["context_length"] == 123456 + @patch("agent.model_metadata.requests.get") def test_ttl_expiry_triggers_refetch(self, mock_get): """Cache expires after _MODEL_CACHE_TTL seconds.""" diff --git a/tests/agent/test_usage_pricing.py b/tests/agent/test_usage_pricing.py index 6d972dfa..a65668bb 100644 --- a/tests/agent/test_usage_pricing.py +++ b/tests/agent/test_usage_pricing.py @@ -99,3 +99,27 @@ def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(m ) assert result.status == "unknown" + + +def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch): + monkeypatch.setattr( + "agent.usage_pricing.fetch_endpoint_model_metadata", + lambda base_url, api_key=None: { + "zai-org/GLM-5-TEE": { + "pricing": { + "prompt": "0.0000005", + "completion": "0.000002", + } + } + }, + ) + + entry = get_pricing_entry( + "zai-org/GLM-5-TEE", + provider="custom", + base_url="https://llm.chutes.ai/v1", + api_key="test-key", + ) + + assert float(entry.input_cost_per_million) == 0.5 + assert float(entry.output_cost_per_million) == 2.0 From 4b53b89f0964933790b5c31819b3f53446dda183 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:04:17 -0700 Subject: [PATCH 11/25] feat(mcp): expose MCP servers as standalone toolsets Each configured MCP server now registers 
as its own toolset in TOOLSETS (e.g. TOOLSETS['github'] = {tools: ['mcp_github_list_files', ...]}), making raw server names resolvable in platform_toolsets overrides. Previously MCP tools were only injected into hermes-* umbrella toolsets, so gateway sessions using raw toolset names like ['terminal', 'github'] in platform_toolsets couldn't resolve MCP tools. Skips server names that collide with built-in toolsets. Also handles idempotent reloads (syncs toolsets even when no new servers connect). Inspired by PR #1876 by @kshitijk4poor. Adds 2 tests (standalone toolset creation + built-in collision guard). --- tests/tools/test_mcp_tool.py | 36 +++++++++++++++++++++ tools/mcp_tool.py | 61 +++++++++++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py index 9c49bd2c..38654a18 100644 --- a/tests/tools/test_mcp_tool.py +++ b/tests/tools/test_mcp_tool.py @@ -505,6 +505,42 @@ class TestToolsetInjection: assert "mcp_fs_list_files" not in fake_toolsets["non-hermes"]["tools"] # Original tools preserved assert "terminal" in fake_toolsets["hermes-cli"]["tools"] + # Server name becomes a standalone toolset + assert "fs" in fake_toolsets + assert "mcp_fs_list_files" in fake_toolsets["fs"]["tools"] + assert fake_toolsets["fs"]["description"].startswith("MCP server '") + + def test_server_toolset_skips_builtin_collision(self): + """MCP server named after a built-in toolset shouldn't overwrite it.""" + from tools.mcp_tool import MCPServerTask + + mock_tools = [_make_mcp_tool("run", "Run command")] + mock_session = MagicMock() + fresh_servers = {} + + async def fake_connect(name, config): + server = MCPServerTask(name) + server.session = mock_session + server._tools = mock_tools + return server + + fake_toolsets = { + "hermes-cli": {"tools": ["terminal"], "description": "CLI", "includes": []}, + # Built-in toolset named "terminal" — must not be overwritten + "terminal": {"tools": 
["terminal"], "description": "Terminal tools", "includes": []}, + } + fake_config = {"terminal": {"command": "npx", "args": []}} + + with patch("tools.mcp_tool._MCP_AVAILABLE", True), \ + patch("tools.mcp_tool._servers", fresh_servers), \ + patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \ + patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \ + patch("toolsets.TOOLSETS", fake_toolsets): + from tools.mcp_tool import discover_mcp_tools + discover_mcp_tools() + + # Built-in toolset preserved — description unchanged + assert fake_toolsets["terminal"]["description"] == "Terminal tools" def test_server_connection_failure_skipped(self): """If one server fails to connect, others still proceed.""" diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 7ff8103b..c22b824f 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1238,6 +1238,57 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict: } +def _sync_mcp_toolsets(server_names: Optional[List[str]] = None) -> None: + """Expose each MCP server as a standalone toolset and inject into hermes-* sets. + + Creates a real toolset entry in TOOLSETS for each server name (e.g. + TOOLSETS["github"] = {"tools": ["mcp_github_list_files", ...]}). This + makes raw server names resolvable in platform_toolsets overrides. + + Also injects all MCP tools into hermes-* umbrella toolsets for the + default behavior. + + Skips server names that collide with built-in toolsets. + """ + from toolsets import TOOLSETS + + if server_names is None: + server_names = list(_load_mcp_config().keys()) + + existing = _existing_tool_names() + all_mcp_tools: List[str] = [] + + for server_name in server_names: + safe_prefix = f"mcp_{server_name.replace('-', '_').replace('.', '_')}_" + server_tools = sorted( + t for t in existing if t.startswith(safe_prefix) + ) + all_mcp_tools.extend(server_tools) + + # Don't overwrite a built-in toolset that happens to share the name. 
+ existing_ts = TOOLSETS.get(server_name) + if existing_ts and not str(existing_ts.get("description", "")).startswith("MCP server '"): + logger.warning( + "Skipping MCP toolset alias '%s' — a built-in toolset already uses that name", + server_name, + ) + continue + + TOOLSETS[server_name] = { + "description": f"MCP server '{server_name}' tools", + "tools": server_tools, + "includes": [], + } + + # Also inject into hermes-* umbrella toolsets for default behavior. + for ts_name, ts in TOOLSETS.items(): + if not ts_name.startswith("hermes-"): + continue + for tool_name in all_mcp_tools: + if tool_name not in ts["tools"]: + ts["tools"].append(tool_name) + + def _build_utility_schemas(server_name: str) -> List[dict]: """Build schemas for the MCP utility tools (resources & prompts). @@ -1523,6 +1574,7 @@ def discover_mcp_tools() -> List[str]: } if not new_servers: + _sync_mcp_toolsets(list(servers.keys())) return _existing_tool_names() # Start the background event loop for MCP connections @@ -1562,14 +1614,7 @@ def discover_mcp_tools() -> List[str]: # The outer timeout is generous: 120s total for parallel discovery. _run_on_mcp_loop(_discover_all(), timeout=120) - if all_tools: - # Dynamically inject into all hermes-* platform toolsets - from toolsets import TOOLSETS - for ts_name, ts in TOOLSETS.items(): - if ts_name.startswith("hermes-"): - for tool_name in all_tools: - if tool_name not in ts["tools"]: - ts["tools"].append(tool_name) + _sync_mcp_toolsets(list(servers.keys())) # Print summary total_servers = len(new_servers) From 764825bbffde01624469ecd3a62d39d789bcb330 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:05:17 -0700 Subject: [PATCH 12/25] feat: expand hermes-agent-setup skill + tell agent about it in STT notes Skill now covers full CLI usage (hermes setup, hermes skills, hermes tools, hermes config, session management, etc.), config file reference, and expanded gateway commands. 
Agent context notes for STT failure now mention the hermes-agent-setup skill is available to help users configure Hermes features. --- gateway/run.py | 13 +- skills/dogfood/hermes-agent-setup/SKILL.md | 188 ++++++++++++++++++--- 2 files changed, 177 insertions(+), 24 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 668977ef..02275372 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3966,7 +3966,11 @@ class GatewayRunner: The enriched message string with transcriptions prepended. """ if not getattr(self.config, "stt_enabled", True): - disabled_note = "[The user sent voice message(s), but transcription is disabled in config.]" + disabled_note = ( + "[The user sent voice message(s), but transcription is disabled in config. " + "You have a skill called hermes-agent-setup that can help users configure " + "Hermes features including voice, tools, and more.]" + ) if user_text: return f"{disabled_note}\n\n{user_text}" return disabled_note @@ -3995,8 +3999,11 @@ class GatewayRunner: ): enriched_parts.append( "[The user sent a voice message but I can't listen " - "to it right now~ No STT provider is configured " - "(';w;') Let them know!]" + "to it right now — no STT provider is configured. " + "A direct message has already been sent to the user " + "with setup instructions. You have a skill called " + "hermes-agent-setup that can help users configure " + "Hermes features including voice, tools, and more.]" ) else: enriched_parts.append( diff --git a/skills/dogfood/hermes-agent-setup/SKILL.md b/skills/dogfood/hermes-agent-setup/SKILL.md index 275c0686..7b7b1c21 100644 --- a/skills/dogfood/hermes-agent-setup/SKILL.md +++ b/skills/dogfood/hermes-agent-setup/SKILL.md @@ -1,14 +1,14 @@ --- name: hermes-agent-setup -description: Help users configure Hermes Agent — enable tools, set up voice/STT/TTS, install dependencies, and troubleshoot. Use when someone asks to enable features, configure voice, or when the system detects missing config. 
-version: 1.0.0 +description: Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. Use when someone asks to enable features, configure settings, or needs help with Hermes itself. +version: 1.1.0 author: Hermes Agent -tags: [setup, configuration, tools, stt, tts, voice, hermes] +tags: [setup, configuration, tools, stt, tts, voice, hermes, cli, skills] --- # Hermes Agent Setup & Configuration -Use this skill when a user asks to enable features, configure voice messages, set up tools, or troubleshoot configuration. +Use this skill when a user asks about configuring Hermes, enabling features, setting up voice, managing tools/skills, or troubleshooting. ## Key Paths @@ -16,6 +16,116 @@ Use this skill when a user asks to enable features, configure voice messages, se - API keys: `~/.hermes/.env` - Skills: `~/.hermes/skills/` - Hermes install: `~/.hermes/hermes-agent/` +- Venv: `~/.hermes/hermes-agent/.venv/` (or `venv/`) + +## CLI Overview + +Hermes is used via the `hermes` command (or `python -m hermes_cli.main` from the repo). 
+ +### Core commands: + +``` +hermes Interactive chat (default) +hermes chat -q "question" Single query, then exit +hermes chat -m MODEL Chat with a specific model +hermes -c Resume most recent session +hermes -c "project name" Resume session by name +hermes --resume SESSION_ID Resume by exact ID +hermes -w Isolated git worktree mode +hermes -s skill1,skill2 Preload skills for the session +hermes --yolo Skip dangerous command approval +``` + +### Configuration & setup: + +``` +hermes setup Interactive setup wizard (provider, API keys, model) +hermes model Interactive model/provider selection +hermes config View current configuration +hermes config edit Open config.yaml in $EDITOR +hermes config set KEY VALUE Set a config value directly +hermes login Authenticate with a provider +hermes logout Clear stored auth +hermes doctor Check configuration and dependencies +``` + +### Tools & skills: + +``` +hermes tools Interactive tool enable/disable per platform +hermes skills list List installed skills +hermes skills search QUERY Search the skills hub +hermes skills install NAME Install a skill from the hub +hermes skills config Enable/disable skills per platform +``` + +### Gateway (messaging platforms): + +``` +hermes gateway run Start the messaging gateway +hermes gateway install Install gateway as background service +hermes gateway status Check gateway status +``` + +### Session management: + +``` +hermes sessions list List past sessions +hermes sessions browse Interactive session picker +hermes sessions rename ID TITLE Rename a session +hermes sessions export ID Export session as markdown +hermes sessions prune Clean up old sessions +``` + +### Other: + +``` +hermes status Show status of all components +hermes cron list List cron jobs +hermes insights Usage analytics +hermes update Update to latest version +hermes pairing Manage DM authorization codes +``` + +## Setup Wizard (`hermes setup`) + +The interactive setup wizard walks through: +1. 
**Provider selection** — OpenRouter, Anthropic, OpenAI, Google, DeepSeek, and many more +2. **API key entry** — stores securely in the env file +3. **Model selection** — picks from available models for the chosen provider +4. **Basic settings** — reasoning effort, tool preferences + +Run it from terminal: +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate +python -m hermes_cli.main setup +``` + +To change just the model/provider later: `hermes model` + +## Skills Configuration (`hermes skills`) + +Skills are reusable instruction sets that extend what Hermes can do. + +### Managing skills: + +```bash +hermes skills list # Show installed skills +hermes skills search "docker" # Search the hub +hermes skills install NAME # Install from hub +hermes skills config # Enable/disable per platform +``` + +### Per-platform skill control: + +`hermes skills config` opens an interactive UI where you can enable or disable specific skills for each platform (cli, telegram, discord, etc.). Disabled skills won't appear in the agent's available skills list for that platform. + +### Loading skills in a session: + +- CLI: `hermes -s skill-name` or `hermes -s skill1,skill2` +- Chat: `/skill skill-name` +- Gateway: type `/skill skill-name` in any chat ## Voice Messages (STT) @@ -73,9 +183,9 @@ Hermes can reply with voice when users send voice messages. - `/voice tts` — voice reply to all messages - `/voice off` — text only (default) -## Enabling/Disabling Tools +## Enabling/Disabling Tools (`hermes tools`) -### Interactive tool config (requires terminal): +### Interactive tool config: ```bash cd ~/.hermes/hermes-agent @@ -83,11 +193,11 @@ source .venv/bin/activate python -m hermes_cli.main tools ``` -This opens a curses UI to enable/disable toolsets per platform. +This opens a curses UI to enable/disable toolsets per platform (cli, telegram, discord, slack, etc.). ### After changing tools: -Use `/reset` in the chat to start a fresh session with the new toolset. 
Tool changes do NOT take effect mid-conversation (this preserves prompt caching). +Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching and avoids cost spikes). ### Common toolsets: @@ -96,10 +206,10 @@ Use `/reset` in the chat to start a fresh session with the new toolset. Tool cha | terminal | Shell command execution | | file | File read/write/search/patch | | web | Web search and extraction | -| browser | Browser automation | +| browser | Browser automation (needs Browserbase) | | image_gen | AI image generation | | mcp | MCP server connections | -| voice | Text-to-speech | +| voice | Text-to-speech output | | cronjob | Scheduled tasks | ## Installing Dependencies @@ -109,22 +219,43 @@ Some tools need extra packages: ```bash cd ~/.hermes/hermes-agent && source .venv/bin/activate -pip install faster-whisper # Local STT +pip install faster-whisper # Local STT (voice transcription) pip install browserbase # Browser automation -pip install mcp # MCP servers +pip install mcp # MCP server connections ``` -## Setup Wizard +## Config File Reference -For first-time setup or full reconfiguration: +The main config file is `~/.hermes/config.yaml`. Key sections: -```bash -cd ~/.hermes/hermes-agent -source .venv/bin/activate -python -m hermes_cli.main setup +```yaml +# Model and provider +model: + default: anthropic/claude-opus-4.6 + provider: openrouter + +# Agent behavior +agent: + max_turns: 90 + reasoning_effort: high # xhigh, high, medium, low, minimal, none + +# Voice +stt: + enabled: true + provider: local # local, groq, openai +tts: + provider: elevenlabs # elevenlabs, openai, kokoro, fish + +# Display +display: + skin: default # default, ares, mono, slate + tool_progress: full # full, compact, off + background_process_notifications: all # all, result, error, off ``` -## Gateway Commands +Edit with `hermes config edit` or `hermes config set KEY VALUE`. 
+ +## Gateway Commands (Messaging Platforms) | Command | What it does | |---------|-------------| @@ -136,19 +267,34 @@ python -m hermes_cli.main setup | /reasoning [effort] | Set reasoning level | | /sethome | Set home channel for cron/notifications | | /restart | Restart the gateway (picks up config changes) | +| /status | Show session info | +| /retry | Retry last message | +| /undo | Remove last exchange | +| /personality [name] | Set agent personality | +| /skill [name] | Load a skill | ## Troubleshooting ### Voice messages not working 1. Check stt.enabled is true in config.yaml 2. Check a provider is available (faster-whisper installed, or API key set) -3. Restart gateway after config changes +3. Restart gateway after config changes (/restart) ### Tool not available -1. Check if the toolset is enabled for your platform (run `hermes tools`) +1. Run `hermes tools` to check if the toolset is enabled for your platform 2. Some tools need env vars — check the env file 3. Use /reset after enabling tools +### Model/provider issues +1. Run `hermes doctor` to check configuration +2. Run `hermes login` to re-authenticate +3. Check the env file has the right API key + ### Changes not taking effect - Gateway: /reset for tool changes, /restart for config changes - CLI: start a new session + +### Skills not showing up +1. Check `hermes skills list` shows the skill +2. Check `hermes skills config` has it enabled for your platform +3. Load explicitly with `/skill name` or `hermes -s name` From 5c4c4b8b7d097d2b6803a827208b3fb43d7bd4ce Mon Sep 17 00:00:00 2001 From: TheSameCat2 Date: Tue, 17 Mar 2026 17:54:51 -0500 Subject: [PATCH 13/25] fix(gateway): detect script-style gateway processes for --replace Recognize hermes_cli/main.py gateway command lines in gateway process detection and PID validation so --replace reliably finds existing gateway instances. Adds a regression test covering script-style cmdline detection. 
Closes #1830 --- gateway/status.py | 2 ++ hermes_cli/gateway.py | 1 + tests/gateway/test_status.py | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/gateway/status.py b/gateway/status.py index 4d999604..72a19a56 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -87,6 +87,7 @@ def _looks_like_gateway_process(pid: int) -> bool: patterns = ( "hermes_cli.main gateway", + "hermes_cli/main.py gateway", "hermes gateway", "gateway/run.py", ) @@ -105,6 +106,7 @@ def _record_looks_like_gateway(record: dict[str, Any]) -> bool: cmdline = " ".join(str(part) for part in argv) patterns = ( "hermes_cli.main gateway", + "hermes_cli/main.py gateway", "hermes gateway", "gateway/run.py", ) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index a7876bc4..fb2de2d1 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -31,6 +31,7 @@ def find_gateway_pids() -> list: pids = [] patterns = [ "hermes_cli.main gateway", + "hermes_cli/main.py gateway", "hermes gateway", "gateway/run.py", ] diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py index 96dfa537..510892b8 100644 --- a/tests/gateway/test_status.py +++ b/tests/gateway/test_status.py @@ -42,6 +42,26 @@ class TestGatewayPidState: assert status.get_running_pid() == os.getpid() + def test_get_running_pid_accepts_script_style_gateway_cmdline(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + pid_path = tmp_path / "gateway.pid" + pid_path.write_text(json.dumps({ + "pid": os.getpid(), + "kind": "hermes-gateway", + "argv": ["/venv/bin/python", "/repo/hermes_cli/main.py", "gateway", "run", "--replace"], + "start_time": 123, + })) + + monkeypatch.setattr(status.os, "kill", lambda pid, sig: None) + monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123) + monkeypatch.setattr( + status, + "_read_process_cmdline", + lambda pid: "/venv/bin/python /repo/hermes_cli/main.py gateway run --replace", + ) + + assert 
status.get_running_pid() == os.getpid() + class TestGatewayRuntimeStatus: def test_write_runtime_status_overwrites_stale_pid_on_restart(self, tmp_path, monkeypatch): From a9c405fac93e91802473af9ffeefa3096026d696 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:14:58 -0700 Subject: [PATCH 14/25] =?UTF-8?q?docs:=20fix=20MCP=20install=20commands=20?= =?UTF-8?q?=E2=80=94=20use=20uv,=20not=20bare=20pip?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The standard install already includes MCP via .[all]. For users who need to add it separately, the correct command is: cd ~/.hermes/hermes-agent && uv pip install -e ".[mcp]" The venv is created by uv, so bare 'pip' isn't available. All four occurrences across 3 docs pages updated. --- website/docs/guides/use-mcp-with-hermes.md | 7 ++++++- website/docs/reference/faq.md | 4 ++-- website/docs/user-guide/features/mcp.md | 9 ++++++--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/website/docs/guides/use-mcp-with-hermes.md b/website/docs/guides/use-mcp-with-hermes.md index e202594d..9083bdae 100644 --- a/website/docs/guides/use-mcp-with-hermes.md +++ b/website/docs/guides/use-mcp-with-hermes.md @@ -37,8 +37,13 @@ That last part matters. Good MCP usage is not just “connect everything.” It ## Step 1: install MCP support +If you installed Hermes with the standard install script, MCP support is already included (the installer runs `uv pip install -e ".[all]"`). + +If you installed without extras and need to add MCP separately: + ```bash -pip install hermes-agent[mcp] +cd ~/.hermes/hermes-agent +uv pip install -e ".[mcp]" ``` For npm-based servers, make sure Node.js and `npx` are available. 
diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md index 4d7be7aa..eaa92a06 100644 --- a/website/docs/reference/faq.md +++ b/website/docs/reference/faq.md @@ -372,8 +372,8 @@ hermes chat --continue **Solution:** ```bash -# Ensure MCP dependencies are installed -pip install hermes-agent[mcp] +# Ensure MCP dependencies are installed (already included in standard install) +cd ~/.hermes/hermes-agent && uv pip install -e ".[mcp]" # For npm-based servers, ensure Node.js is available node --version diff --git a/website/docs/user-guide/features/mcp.md b/website/docs/user-guide/features/mcp.md index 5009fab7..15890015 100644 --- a/website/docs/user-guide/features/mcp.md +++ b/website/docs/user-guide/features/mcp.md @@ -20,10 +20,11 @@ If you have ever wanted Hermes to use a tool that already exists somewhere else, ## Quick start -1. Install MCP support: +1. Install MCP support (already included if you used the standard install script): ```bash -pip install hermes-agent[mcp] +cd ~/.hermes/hermes-agent +uv pip install -e ".[mcp]" ``` 2. Add an MCP server to `~/.hermes/config.yaml`: @@ -374,7 +375,9 @@ Inspect the project root and explain the directory layout. Check: ```bash -pip install hermes-agent[mcp] +# Verify MCP deps are installed (already included in standard install) +cd ~/.hermes/hermes-agent && uv pip install -e ".[mcp]" + node --version npx --version ``` From 190c07975d7bdb896bd9106ca25c6cf2668c0e1d Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:17:23 -0700 Subject: [PATCH 15/25] fix: check skill availability before hinting at hermes-agent-setup Only mention the hermes-agent-setup skill in STT failure notes (both the direct user message and the agent context note) when the skill is actually installed. Uses _find_skill() from skill_manager_tool. Also confirmed: STT is the only user-facing failure case where the setup skill hint helps. 
Vision failures are transient API issues, runtime transcription errors indicate a configured-but-broken provider, and platform startup warnings are server logs. --- gateway/run.py | 48 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 02275372..62d16e68 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -432,6 +432,16 @@ class GatewayRunner: for session_key in list(managers.keys()): self._shutdown_gateway_honcho(session_key) + # -- Setup skill availability ---------------------------------------- + + def _has_setup_skill(self) -> bool: + """Check if the hermes-agent-setup skill is installed.""" + try: + from tools.skill_manager_tool import _find_skill + return _find_skill("hermes-agent-setup") is not None + except Exception: + return False + # -- Voice mode persistence ------------------------------------------ _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json" @@ -1884,15 +1894,19 @@ class GatewayRunner: _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None if _stt_adapter: try: - await _stt_adapter.send( - source.chat_id, + _stt_msg = ( "🎤 I received your voice message but can't transcribe it — " "no speech-to-text provider is configured.\n\n" "To enable voice: install faster-whisper " "(`pip install faster-whisper` in the Hermes venv) " "and set `stt.enabled: true` in config.yaml, " - "then /restart the gateway.\n\n" - "For full setup instructions, type: `/skill hermes-agent-setup`", + "then /restart the gateway." + ) + # Point to setup skill if it's installed + if self._has_setup_skill(): + _stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`" + await _stt_adapter.send( + source.chat_id, _stt_msg, metadata=_stt_meta, ) except Exception: @@ -3966,11 +3980,13 @@ class GatewayRunner: The enriched message string with transcriptions prepended. 
""" if not getattr(self.config, "stt_enabled", True): - disabled_note = ( - "[The user sent voice message(s), but transcription is disabled in config. " - "You have a skill called hermes-agent-setup that can help users configure " - "Hermes features including voice, tools, and more.]" - ) + disabled_note = "[The user sent voice message(s), but transcription is disabled in config." + if self._has_setup_skill(): + disabled_note += ( + " You have a skill called hermes-agent-setup that can help " + "users configure Hermes features including voice, tools, and more." + ) + disabled_note += "]" if user_text: return f"{disabled_note}\n\n{user_text}" return disabled_note @@ -3997,14 +4013,20 @@ class GatewayRunner: "No STT provider" in error or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set") ): - enriched_parts.append( + _no_stt_note = ( "[The user sent a voice message but I can't listen " "to it right now — no STT provider is configured. " "A direct message has already been sent to the user " - "with setup instructions. You have a skill called " - "hermes-agent-setup that can help users configure " - "Hermes features including voice, tools, and more.]" + "with setup instructions." ) + if self._has_setup_skill(): + _no_stt_note += ( + " You have a skill called hermes-agent-setup " + "that can help users configure Hermes features " + "including voice, tools, and more." 
+ ) + _no_stt_note += "]" + enriched_parts.append(_no_stt_note) else: enriched_parts.append( "[The user sent a voice message but I had trouble " From b70dd51cfab01ec7e61ef8db730319d8df621f86 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 18 Mar 2026 03:17:37 -0700 Subject: [PATCH 16/25] fix: disabled skills respected across banner, system prompt, slash commands, and skill_view (#1897) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: banner skill count now respects disabled skills and platform filtering The banner's get_available_skills() was doing a raw rglob scan of ~/.hermes/skills/ without checking: - Whether skills are disabled (skills.disabled config) - Whether skills match the current platform (platforms: frontmatter) This caused the banner to show inflated skill counts (e.g. '100 skills' when many are disabled) and list macOS-only skills on Linux. Fix: delegate to _find_all_skills() from tools/skills_tool which already handles both platform gating and disabled-skill filtering. * fix: system prompt and slash commands now respect disabled skills Two more places where disabled skills were still surfaced: 1. build_skills_system_prompt() in prompt_builder.py — disabled skills appeared in the system prompt section, causing the agent to suggest/load them despite being disabled. 2. scan_skill_commands() in skill_commands.py — disabled skills still registered as /skill-name slash commands in CLI help and could be invoked. Both now load _get_disabled_skill_names() and filter accordingly. * fix: skill_view blocks disabled skills skill_view() checked platform compatibility but not disabled state, so the agent could still load and read disabled skills directly. Now returns a clear error when a disabled skill is requested, telling the user to enable it via hermes skills or inspect the files manually. 
--------- Co-authored-by: Test --- agent/prompt_builder.py | 26 ++++++---- agent/skill_commands.py | 6 ++- hermes_cli/banner.py | 33 ++++++------- tests/agent/test_prompt_builder.py | 29 +++++++++++ tests/agent/test_skill_commands.py | 15 ++++++ tests/hermes_cli/test_banner_skills.py | 68 ++++++++++++++++++++++++++ tests/tools/test_skills_tool.py | 29 +++++++++++ tools/skills_tool.py | 14 ++++++ 8 files changed, 190 insertions(+), 30 deletions(-) create mode 100644 tests/hermes_cli/test_banner_skills.py diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 4ce84473..8dc3124b 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -330,28 +330,34 @@ def build_skills_system_prompt( # Each entry: (skill_name, description) # Supports sub-categories: skills/mlops/training/axolotl/SKILL.md # -> category "mlops/training", skill "axolotl" + # Load disabled skill names once for the entire scan + try: + from tools.skills_tool import _get_disabled_skill_names + disabled = _get_disabled_skill_names() + except Exception: + disabled = set() + skills_by_category: dict[str, list[tuple[str, str]]] = {} for skill_file in skills_dir.rglob("SKILL.md"): - is_compatible, _, desc = _parse_skill_file(skill_file) + is_compatible, frontmatter, desc = _parse_skill_file(skill_file) if not is_compatible: continue - # Skip skills whose conditional activation rules exclude them - conditions = _read_skill_conditions(skill_file) - if not _skill_should_show(conditions, available_tools, available_toolsets): - continue rel_path = skill_file.relative_to(skills_dir) parts = rel_path.parts if len(parts) >= 2: - # Category is everything between skills_dir and the skill folder - # e.g. parts = ("mlops", "training", "axolotl", "SKILL.md") - # → category = "mlops/training", skill_name = "axolotl" - # e.g. 
parts = ("github", "github-auth", "SKILL.md") - # → category = "github", skill_name = "github-auth" skill_name = parts[-2] category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0] else: category = "general" skill_name = skill_file.parent.name + # Respect user's disabled skills config + fm_name = frontmatter.get("name", skill_name) + if fm_name in disabled or skill_name in disabled: + continue + # Skip skills whose conditional activation rules exclude them + conditions = _read_skill_conditions(skill_file) + if not _skill_should_show(conditions, available_tools, available_toolsets): + continue skills_by_category.setdefault(category, []).append((skill_name, desc)) if not skills_by_category: diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 67315ee8..b266ad25 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -157,9 +157,10 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]: global _skill_commands _skill_commands = {} try: - from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform + from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names if not SKILLS_DIR.exists(): return _skill_commands + disabled = _get_disabled_skill_names() for skill_md in SKILLS_DIR.rglob("SKILL.md"): if any(part in ('.git', '.github', '.hub') for part in skill_md.parts): continue @@ -170,6 +171,9 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]: if not skill_matches_platform(frontmatter): continue name = frontmatter.get('name', skill_md.parent.name) + # Respect user's disabled skills config + if name in disabled: + continue description = frontmatter.get('description', '') if not description: for line in body.strip().split('\n'): diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index c1a1d4c7..addcf98c 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -102,27 +102,22 @@ COMPACT_BANNER = """ # 
========================================================================= def get_available_skills() -> Dict[str, List[str]]: - """Scan ~/.hermes/skills/ and return skills grouped by category.""" - import os + """Return skills grouped by category, filtered by platform and disabled state. - hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) - skills_dir = hermes_home / "skills" - skills_by_category = {} - - if not skills_dir.exists(): - return skills_by_category - - for skill_file in skills_dir.rglob("SKILL.md"): - rel_path = skill_file.relative_to(skills_dir) - parts = rel_path.parts - if len(parts) >= 2: - category = parts[0] - skill_name = parts[-2] - else: - category = "general" - skill_name = skill_file.parent.name - skills_by_category.setdefault(category, []).append(skill_name) + Delegates to ``_find_all_skills()`` from ``tools/skills_tool`` which already + handles platform gating (``platforms:`` frontmatter) and respects the + user's ``skills.disabled`` config list. 
+ """ + try: + from tools.skills_tool import _find_all_skills + all_skills = _find_all_skills() # already filtered + except Exception: + return {} + skills_by_category: Dict[str, List[str]] = {} + for skill in all_skills: + category = skill.get("category") or "general" + skills_by_category.setdefault(category, []).append(skill["name"]) return skills_by_category diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 1de37efb..07c8da18 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -309,6 +309,35 @@ class TestBuildSkillsSystemPrompt: assert "imessage" in result assert "Send iMessages" in result + def test_excludes_disabled_skills(self, monkeypatch, tmp_path): + """Skills in the user's disabled list should not appear in the system prompt.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + skills_dir = tmp_path / "skills" / "tools" + skills_dir.mkdir(parents=True) + + enabled_skill = skills_dir / "web-search" + enabled_skill.mkdir() + (enabled_skill / "SKILL.md").write_text( + "---\nname: web-search\ndescription: Search the web\n---\n" + ) + + disabled_skill = skills_dir / "old-tool" + disabled_skill.mkdir() + (disabled_skill / "SKILL.md").write_text( + "---\nname: old-tool\ndescription: Deprecated tool\n---\n" + ) + + from unittest.mock import patch + + with patch( + "tools.skills_tool._get_disabled_skill_names", + return_value={"old-tool"}, + ): + result = build_skills_system_prompt() + + assert "web-search" in result + assert "old-tool" not in result + def test_includes_setup_needed_skills(self, monkeypatch, tmp_path): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) monkeypatch.delenv("MISSING_API_KEY_XYZ", raising=False) diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py index c0244613..f6a114db 100644 --- a/tests/agent/test_skill_commands.py +++ b/tests/agent/test_skill_commands.py @@ -85,6 +85,21 @@ class TestScanSkillCommands: result = 
scan_skill_commands() assert "/generic-tool" in result + def test_excludes_disabled_skills(self, tmp_path): + """Disabled skills should not register slash commands.""" + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "tools.skills_tool._get_disabled_skill_names", + return_value={"disabled-skill"}, + ), + ): + _make_skill(tmp_path, "enabled-skill") + _make_skill(tmp_path, "disabled-skill") + result = scan_skill_commands() + assert "/enabled-skill" in result + assert "/disabled-skill" not in result + class TestBuildPreloadedSkillsPrompt: def test_builds_prompt_for_multiple_named_skills(self, tmp_path): diff --git a/tests/hermes_cli/test_banner_skills.py b/tests/hermes_cli/test_banner_skills.py new file mode 100644 index 00000000..1006fcc8 --- /dev/null +++ b/tests/hermes_cli/test_banner_skills.py @@ -0,0 +1,68 @@ +"""Tests for banner get_available_skills() — disabled and platform filtering.""" + +from unittest.mock import patch + +import pytest + + +_MOCK_SKILLS = [ + {"name": "skill-a", "description": "A skill", "category": "tools"}, + {"name": "skill-b", "description": "B skill", "category": "tools"}, + {"name": "skill-c", "description": "C skill", "category": "creative"}, +] + + +def test_get_available_skills_delegates_to_find_all_skills(): + """get_available_skills should call _find_all_skills (which handles filtering).""" + with patch("tools.skills_tool._find_all_skills", return_value=list(_MOCK_SKILLS)): + from hermes_cli.banner import get_available_skills + result = get_available_skills() + + assert "tools" in result + assert "creative" in result + assert sorted(result["tools"]) == ["skill-a", "skill-b"] + assert result["creative"] == ["skill-c"] + + +def test_get_available_skills_excludes_disabled(): + """Disabled skills should not appear in the banner count.""" + # _find_all_skills already filters disabled skills, so if we give it + # a filtered list, get_available_skills should reflect that. 
+ filtered = [s for s in _MOCK_SKILLS if s["name"] != "skill-b"] + with patch("tools.skills_tool._find_all_skills", return_value=filtered): + from hermes_cli.banner import get_available_skills + result = get_available_skills() + + all_names = [n for names in result.values() for n in names] + assert "skill-b" not in all_names + assert "skill-a" in all_names + assert len(all_names) == 2 + + +def test_get_available_skills_empty_when_no_skills(): + """No skills installed returns empty dict.""" + with patch("tools.skills_tool._find_all_skills", return_value=[]): + from hermes_cli.banner import get_available_skills + result = get_available_skills() + + assert result == {} + + +def test_get_available_skills_handles_import_failure(): + """If _find_all_skills import fails, return empty dict gracefully.""" + with patch("tools.skills_tool._find_all_skills", side_effect=ImportError("boom")): + from hermes_cli.banner import get_available_skills + result = get_available_skills() + + assert result == {} + + +def test_get_available_skills_null_category_becomes_general(): + """Skills with None category should be grouped under 'general'.""" + skills = [{"name": "orphan-skill", "description": "No cat", "category": None}] + with patch("tools.skills_tool._find_all_skills", return_value=skills): + from hermes_cli.banner import get_available_skills + result = get_available_skills() + + assert "general" in result + assert result["general"] == ["orphan-skill"] diff --git a/tests/tools/test_skills_tool.py b/tests/tools/test_skills_tool.py index bd72228a..6af2c83c 100644 --- a/tests/tools/test_skills_tool.py +++ b/tests/tools/test_skills_tool.py @@ -374,6 +374,35 @@ class TestSkillView: result = json.loads(raw) assert result["success"] is False + def test_view_disabled_skill_blocked(self, tmp_path): + """Disabled skills should not be viewable via skill_view.""" + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "tools.skills_tool._is_skill_disabled", + return_value=True, 
+ ), + ): + _make_skill(tmp_path, "hidden-skill") + raw = skill_view("hidden-skill") + result = json.loads(raw) + assert result["success"] is False + assert "disabled" in result["error"].lower() + + def test_view_enabled_skill_allowed(self, tmp_path): + """Non-disabled skills should be viewable normally.""" + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "tools.skills_tool._is_skill_disabled", + return_value=False, + ), + ): + _make_skill(tmp_path, "active-skill") + raw = skill_view("active-skill") + result = json.loads(raw) + assert result["success"] is True + class TestSkillViewSecureSetupOnLoad: def test_requests_missing_required_env_and_continues(self, tmp_path, monkeypatch): diff --git a/tools/skills_tool.py b/tools/skills_tool.py index 771d7684..bc31cff3 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -920,6 +920,20 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: ensure_ascii=False, ) + # Check if the skill is disabled by the user + resolved_name = parsed_frontmatter.get("name", skill_md.parent.name) + if _is_skill_disabled(resolved_name): + return json.dumps( + { + "success": False, + "error": ( + f"Skill '{resolved_name}' is disabled. " + "Enable it with `hermes skills` or inspect the files directly on disk." + ), + }, + ensure_ascii=False, + ) + # If a specific file path is requested, read that instead if file_path and skill_dir: # Security: Prevent path traversal attacks From f814787144206c7cf852239019de30b7f1a759b7 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:22:58 -0700 Subject: [PATCH 17/25] fix(banner): normalize toolset labels and use skin colors - Strip '_tools' suffix from internal toolset identifiers in the banner (e.g. 
'web_tools' -> 'web', 'homeassistant_tools' -> 'homeassistant') - Stop appending '_tools' to unavailable toolset names - Replace 6 hardcoded hex colors (#B8860B, #FFBF00, #FFF8DC) in toolset rows, overflow line, and MCP server rows with the skin variables (dim, accent, text) already resolved at the top of the function Inspired by PR #1871 by @kshitijk4poor. Adds 4 tests. --- hermes_cli/banner.py | 25 ++++++++---- tests/hermes_cli/test_banner.py | 70 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 tests/hermes_cli/test_banner.py diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index addcf98c..21c577dd 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -228,6 +228,17 @@ def _format_context_length(tokens: int) -> str: return str(tokens) +def _display_toolset_name(toolset_name: str) -> str: + """Normalize internal/legacy toolset identifiers for banner display.""" + if not toolset_name: + return "unknown" + return ( + toolset_name[:-6] + if toolset_name.endswith("_tools") + else toolset_name + ) + + def build_welcome_banner(console: Console, model: str, cwd: str, tools: List[dict] = None, enabled_toolsets: List[str] = None, @@ -292,12 +303,12 @@ def build_welcome_banner(console: Console, model: str, cwd: str, for tool in tools: tool_name = tool["function"]["name"] - toolset = get_toolset_for_tool(tool_name) or "other" + toolset = _display_toolset_name(get_toolset_for_tool(tool_name) or "other") toolsets_dict.setdefault(toolset, []).append(tool_name) for item in unavailable_toolsets: toolset_id = item.get("id", item.get("name", "unknown")) - display_name = f"{toolset_id}_tools" if not toolset_id.endswith("_tools") else toolset_id + display_name = _display_toolset_name(toolset_id) if display_name not in toolsets_dict: toolsets_dict[display_name] = [] for tool_name in item.get("tools", []): @@ -337,10 +348,10 @@ def build_welcome_banner(console: Console, model: str, cwd: str, 
colored_names.append(f"[{text}]{name}[/]") tools_str = ", ".join(colored_names) - right_lines.append(f"[dim #B8860B]{toolset}:[/] {tools_str}") + right_lines.append(f"[dim {dim}]{toolset}:[/] {tools_str}") if remaining_toolsets > 0: - right_lines.append(f"[dim #B8860B](and {remaining_toolsets} more toolsets...)[/]") + right_lines.append(f"[dim {dim}](and {remaining_toolsets} more toolsets...)[/]") # MCP Servers section (only if configured) try: @@ -351,12 +362,12 @@ def build_welcome_banner(console: Console, model: str, cwd: str, if mcp_status: right_lines.append("") - right_lines.append("[bold #FFBF00]MCP Servers[/]") + right_lines.append(f"[bold {accent}]MCP Servers[/]") for srv in mcp_status: if srv["connected"]: right_lines.append( - f"[dim #B8860B]{srv['name']}[/] [#FFF8DC]({srv['transport']})[/] " - f"[dim #B8860B]—[/] [#FFF8DC]{srv['tools']} tool(s)[/]" + f"[dim {dim}]{srv['name']}[/] [{text}]({srv['transport']})[/] " + f"[dim {dim}]—[/] [{text}]{srv['tools']} tool(s)[/]" ) else: right_lines.append( diff --git a/tests/hermes_cli/test_banner.py b/tests/hermes_cli/test_banner.py new file mode 100644 index 00000000..4ea089fd --- /dev/null +++ b/tests/hermes_cli/test_banner.py @@ -0,0 +1,70 @@ +"""Tests for banner toolset name normalization and skin color usage.""" + +from unittest.mock import patch + +from rich.console import Console + +import hermes_cli.banner as banner +import model_tools +import tools.mcp_tool + + +def test_display_toolset_name_strips_legacy_suffix(): + assert banner._display_toolset_name("homeassistant_tools") == "homeassistant" + assert banner._display_toolset_name("honcho_tools") == "honcho" + assert banner._display_toolset_name("web_tools") == "web" + + +def test_display_toolset_name_preserves_clean_names(): + assert banner._display_toolset_name("browser") == "browser" + assert banner._display_toolset_name("file") == "file" + assert banner._display_toolset_name("terminal") == "terminal" + + +def test_display_toolset_name_handles_empty(): 
+ assert banner._display_toolset_name("") == "unknown" + assert banner._display_toolset_name(None) == "unknown" + + +def test_build_welcome_banner_uses_normalized_toolset_names(): + """Unavailable toolsets should not have '_tools' appended in banner output.""" + with ( + patch.object( + model_tools, + "check_tool_availability", + return_value=( + ["web"], + [ + {"name": "homeassistant", "tools": ["ha_call_service"]}, + {"name": "honcho", "tools": ["honcho_conclude"]}, + ], + ), + ), + patch.object(banner, "get_available_skills", return_value={}), + patch.object(banner, "get_update_result", return_value=None), + patch.object(tools.mcp_tool, "get_mcp_status", return_value=[]), + ): + console = Console( + record=True, force_terminal=False, color_system=None, width=160 + ) + banner.build_welcome_banner( + console=console, + model="anthropic/test-model", + cwd="/tmp/project", + tools=[ + {"function": {"name": "web_search"}}, + {"function": {"name": "read_file"}}, + ], + get_toolset_for_tool=lambda name: { + "web_search": "web_tools", + "read_file": "file", + }.get(name), + ) + + output = console.export_text() + assert "homeassistant:" in output + assert "honcho:" in output + assert "web:" in output + assert "homeassistant_tools:" not in output + assert "honcho_tools:" not in output + assert "web_tools:" not in output From 00cc0c6a286de96414f46982f1a656f990e37b1d Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:23:20 -0700 Subject: [PATCH 18/25] feat: add gpt-5.4-mini, gpt-5.4-nano, healer-alpha to OpenRouter catalog --- hermes_cli/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 5a3f871f..c8ef070c 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -20,6 +20,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("anthropic/claude-sonnet-4.5", ""), ("openai/gpt-5.4-pro", ""), ("openai/gpt-5.4", ""), + ("openai/gpt-5.4-mini", ""), + ("openai/gpt-5.4-nano", ""), ("openai/gpt-5.3-codex", 
""), ("google/gemini-3-pro-preview", ""), ("google/gemini-3-flash-preview", ""), @@ -34,6 +36,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("arcee-ai/trinity-large-preview:free", "free"), ("z-ai/glm-5-turbo", ""), ("openrouter/hunter-alpha", ""), + ("openrouter/healer-alpha", ""), ] _PROVIDER_MODELS: dict[str, list[str]] = { From c0c14e60b478b0908b0d968cbb58b7fad8cd22f2 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 18 Mar 2026 03:25:38 -0700 Subject: [PATCH 19/25] fix: make concurrent tool batching path-aware for file mutations (#1914) * Improve tool batching independence checks * fix: address review feedback on path-aware batching - Log malformed/non-dict tool arguments at debug level before falling back to sequential, instead of silently swallowing the error into an empty dict - Guard empty paths in _paths_overlap (unreachable in practice due to upstream filtering, but makes the invariant explicit) - Add tests: malformed JSON args, non-dict args, _paths_overlap unit tests including empty path edge cases - web_crawl is not a registered tool (only web_search/web_extract are); no addition needed to _PARALLEL_SAFE_TOOLS --------- Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com> --- run_agent.py | 100 +++++++++++++++++++++++++++++--- tests/test_run_agent.py | 123 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 215 insertions(+), 8 deletions(-) diff --git a/run_agent.py b/run_agent.py index f0e8f25d..fee04f39 100644 --- a/run_agent.py +++ b/run_agent.py @@ -203,6 +203,27 @@ class IterationBudget: # When any of these appear in a batch, we fall back to sequential execution. _NEVER_PARALLEL_TOOLS = frozenset({"clarify"}) +# Read-only tools with no shared mutable session state. 
+_PARALLEL_SAFE_TOOLS = frozenset({ + "ha_get_state", + "ha_list_entities", + "ha_list_services", + "honcho_context", + "honcho_profile", + "honcho_search", + "read_file", + "search_files", + "session_search", + "skill_view", + "skills_list", + "vision_analyze", + "web_extract", + "web_search", +}) + +# File tools can run concurrently when they target independent paths. +_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"}) + # Maximum number of concurrent worker threads for parallel tool execution. _MAX_TOOL_WORKERS = 8 @@ -234,6 +255,74 @@ def _is_destructive_command(cmd: str) -> bool: return False +def _should_parallelize_tool_batch(tool_calls) -> bool: + """Return True when a tool-call batch is safe to run concurrently.""" + if len(tool_calls) <= 1: + return False + + tool_names = [tc.function.name for tc in tool_calls] + if any(name in _NEVER_PARALLEL_TOOLS for name in tool_names): + return False + + reserved_paths: list[Path] = [] + for tool_call in tool_calls: + tool_name = tool_call.function.name + try: + function_args = json.loads(tool_call.function.arguments) + except Exception: + logging.debug( + "Could not parse args for %s — defaulting to sequential; raw=%s", + tool_name, + tool_call.function.arguments[:200], + ) + return False + if not isinstance(function_args, dict): + logging.debug( + "Non-dict args for %s (%s) — defaulting to sequential", + tool_name, + type(function_args).__name__, + ) + return False + + if tool_name in _PATH_SCOPED_TOOLS: + scoped_path = _extract_parallel_scope_path(tool_name, function_args) + if scoped_path is None: + return False + if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths): + return False + reserved_paths.append(scoped_path) + continue + + if tool_name not in _PARALLEL_SAFE_TOOLS: + return False + + return True + + +def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Path | None: + """Return the normalized file target for path-scoped tools.""" + if tool_name 
not in _PATH_SCOPED_TOOLS: + return None + + raw_path = function_args.get("path") + if not isinstance(raw_path, str) or not raw_path.strip(): + return None + + # Avoid resolve(); the file may not exist yet. + return Path(raw_path).expanduser() + + +def _paths_overlap(left: Path, right: Path) -> bool: + """Return True when two paths may refer to the same subtree.""" + left_parts = left.parts + right_parts = right.parts + if not left_parts or not right_parts: + # Empty paths shouldn't reach here (guarded upstream), but be safe. + return bool(left_parts) == bool(right_parts) and bool(left_parts) + common_len = min(len(left_parts), len(right_parts)) + return left_parts[:common_len] == right_parts[:common_len] + + def _inject_honcho_turn_context(content, turn_context: str): """Append Honcho recall to the current-turn user message without mutating history. @@ -4078,20 +4167,17 @@ class AIAgent: def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: """Execute tool calls from the assistant message and append results to messages. - Dispatches to concurrent execution when multiple independent tool calls - are present, falling back to sequential execution for single calls or - when interactive tools (e.g. clarify) are in the batch. + Dispatches to concurrent execution only for batches that look + independent: read-only tools may always share the parallel path, while + file reads/writes may do so only when their target paths do not overlap. 
""" tool_calls = assistant_message.tool_calls - # Single tool call or interactive tool present → sequential - if (len(tool_calls) <= 1 - or any(tc.function.name in _NEVER_PARALLEL_TOOLS for tc in tool_calls)): + if not _should_parallelize_tool_batch(tool_calls): return self._execute_tool_calls_sequential( assistant_message, messages, effective_task_id, api_call_count ) - # Multiple non-interactive tools → concurrent return self._execute_tool_calls_concurrent( assistant_message, messages, effective_task_id, api_call_count ) diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index ec9b26f3..50b3a509 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -806,7 +806,7 @@ class TestConcurrentToolExecution: mock_con.assert_not_called() def test_multiple_tools_uses_concurrent_path(self, agent): - """Multiple non-interactive tools should use concurrent path.""" + """Multiple read-only tools should use concurrent path.""" tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") tc2 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c2") mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) @@ -817,6 +817,94 @@ class TestConcurrentToolExecution: mock_con.assert_called_once() mock_seq.assert_not_called() + def test_terminal_batch_forces_sequential(self, agent): + """Stateful tools should not share the concurrent execution path.""" + tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") + tc2 = _mock_tool_call(name="terminal", arguments='{"command":"pwd"}', call_id="c2") + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) + messages = [] + with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: + with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con: + agent._execute_tool_calls(mock_msg, messages, "task-1") + mock_seq.assert_called_once() + mock_con.assert_not_called() + + def test_write_batch_forces_sequential(self, agent): + """File 
mutations should stay ordered within a turn.""" + tc1 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c1") + tc2 = _mock_tool_call(name="write_file", arguments='{"path":"x.py","content":"print(1)"}', call_id="c2") + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) + messages = [] + with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: + with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con: + agent._execute_tool_calls(mock_msg, messages, "task-1") + mock_seq.assert_called_once() + mock_con.assert_not_called() + + def test_disjoint_write_batch_uses_concurrent_path(self, agent): + """Independent file writes should still run concurrently.""" + tc1 = _mock_tool_call( + name="write_file", + arguments='{"path":"src/a.py","content":"print(1)"}', + call_id="c1", + ) + tc2 = _mock_tool_call( + name="write_file", + arguments='{"path":"src/b.py","content":"print(2)"}', + call_id="c2", + ) + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) + messages = [] + with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: + with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con: + agent._execute_tool_calls(mock_msg, messages, "task-1") + mock_con.assert_called_once() + mock_seq.assert_not_called() + + def test_overlapping_write_batch_forces_sequential(self, agent): + """Writes to the same file must stay ordered.""" + tc1 = _mock_tool_call( + name="write_file", + arguments='{"path":"src/a.py","content":"print(1)"}', + call_id="c1", + ) + tc2 = _mock_tool_call( + name="patch", + arguments='{"path":"src/a.py","old_string":"1","new_string":"2"}', + call_id="c2", + ) + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) + messages = [] + with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: + with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con: + agent._execute_tool_calls(mock_msg, messages, "task-1") + 
mock_seq.assert_called_once() + mock_con.assert_not_called() + + def test_malformed_json_args_forces_sequential(self, agent): + """Unparseable tool arguments should fall back to sequential.""" + tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") + tc2 = _mock_tool_call(name="web_search", arguments="NOT JSON {{{", call_id="c2") + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) + messages = [] + with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: + with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con: + agent._execute_tool_calls(mock_msg, messages, "task-1") + mock_seq.assert_called_once() + mock_con.assert_not_called() + + def test_non_dict_args_forces_sequential(self, agent): + """Tool arguments that parse to a non-dict type should fall back to sequential.""" + tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1") + tc2 = _mock_tool_call(name="web_search", arguments='"just a string"', call_id="c2") + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2]) + messages = [] + with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq: + with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con: + agent._execute_tool_calls(mock_msg, messages, "task-1") + mock_seq.assert_called_once() + mock_con.assert_not_called() + def test_concurrent_executes_all_tools(self, agent): """Concurrent path should execute all tools and append results in order.""" tc1 = _mock_tool_call(name="web_search", arguments='{"q":"alpha"}', call_id="c1") @@ -943,6 +1031,39 @@ class TestConcurrentToolExecution: assert "ok" in result +class TestPathsOverlap: + """Unit tests for the _paths_overlap helper.""" + + def test_same_path_overlaps(self): + from run_agent import _paths_overlap + assert _paths_overlap(Path("src/a.py"), Path("src/a.py")) + + def test_siblings_do_not_overlap(self): + from run_agent import _paths_overlap + assert not _paths_overlap(Path("src/a.py"), 
Path("src/b.py")) + + def test_parent_child_overlap(self): + from run_agent import _paths_overlap + assert _paths_overlap(Path("src"), Path("src/sub/a.py")) + + def test_different_roots_do_not_overlap(self): + from run_agent import _paths_overlap + assert not _paths_overlap(Path("src/a.py"), Path("other/a.py")) + + def test_nested_vs_flat_do_not_overlap(self): + from run_agent import _paths_overlap + assert not _paths_overlap(Path("src/sub/a.py"), Path("src/a.py")) + + def test_empty_paths_do_not_overlap(self): + from run_agent import _paths_overlap + assert not _paths_overlap(Path(""), Path("")) + + def test_one_empty_path_does_not_overlap(self): + from run_agent import _paths_overlap + assert not _paths_overlap(Path(""), Path("src/a.py")) + assert not _paths_overlap(Path("src/a.py"), Path("")) + + class TestHandleMaxIterations: def test_returns_summary(self, agent): resp = _mock_response(content="Here is a summary of what I did.") From cb54750e07786568d6040d21c0d706d55e0dfdd7 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:26:06 -0700 Subject: [PATCH 20/25] feat: reorder OpenRouter catalog, add haiku-4.5, fix minimax slug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add anthropic/claude-haiku-4.5 - Move gpt-5.4-pro and gpt-5.4-nano to bottom - Fix minimax/minimax-m2.7 → minimax-m2.5 (m2.7 not on OpenRouter) - Tag hunter-alpha and healer-alpha as free - Place hunter/healer-alpha right below gpt-5.4-mini --- hermes_cli/models.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index c8ef070c..ca51e659 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -18,10 +18,11 @@ from typing import Any, Optional OPENROUTER_MODELS: list[tuple[str, str]] = [ ("anthropic/claude-opus-4.6", "recommended"), ("anthropic/claude-sonnet-4.5", ""), - ("openai/gpt-5.4-pro", ""), + ("anthropic/claude-haiku-4.5", ""), ("openai/gpt-5.4", ""), 
("openai/gpt-5.4-mini", ""), - ("openai/gpt-5.4-nano", ""), + ("openrouter/hunter-alpha", "free"), + ("openrouter/healer-alpha", "free"), ("openai/gpt-5.3-codex", ""), ("google/gemini-3-pro-preview", ""), ("google/gemini-3-flash-preview", ""), @@ -30,13 +31,13 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("z-ai/glm-5", ""), ("moonshotai/kimi-k2.5", ""), - ("minimax/minimax-m2.7", ""), + ("minimax/minimax-m2.5", ""), ("x-ai/grok-4.20-beta", ""), ("nvidia/nemotron-3-super-120b-a12b:free", "free"), ("arcee-ai/trinity-large-preview:free", "free"), ("z-ai/glm-5-turbo", ""), - ("openrouter/hunter-alpha", ""), - ("openrouter/healer-alpha", ""), + ("openai/gpt-5.4-pro", ""), + ("openai/gpt-5.4-nano", ""), ] _PROVIDER_MODELS: dict[str, list[str]] = { From b05f9b62564c42bd11eeeaef0b232864a78a1ed0 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:31:04 -0700 Subject: [PATCH 21/25] =?UTF-8?q?chore:=20reorder=20OpenRouter=20catalog?= =?UTF-8?q?=20=E2=80=94=20glm-5-turbo=20under=20glm-5,=20minimax=20under?= =?UTF-8?q?=20stepfun?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hermes_cli/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index ca51e659..f0bb54c9 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -29,13 +29,13 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("qwen/qwen3.5-plus-02-15", ""), ("qwen/qwen3.5-35b-a3b", ""), ("stepfun/step-3.5-flash", ""), - ("z-ai/glm-5", ""), - ("moonshotai/kimi-k2.5", ""), ("minimax/minimax-m2.5", ""), + ("z-ai/glm-5", ""), + ("z-ai/glm-5-turbo", ""), + ("moonshotai/kimi-k2.5", ""), ("x-ai/grok-4.20-beta", ""), ("nvidia/nemotron-3-super-120b-a12b:free", "free"), ("arcee-ai/trinity-large-preview:free", "free"), - ("z-ai/glm-5-turbo", ""), ("openai/gpt-5.4-pro", ""), ("openai/gpt-5.4-nano", ""), ] From c1750bb32d6666e77482009d0aa47ca69b38a18a Mon Sep 17 
00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:49:49 -0700 Subject: [PATCH 22/25] feat(cli): add /statusbar command to toggle context bar Adds /statusbar (alias /sb) to show/hide the bottom status bar that displays model name, context usage, and session duration. Uses ConditionalContainer so the bar takes zero space when hidden rather than leaving a blank line. --- cli.py | 18 +++++++++++++++--- hermes_cli/commands.py | 2 ++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cli.py b/cli.py index 18620142..e61f5a0f 100755 --- a/cli.py +++ b/cli.py @@ -1215,6 +1215,9 @@ class HermesCLI: self._voice_tts_done = threading.Event() self._voice_tts_done.set() + # Status bar visibility (toggled via /statusbar) + self._status_bar_visible = True + # Background task tracking: {task_id: threading.Thread} self._background_tasks: Dict[str, threading.Thread] = {} self._background_task_counter = 0 @@ -1322,6 +1325,8 @@ class HermesCLI: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" def _get_status_bar_fragments(self): + if not self._status_bar_visible: + return [] try: snapshot = self._get_status_bar_snapshot() width = shutil.get_terminal_size((80, 24)).columns @@ -3551,6 +3556,10 @@ class HermesCLI: self._handle_skills_command(cmd_original) elif canonical == "platforms": self._show_gateway_status() + elif canonical == "statusbar": + self._status_bar_visible = not self._status_bar_visible + state = "visible" if self._status_bar_visible else "hidden" + self.console.print(f" Status bar {state}") elif canonical == "verbose": self._toggle_verbose() elif canonical == "reasoning": @@ -6587,9 +6596,12 @@ class HermesCLI: filter=Condition(lambda: cli_ref._voice_mode), ) - status_bar = Window( - content=FormattedTextControl(lambda: cli_ref._get_status_bar_fragments()), - height=1, + status_bar = ConditionalContainer( + Window( + content=FormattedTextControl(lambda: cli_ref._get_status_bar_fragments()), + height=1, + ), + filter=Condition(lambda: 
cli_ref._status_bar_visible), ) # Layout: interactive prompt widgets + ruled input at bottom. diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index fca97ebe..7ea34941 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -81,6 +81,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ cli_only=True, args_hint="[text]", subcommands=("clear",)), CommandDef("personality", "Set a predefined personality", "Configuration", args_hint="[name]"), + CommandDef("statusbar", "Toggle the context/model status bar", "Configuration", + cli_only=True, aliases=("sb",)), CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose", "Configuration", cli_only=True), CommandDef("reasoning", "Manage reasoning effort and display", "Configuration", From 0e2714acea308493499ff337f500858a8565cb25 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 18 Mar 2026 04:06:02 -0700 Subject: [PATCH 23/25] fix(cron): recover recent one-shot jobs (#1918) Co-authored-by: Frederico Ribeiro --- cron/jobs.py | 53 +++++++++++++++++++++++++-- tests/cron/test_jobs.py | 81 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 5 deletions(-) diff --git a/cron/jobs.py b/cron/jobs.py index 30d20f1e..da4382cb 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -34,6 +34,7 @@ HERMES_DIR = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) CRON_DIR = HERMES_DIR / "cron" JOBS_FILE = CRON_DIR / "jobs.json" OUTPUT_DIR = CRON_DIR / "output" +ONESHOT_GRACE_SECONDS = 120 def _normalize_skill_list(skill: Optional[str] = None, skills: Optional[Any] = None) -> List[str]: @@ -220,6 +221,33 @@ def _ensure_aware(dt: datetime) -> datetime: return dt.astimezone(target_tz) +def _recoverable_oneshot_run_at( + schedule: Dict[str, Any], + now: datetime, + *, + last_run_at: Optional[str] = None, +) -> Optional[str]: + """Return a one-shot run time if it is still eligible to fire. 
+ + One-shot jobs get a small grace window so jobs created a few seconds after + their requested minute still run on the next tick. Once a one-shot has + already run, it is never eligible again. + """ + if schedule.get("kind") != "once": + return None + if last_run_at: + return None + + run_at = schedule.get("run_at") + if not run_at: + return None + + run_at_dt = _ensure_aware(datetime.fromisoformat(run_at)) + if run_at_dt >= now - timedelta(seconds=ONESHOT_GRACE_SECONDS): + return run_at + return None + + def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]: """ Compute the next run time for a schedule. @@ -229,9 +257,7 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None now = _hermes_now() if schedule["kind"] == "once": - run_at = _ensure_aware(datetime.fromisoformat(schedule["run_at"])) - # If in the future, return it; if in the past, no more runs - return schedule["run_at"] if run_at > now else None + return _recoverable_oneshot_run_at(schedule, now, last_run_at=last_run_at) elif schedule["kind"] == "interval": minutes = schedule["minutes"] @@ -555,7 +581,26 @@ def get_due_jobs() -> List[Dict[str, Any]]: next_run = job.get("next_run_at") if not next_run: - continue + recovered_next = _recoverable_oneshot_run_at( + job.get("schedule", {}), + now, + last_run_at=job.get("last_run_at"), + ) + if not recovered_next: + continue + + job["next_run_at"] = recovered_next + next_run = recovered_next + logger.info( + "Job '%s' had no next_run_at; recovering one-shot run at %s", + job.get("name", job["id"]), + recovered_next, + ) + for rj in raw_jobs: + if rj["id"] == job["id"]: + rj["next_run_at"] = recovered_next + needs_save = True + break next_run_dt = _ensure_aware(datetime.fromisoformat(next_run)) if next_run_dt <= now: diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index 31565e67..e0e80fe8 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -2,7 +2,7 @@ 
import json import pytest -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from pathlib import Path from unittest.mock import patch @@ -122,11 +122,29 @@ class TestComputeNextRun: schedule = {"kind": "once", "run_at": future} assert compute_next_run(schedule) == future + def test_once_recent_past_within_grace_returns_time(self, monkeypatch): + now = datetime(2026, 3, 18, 4, 22, 3, tzinfo=timezone.utc) + run_at = "2026-03-18T04:22:00+00:00" + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + schedule = {"kind": "once", "run_at": run_at} + + assert compute_next_run(schedule) == run_at + def test_once_past_returns_none(self): past = (datetime.now() - timedelta(hours=1)).isoformat() schedule = {"kind": "once", "run_at": past} assert compute_next_run(schedule) is None + def test_once_with_last_run_returns_none_even_within_grace(self, monkeypatch): + now = datetime(2026, 3, 18, 4, 22, 3, tzinfo=timezone.utc) + run_at = "2026-03-18T04:22:00+00:00" + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + schedule = {"kind": "once", "run_at": run_at} + + assert compute_next_run(schedule, last_run_at=now.isoformat()) is None + def test_interval_first_run(self): schedule = {"kind": "interval", "minutes": 60} result = compute_next_run(schedule) @@ -347,6 +365,67 @@ class TestGetDueJobs: due = get_due_jobs() assert len(due) == 0 + def test_broken_recent_one_shot_without_next_run_is_recovered(self, tmp_cron_dir, monkeypatch): + now = datetime(2026, 3, 18, 4, 22, 30, tzinfo=timezone.utc) + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + run_at = "2026-03-18T04:22:00+00:00" + save_jobs( + [{ + "id": "oneshot-recover", + "name": "Recover me", + "prompt": "Word of the day", + "schedule": {"kind": "once", "run_at": run_at, "display": "once at 2026-03-18 04:22"}, + "schedule_display": "once at 2026-03-18 04:22", + "repeat": {"times": 1, "completed": 0}, + "enabled": True, + "state": "scheduled", + 
"paused_at": None, + "paused_reason": None, + "created_at": "2026-03-18T04:21:00+00:00", + "next_run_at": None, + "last_run_at": None, + "last_status": None, + "last_error": None, + "deliver": "local", + "origin": None, + }] + ) + + due = get_due_jobs() + + assert [job["id"] for job in due] == ["oneshot-recover"] + assert get_job("oneshot-recover")["next_run_at"] == run_at + + def test_broken_stale_one_shot_without_next_run_is_not_recovered(self, tmp_cron_dir, monkeypatch): + now = datetime(2026, 3, 18, 4, 30, 0, tzinfo=timezone.utc) + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + save_jobs( + [{ + "id": "oneshot-stale", + "name": "Too old", + "prompt": "Word of the day", + "schedule": {"kind": "once", "run_at": "2026-03-18T04:22:00+00:00", "display": "once at 2026-03-18 04:22"}, + "schedule_display": "once at 2026-03-18 04:22", + "repeat": {"times": 1, "completed": 0}, + "enabled": True, + "state": "scheduled", + "paused_at": None, + "paused_reason": None, + "created_at": "2026-03-18T04:21:00+00:00", + "next_run_at": None, + "last_run_at": None, + "last_status": None, + "last_error": None, + "deliver": "local", + "origin": None, + }] + ) + + assert get_due_jobs() == [] + assert get_job("oneshot-stale")["next_run_at"] is None + class TestSaveJobOutput: def test_creates_output_file(self, tmp_cron_dir): From 0a247a50f2039e4474a3a36ee6b19a481803f629 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 18 Mar 2026 04:06:08 -0700 Subject: [PATCH 24/25] feat: support ignoring unauthorized gateway DMs (#1919) Add unauthorized_dm_behavior config (pair|ignore) with global default and per-platform override. WhatsApp can silently drop unknown DMs instead of sending pairing codes. Adapted config bridging to work with gw_data dict (pre-construction) rather than config object. Dropped implementation plan document. 
Co-authored-by: Frederico Ribeiro --- gateway/config.py | 61 ++++++++ gateway/run.py | 9 +- tests/gateway/test_config.py | 34 +++++ .../gateway/test_unauthorized_dm_behavior.py | 137 ++++++++++++++++++ website/docs/user-guide/configuration.md | 15 ++ website/docs/user-guide/messaging/whatsapp.md | 20 +++ website/docs/user-guide/security.md | 13 ++ 7 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 tests/gateway/test_unauthorized_dm_behavior.py diff --git a/gateway/config.py b/gateway/config.py index 55a811aa..242111dd 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -32,6 +32,15 @@ def _coerce_bool(value: Any, default: bool = True) -> bool: return bool(value) +def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str: + """Normalize unauthorized DM behavior to a supported value.""" + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"pair", "ignore"}: + return normalized + return default + + class Platform(Enum): """Supported messaging platforms.""" LOCAL = "local" @@ -215,6 +224,9 @@ class GatewayConfig: # Session isolation in shared chats group_sessions_per_user: bool = True # Isolate group/channel sessions per participant when user IDs are available + # Unauthorized DM policy + unauthorized_dm_behavior: str = "pair" # "pair" or "ignore" + # Streaming configuration streaming: StreamingConfig = field(default_factory=StreamingConfig) @@ -289,6 +301,7 @@ class GatewayConfig: "always_log_local": self.always_log_local, "stt_enabled": self.stt_enabled, "group_sessions_per_user": self.group_sessions_per_user, + "unauthorized_dm_behavior": self.unauthorized_dm_behavior, "streaming": self.streaming.to_dict(), } @@ -331,6 +344,10 @@ class GatewayConfig: stt_enabled = data.get("stt", {}).get("enabled") if isinstance(data.get("stt"), dict) else None group_sessions_per_user = data.get("group_sessions_per_user") + unauthorized_dm_behavior = _normalize_unauthorized_dm_behavior( + 
data.get("unauthorized_dm_behavior"), + "pair", + ) return cls( platforms=platforms, @@ -343,9 +360,21 @@ class GatewayConfig: always_log_local=data.get("always_log_local", True), stt_enabled=_coerce_bool(stt_enabled, True), group_sessions_per_user=_coerce_bool(group_sessions_per_user, True), + unauthorized_dm_behavior=unauthorized_dm_behavior, streaming=StreamingConfig.from_dict(data.get("streaming", {})), ) + def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str: + """Return the effective unauthorized-DM behavior for a platform.""" + if platform: + platform_cfg = self.platforms.get(platform) + if platform_cfg and "unauthorized_dm_behavior" in platform_cfg.extra: + return _normalize_unauthorized_dm_behavior( + platform_cfg.extra.get("unauthorized_dm_behavior"), + self.unauthorized_dm_behavior, + ) + return self.unauthorized_dm_behavior + def load_gateway_config() -> GatewayConfig: """ @@ -416,6 +445,38 @@ def load_gateway_config() -> GatewayConfig: if "always_log_local" in yaml_cfg: gw_data["always_log_local"] = yaml_cfg["always_log_local"] + if "unauthorized_dm_behavior" in yaml_cfg: + gw_data["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior( + yaml_cfg.get("unauthorized_dm_behavior"), + "pair", + ) + + # Bridge per-platform unauthorized_dm_behavior from config.yaml + platforms_data = gw_data.setdefault("platforms", {}) + if not isinstance(platforms_data, dict): + platforms_data = {} + gw_data["platforms"] = platforms_data + for plat in Platform: + if plat == Platform.LOCAL: + continue + platform_cfg = yaml_cfg.get(plat.value) + if not isinstance(platform_cfg, dict): + continue + if "unauthorized_dm_behavior" not in platform_cfg: + continue + plat_data = platforms_data.setdefault(plat.value, {}) + if not isinstance(plat_data, dict): + plat_data = {} + platforms_data[plat.value] = plat_data + extra = plat_data.setdefault("extra", {}) + if not isinstance(extra, dict): + extra = {} + plat_data["extra"] = extra + 
extra["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior( + platform_cfg.get("unauthorized_dm_behavior"), + gw_data.get("unauthorized_dm_behavior", "pair"), + ) + # Discord settings → env vars (env vars take precedence) discord_cfg = yaml_cfg.get("discord", {}) if isinstance(discord_cfg, dict): diff --git a/gateway/run.py b/gateway/run.py index 62d16e68..95663cb9 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1257,6 +1257,13 @@ class GatewayRunner: if "@" in user_id: check_ids.add(user_id.split("@")[0]) return bool(check_ids & allowed_ids) + + def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str: + """Return how unauthorized DMs should be handled for a platform.""" + config = getattr(self, "config", None) + if config and hasattr(config, "get_unauthorized_dm_behavior"): + return config.get_unauthorized_dm_behavior(platform) + return "pair" async def _handle_message(self, event: MessageEvent) -> Optional[str]: """ @@ -1277,7 +1284,7 @@ class GatewayRunner: if not self._is_user_authorized(source): logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value) # In DMs: offer pairing code. In groups: silently ignore. 
- if source.chat_type == "dm": + if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair": platform_name = source.platform.value if source.platform else "unknown" code = self.pairing_store.generate_code( platform_name, source.user_id, source.user_name or "" diff --git a/tests/gateway/test_config.py b/tests/gateway/test_config.py index 363118b3..8dbb725d 100644 --- a/tests/gateway/test_config.py +++ b/tests/gateway/test_config.py @@ -115,6 +115,22 @@ class TestGatewayConfigRoundtrip: assert restored.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}} assert restored.group_sessions_per_user is False + def test_roundtrip_preserves_unauthorized_dm_behavior(self): + config = GatewayConfig( + unauthorized_dm_behavior="ignore", + platforms={ + Platform.WHATSAPP: PlatformConfig( + enabled=True, + extra={"unauthorized_dm_behavior": "pair"}, + ), + }, + ) + + restored = GatewayConfig.from_dict(config.to_dict()) + + assert restored.unauthorized_dm_behavior == "ignore" + assert restored.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair" + class TestLoadGatewayConfig: def test_bridges_quick_commands_from_config_yaml(self, tmp_path, monkeypatch): @@ -158,3 +174,21 @@ class TestLoadGatewayConfig: config = load_gateway_config() assert config.quick_commands == {} + + def test_bridges_unauthorized_dm_behavior_from_config_yaml(self, tmp_path, monkeypatch): + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text( + "unauthorized_dm_behavior: ignore\n" + "whatsapp:\n" + " unauthorized_dm_behavior: pair\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + config = load_gateway_config() + + assert config.unauthorized_dm_behavior == "ignore" + assert config.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair" diff --git a/tests/gateway/test_unauthorized_dm_behavior.py 
b/tests/gateway/test_unauthorized_dm_behavior.py new file mode 100644 index 00000000..0dbe457a --- /dev/null +++ b/tests/gateway/test_unauthorized_dm_behavior.py @@ -0,0 +1,137 @@ +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource + + +def _clear_auth_env(monkeypatch) -> None: + for key in ( + "TELEGRAM_ALLOWED_USERS", + "DISCORD_ALLOWED_USERS", + "WHATSAPP_ALLOWED_USERS", + "SLACK_ALLOWED_USERS", + "SIGNAL_ALLOWED_USERS", + "EMAIL_ALLOWED_USERS", + "SMS_ALLOWED_USERS", + "MATTERMOST_ALLOWED_USERS", + "MATRIX_ALLOWED_USERS", + "DINGTALK_ALLOWED_USERS", + "GATEWAY_ALLOWED_USERS", + "TELEGRAM_ALLOW_ALL_USERS", + "DISCORD_ALLOW_ALL_USERS", + "WHATSAPP_ALLOW_ALL_USERS", + "SLACK_ALLOW_ALL_USERS", + "SIGNAL_ALLOW_ALL_USERS", + "EMAIL_ALLOW_ALL_USERS", + "SMS_ALLOW_ALL_USERS", + "MATTERMOST_ALLOW_ALL_USERS", + "MATRIX_ALLOW_ALL_USERS", + "DINGTALK_ALLOW_ALL_USERS", + "GATEWAY_ALLOW_ALL_USERS", + ): + monkeypatch.delenv(key, raising=False) + + +def _make_event(platform: Platform, user_id: str, chat_id: str) -> MessageEvent: + return MessageEvent( + text="hello", + message_id="m1", + source=SessionSource( + platform=platform, + user_id=user_id, + chat_id=chat_id, + user_name="tester", + chat_type="dm", + ), + ) + + +def _make_runner(platform: Platform, config: GatewayConfig): + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner.config = config + adapter = SimpleNamespace(send=AsyncMock()) + runner.adapters = {platform: adapter} + runner.pairing_store = MagicMock() + runner.pairing_store.is_approved.return_value = False + return runner, adapter + + +@pytest.mark.asyncio +async def test_unauthorized_dm_pairs_by_default(monkeypatch): + _clear_auth_env(monkeypatch) + config = GatewayConfig( + platforms={Platform.WHATSAPP: 
PlatformConfig(enabled=True)}, + ) + runner, adapter = _make_runner(Platform.WHATSAPP, config) + runner.pairing_store.generate_code.return_value = "ABC12DEF" + + result = await runner._handle_message( + _make_event( + Platform.WHATSAPP, + "15551234567@s.whatsapp.net", + "15551234567@s.whatsapp.net", + ) + ) + + assert result is None + runner.pairing_store.generate_code.assert_called_once_with( + "whatsapp", + "15551234567@s.whatsapp.net", + "tester", + ) + adapter.send.assert_awaited_once() + assert "ABC12DEF" in adapter.send.await_args.args[1] + + +@pytest.mark.asyncio +async def test_unauthorized_whatsapp_dm_can_be_ignored(monkeypatch): + _clear_auth_env(monkeypatch) + config = GatewayConfig( + platforms={ + Platform.WHATSAPP: PlatformConfig( + enabled=True, + extra={"unauthorized_dm_behavior": "ignore"}, + ), + }, + ) + runner, adapter = _make_runner(Platform.WHATSAPP, config) + + result = await runner._handle_message( + _make_event( + Platform.WHATSAPP, + "15551234567@s.whatsapp.net", + "15551234567@s.whatsapp.net", + ) + ) + + assert result is None + runner.pairing_store.generate_code.assert_not_called() + adapter.send.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_global_ignore_suppresses_pairing_reply(monkeypatch): + _clear_auth_env(monkeypatch) + config = GatewayConfig( + unauthorized_dm_behavior="ignore", + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}, + ) + runner, adapter = _make_runner(Platform.TELEGRAM, config) + + result = await runner._handle_message( + _make_event( + Platform.TELEGRAM, + "12345", + "12345", + ) + ) + + assert result is None + runner.pairing_store.generate_code.assert_not_called() + adapter.send.assert_not_awaited() diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index aa770c9e..28b54ffa 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -1090,6 +1090,21 @@ group_sessions_per_user: true # 
true = per-user isolation in groups/channels, f For the behavior details and examples, see [Sessions](/docs/user-guide/sessions) and the [Discord guide](/docs/user-guide/messaging/discord). +## Unauthorized DM Behavior + +Control what Hermes does when an unknown user sends a direct message: + +```yaml +unauthorized_dm_behavior: pair + +whatsapp: + unauthorized_dm_behavior: ignore +``` + +- `pair` is the default. Hermes denies access, but replies with a one-time pairing code in DMs. +- `ignore` silently drops unauthorized DMs. +- Platform sections override the global default, so you can keep pairing enabled broadly while making one platform quieter. + ## Quick Commands Define custom commands that run shell commands without invoking the LLM — zero token usage, instant execution. Especially useful from messaging platforms (Telegram, Discord, etc.) for quick server checks or utility scripts. diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index f754c9c2..57212df1 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -97,6 +97,18 @@ WHATSAPP_MODE=bot # "bot" or "self-chat" WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers (with country code, no +) ``` +Optional behavior settings in `~/.hermes/config.yaml`: + +```yaml +unauthorized_dm_behavior: pair + +whatsapp: + unauthorized_dm_behavior: ignore +``` + +- `unauthorized_dm_behavior: pair` is the global default. Unknown DM senders get a pairing code. +- `whatsapp.unauthorized_dm_behavior: ignore` makes WhatsApp stay silent for unauthorized DMs, which is usually the better choice for a private number. + Then start the gateway: ```bash @@ -162,6 +174,7 @@ whatsapp: | **Bridge crashes or reconnect loops** | Restart the gateway, update Hermes, and re-pair if the session was invalidated by a WhatsApp protocol change. 
| | **Bot stops working after WhatsApp update** | Update Hermes to get the latest bridge version, then re-pair. | | **Messages not being received** | Verify `WHATSAPP_ALLOWED_USERS` includes the sender's number (with country code, no `+` or spaces). | +| **Bot replies to strangers with a pairing code** | Set `whatsapp.unauthorized_dm_behavior: ignore` in `~/.hermes/config.yaml` if you want unauthorized DMs to be silently ignored instead. | --- @@ -173,6 +186,13 @@ of authorized users. Without this setting, the gateway will **deny all incoming safety measure. ::: +By default, unauthorized DMs still receive a pairing code reply. If you want a private WhatsApp number to stay completely silent to strangers, set: + +```yaml +whatsapp: + unauthorized_dm_behavior: ignore +``` + - The `~/.hermes/whatsapp/session` directory contains full session credentials — protect it like a password - Set file permissions: `chmod 700 ~/.hermes/whatsapp/session` - Use a **dedicated phone number** for the bot to isolate risk from your personal account diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md index d6d14db8..edf0a2e9 100644 --- a/website/docs/user-guide/security.md +++ b/website/docs/user-guide/security.md @@ -151,6 +151,19 @@ For more flexible authorization, Hermes includes a code-based pairing system. In 3. The bot owner runs `hermes pairing approve <CODE>` on the CLI 4. The user is permanently approved for that platform +Control how unauthorized direct messages are handled in `~/.hermes/config.yaml`: + +```yaml +unauthorized_dm_behavior: pair + +whatsapp: + unauthorized_dm_behavior: ignore +``` + +- `pair` is the default. Unauthorized DMs get a pairing code reply. +- `ignore` silently drops unauthorized DMs. +- Platform sections override the global default, so you can keep pairing on Telegram while keeping WhatsApp silent.
+ **Security features** (based on OWASP + NIST SP 800-63-4 guidance): | Feature | Details | From 56ca84f243bcaecfef8a4fa276d6e644337a39cf Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 04:07:41 -0700 Subject: [PATCH 25/25] feat: add huggingface-hub bundled skill Adds the Hugging Face CLI (hf) reference as a built-in skill under mlops/. Covers downloading/uploading models and datasets, repo management, SQL queries on datasets, inference endpoints, Spaces, buckets, and more. Based on the official HF skill from huggingface/skills. --- skills/mlops/huggingface-hub/SKILL.md | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 skills/mlops/huggingface-hub/SKILL.md diff --git a/skills/mlops/huggingface-hub/SKILL.md b/skills/mlops/huggingface-hub/SKILL.md new file mode 100644 index 00000000..41152d44 --- /dev/null +++ b/skills/mlops/huggingface-hub/SKILL.md @@ -0,0 +1,80 @@ +--- +name: huggingface-hub +description: Hugging Face Hub CLI (hf) — download/upload models and datasets, manage repos, run SQL on datasets, deploy inference endpoints, manage Spaces, and more. Use when working with HuggingFace models, datasets, or infrastructure. +version: 1.0.0 +author: Hugging Face +license: MIT +tags: [huggingface, hf, models, datasets, hub, mlops] +--- + +# Hugging Face CLI (`hf`) Reference Guide + +The `hf` command is the modern command-line interface for interacting with the Hugging Face Hub, providing tools to manage repositories, models, datasets, and Spaces. + +> **IMPORTANT:** The `hf` command replaces the now deprecated `huggingface-cli` command. + +## Quick Start +* **Installation:** `curl -LsSf https://hf.co/cli/install.sh | bash -s` +* **Help:** Use `hf --help` to view all available functions and real-world examples. +* **Authentication:** Recommended via `HF_TOKEN` environment variable or the `--token` flag. + +--- + +## Core Commands + +### General Operations +* `hf download REPO_ID`: Download files from the Hub. 
+* `hf upload REPO_ID`: Upload files/folders (recommended for single-commit). +* `hf upload-large-folder REPO_ID LOCAL_PATH`: Recommended for resumable uploads of large directories. +* `hf sync`: Sync files between a local directory and a bucket. +* `hf env` / `hf version`: View environment and version details. + +### Authentication (`hf auth`) +* `login` / `logout`: Manage sessions using tokens from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). +* `list` / `switch`: Manage and toggle between multiple stored access tokens. +* `whoami`: Identify the currently logged-in account. + +### Repository Management (`hf repos`) +* `create` / `delete`: Create or permanently remove repositories. +* `duplicate`: Clone a model, dataset, or Space to a new ID. +* `move`: Transfer a repository between namespaces. +* `branch` / `tag`: Manage Git-like references. +* `delete-files`: Remove specific files using patterns. + +--- + +## Specialized Hub Interactions + +### Datasets & Models +* **Datasets:** `hf datasets list`, `info`, and `parquet` (list parquet URLs). +* **SQL Queries:** `hf datasets sql SQL` — Execute raw SQL via DuckDB against dataset parquet URLs. +* **Models:** `hf models list` and `info`. +* **Papers:** `hf papers list` — View daily papers. + +### Discussions & Pull Requests (`hf discussions`) +* Manage the lifecycle of Hub contributions: `list`, `create`, `info`, `comment`, `close`, `reopen`, and `rename`. +* `diff`: View changes in a PR. +* `merge`: Finalize pull requests. + +### Infrastructure & Compute +* **Endpoints:** Deploy and manage Inference Endpoints (`deploy`, `pause`, `resume`, `scale-to-zero`, `catalog`). +* **Jobs:** Run compute tasks on HF infrastructure. Includes `hf jobs uv` for running Python scripts with inline dependencies and `stats` for resource monitoring. +* **Spaces:** Manage interactive apps. Includes `dev-mode` and `hot-reload` for Python files without full restarts. 
+ +### Storage & Automation +* **Buckets:** Full S3-like bucket management (`create`, `cp`, `mv`, `rm`, `sync`). +* **Cache:** Manage local storage with `list`, `prune` (remove detached revisions), and `verify` (checksum checks). +* **Webhooks:** Automate workflows by managing Hub webhooks (`create`, `watch`, `enable`/`disable`). +* **Collections:** Organize Hub items into collections (`add-item`, `update`, `list`). + +--- + +## Advanced Usage & Tips + +### Global Flags +* `--format json`: Produces machine-readable output for automation. +* `-q` / `--quiet`: Limits output to IDs only. + +### Extensions & Skills +* **Extensions:** Extend CLI functionality via GitHub repositories using `hf extensions install REPO_ID`. +* **Skills:** Manage AI assistant skills with `hf skills add`.