From 9eee529a7fecfa3388208e9facad9f73505b2bd8 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Sun, 8 Mar 2026 20:44:42 +0300 Subject: [PATCH 1/4] fix: detect and warn on file re-read loops after context compression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When context compression summarizes conversation history, the agent loses track of which files it already read and re-reads them in a loop. Users report the agent reading the same files endlessly without writing. Root cause: context compression is lossy — file contents and read history are lost in the summary. After compression, the model thinks it hasn't examined the files yet and reads them again. Fix (two-part): 1. Track file reads per task in file_tools.py. When the same file region is read again, include a _warning in the response telling the model to stop re-reading and use existing information. 2. After context compression, inject a structured message listing all files already read in the session with explicit "do NOT re-read" instruction, preserving read history across compression boundaries. Adds 16 tests covering warning detection, task isolation, summary accuracy, tracker cleanup, and compression history injection. --- run_agent.py | 33 ++- tests/tools/test_read_loop_detection.py | 271 ++++++++++++++++++++++++ tools/file_tools.py | 51 ++++- 3 files changed, 349 insertions(+), 6 deletions(-) create mode 100644 tests/tools/test_read_loop_detection.py diff --git a/run_agent.py b/run_agent.py index 75e3dfc95..58d75332e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2463,7 +2463,7 @@ class AIAgent: if messages and messages[-1].get("_flush_sentinel") == _sentinel: messages.pop() - def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple: + def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple: """Compress conversation context and split the session in SQLite. Returns: @@ -2478,6 +2478,25 @@ class AIAgent: if todo_snapshot: compressed.append({"role": "user", "content": todo_snapshot}) + # Preserve file-read history so the model doesn't re-read files + # it already examined before compression. + try: + from tools.file_tools import get_read_files_summary + read_files = get_read_files_summary(task_id) + if read_files: + file_list = "\n".join( + f" - {f['path']} ({', '.join(f['regions'])})" + for f in read_files + ) + compressed.append({"role": "user", "content": ( + "[Files already read in this session — do NOT re-read these]\n" + f"{file_list}\n" + "Use the information from the context summary above. " + "Proceed with writing, editing, or responding." + )}) + except Exception: + pass # Don't break compression if file tracking fails + self._invalidate_system_prompt() new_system_prompt = self._build_system_prompt(system_message) self._cached_system_prompt = new_system_prompt @@ -2999,7 +3018,8 @@ class AIAgent: for _pass in range(3): _orig_len = len(messages) messages, active_system_prompt = self._compress_context( - messages, system_message, approx_tokens=_preflight_tokens + messages, system_message, approx_tokens=_preflight_tokens, + task_id=effective_task_id, ) if len(messages) >= _orig_len: break # Cannot compress further @@ -3461,7 +3481,8 @@ class AIAgent: original_len = len(messages) messages, active_system_prompt = self._compress_context( - messages, system_message, approx_tokens=approx_tokens + messages, system_message, approx_tokens=approx_tokens, + task_id=effective_task_id, ) if len(messages) < original_len: @@ -3528,7 +3549,8 @@ class AIAgent: original_len = len(messages) messages, active_system_prompt = self._compress_context( - messages, system_message, approx_tokens=approx_tokens + messages, system_message, approx_tokens=approx_tokens, + task_id=effective_task_id, ) if len(messages) < original_len or new_ctx and new_ctx < old_ctx: @@ -3848,7 +3870,8 @@ class AIAgent: if self.compression_enabled and self.context_compressor.should_compress(): messages, active_system_prompt = self._compress_context( messages, system_message, - approx_tokens=self.context_compressor.last_prompt_tokens + approx_tokens=self.context_compressor.last_prompt_tokens, + task_id=effective_task_id, ) # Save session log incrementally (so progress is visible even if interrupted) diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py new file mode 100644 index 000000000..544a5fa1f --- /dev/null +++ b/tests/tools/test_read_loop_detection.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +""" +Tests for the read-loop detection mechanism in file_tools. + +Verifies that: +1. Re-reading the same file region produces a warning +2. Different regions/files don't trigger false warnings +3. Task isolation works (different tasks have separate trackers) +4. get_read_files_summary returns accurate history +5. clear_read_tracker resets state +6. Context compression injects file-read history + +Run with: python -m pytest tests/tools/test_read_loop_detection.py -v +""" + +import json +import unittest +from unittest.mock import patch, MagicMock + +from tools.file_tools import ( + read_file_tool, + get_read_files_summary, + clear_read_tracker, + _read_tracker, +) + + +class _FakeReadResult: + """Minimal stand-in for FileOperations.read_file return value.""" + def __init__(self, content="line1\nline2\n", total_lines=2): + self._content = content + self._total_lines = total_lines + + def to_dict(self): + return {"content": self._content, "total_lines": self._total_lines} + + +def _fake_read_file(path, offset=1, limit=500): + return _FakeReadResult(content=f"content of {path}", total_lines=10) + + +def _make_fake_file_ops(): + fake = MagicMock() + fake.read_file = _fake_read_file + return fake + + +class TestReadLoopDetection(unittest.TestCase): + """Verify that read_file_tool detects and warns on re-reads.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_first_read_has_no_warning(self, _mock_ops): + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertIn("content", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_second_read_same_region_has_warning(self, _mock_ops): + read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") + result = json.loads( + read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") + ) + self.assertIn("_warning", result) + self.assertIn("already read", result["_warning"]) + self.assertIn("2 times", result["_warning"]) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_third_read_increments_count(self, _mock_ops): + for _ in range(2): + read_file_tool("/tmp/test.py", task_id="t1") + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertIn("3 times", result["_warning"]) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_region_no_warning(self, _mock_ops): + read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") + result = json.loads( + read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1") + ) + self.assertNotIn("_warning", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_file_no_warning(self, _mock_ops): + read_file_tool("/tmp/a.py", task_id="t1") + result = json.loads(read_file_tool("/tmp/b.py", task_id="t1")) + self.assertNotIn("_warning", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_tasks_isolated(self, _mock_ops): + read_file_tool("/tmp/test.py", task_id="task_a") + result = json.loads( + read_file_tool("/tmp/test.py", task_id="task_b") + ) + self.assertNotIn("_warning", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_warning_still_returns_content(self, _mock_ops): + """Even with a warning, the file content is still returned.""" + read_file_tool("/tmp/test.py", task_id="t1") + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertIn("_warning", result) + self.assertIn("content", result) + self.assertIn("content of /tmp/test.py", result["content"]) + + +class TestReadFilesSummary(unittest.TestCase): + """Verify get_read_files_summary returns accurate file-read history.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_empty_when_no_reads(self, _mock_ops): + summary = get_read_files_summary("t1") + self.assertEqual(summary, []) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_single_file_single_region(self, _mock_ops): + read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") + summary = get_read_files_summary("t1") + self.assertEqual(len(summary), 1) + self.assertEqual(summary[0]["path"], "/tmp/test.py") + self.assertIn("lines 1-500", summary[0]["regions"]) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_single_file_multiple_regions(self, _mock_ops): + read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") + read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1") + summary = get_read_files_summary("t1") + self.assertEqual(len(summary), 1) + self.assertEqual(len(summary[0]["regions"]), 2) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_multiple_files(self, _mock_ops): + read_file_tool("/tmp/a.py", task_id="t1") + read_file_tool("/tmp/b.py", task_id="t1") + summary = get_read_files_summary("t1") + self.assertEqual(len(summary), 2) + paths = [s["path"] for s in summary] + self.assertIn("/tmp/a.py", paths) + self.assertIn("/tmp/b.py", paths) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_task_has_separate_summary(self, _mock_ops): + read_file_tool("/tmp/a.py", task_id="task_a") + read_file_tool("/tmp/b.py", task_id="task_b") + summary_a = get_read_files_summary("task_a") + summary_b = get_read_files_summary("task_b") + self.assertEqual(len(summary_a), 1) + self.assertEqual(summary_a[0]["path"], "/tmp/a.py") + self.assertEqual(len(summary_b), 1) + self.assertEqual(summary_b[0]["path"], "/tmp/b.py") + + +class TestClearReadTracker(unittest.TestCase): + """Verify clear_read_tracker resets state properly.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_clear_specific_task(self, _mock_ops): + read_file_tool("/tmp/test.py", task_id="t1") + read_file_tool("/tmp/test.py", task_id="t2") + clear_read_tracker("t1") + self.assertEqual(get_read_files_summary("t1"), []) + self.assertEqual(len(get_read_files_summary("t2")), 1) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_clear_all(self, _mock_ops): + read_file_tool("/tmp/test.py", task_id="t1") + read_file_tool("/tmp/test.py", task_id="t2") + clear_read_tracker() + self.assertEqual(get_read_files_summary("t1"), []) + self.assertEqual(get_read_files_summary("t2"), []) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_clear_then_reread_no_warning(self, _mock_ops): + read_file_tool("/tmp/test.py", task_id="t1") + clear_read_tracker("t1") + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertNotIn("_warning", result) + + +class TestCompressionFileHistory(unittest.TestCase): + """Verify that _compress_context injects file-read history.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_compress_context_includes_read_files(self, _mock_ops): + """After reading files, _compress_context should inject a message + listing which files were already read.""" + # Simulate reads + read_file_tool("/tmp/foo.py", offset=1, limit=100, task_id="compress_test") + read_file_tool("/tmp/bar.py", offset=1, limit=200, task_id="compress_test") + + # Build minimal messages for compression (need enough messages) + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Analyze the codebase."}, + {"role": "assistant", "content": "I'll read the files."}, + {"role": "user", "content": "Continue."}, + {"role": "assistant", "content": "Reading more files."}, + {"role": "user", "content": "What did you find?"}, + {"role": "assistant", "content": "Here are my findings."}, + {"role": "user", "content": "Great, write the fix."}, + {"role": "assistant", "content": "Working on it."}, + {"role": "user", "content": "Status?"}, + ] + + # Mock the compressor to return a simple compression + mock_compressor = MagicMock() + mock_compressor.compress.return_value = [ + messages[0], # system + messages[1], # first user + {"role": "user", "content": "[CONTEXT SUMMARY]: Files were analyzed."}, + messages[-1], # last user + ] + mock_compressor.last_prompt_tokens = 5000 + + # Mock the agent's _compress_context dependencies + mock_agent = MagicMock() + mock_agent.context_compressor = mock_compressor + mock_agent._todo_store.format_for_injection.return_value = None + mock_agent._session_db = None + mock_agent.quiet_mode = True + mock_agent._invalidate_system_prompt = MagicMock() + mock_agent._build_system_prompt = MagicMock(return_value="system prompt") + mock_agent._cached_system_prompt = None + + # Call the real _compress_context + from run_agent import AIAgent + result, _ = AIAgent._compress_context( + mock_agent, messages, "system prompt", + approx_tokens=5000, task_id="compress_test", + ) + + # Find the injected file-read history message + file_history_msgs = [ + m for m in result + if isinstance(m.get("content"), str) + and "already read" in m.get("content", "").lower() + ] + self.assertEqual(len(file_history_msgs), 1, + "Should inject exactly one file-read history message") + + history_content = file_history_msgs[0]["content"] + self.assertIn("/tmp/foo.py", history_content) + self.assertIn("/tmp/bar.py", history_content) + self.assertIn("do NOT re-read", history_content) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/file_tools.py b/tools/file_tools.py index b29d2d274..b34a27a3f 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -13,6 +13,11 @@ logger = logging.getLogger(__name__) _file_ops_lock = threading.Lock() _file_ops_cache: dict = {} +# Track files read per task to detect re-read loops after context compression. +# Key: task_id, Value: dict mapping (path, offset, limit) -> read count +_read_tracker_lock = threading.Lock() +_read_tracker: dict = {} + def _get_file_ops(task_id: str = "default") -> ShellFileOperations: """Get or create ShellFileOperations for a terminal environment. @@ -128,11 +133,55 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = try: file_ops = _get_file_ops(task_id) result = file_ops.read_file(path, offset, limit) - return json.dumps(result.to_dict(), ensure_ascii=False) + result_dict = result.to_dict() + + # Track reads to detect re-read loops (e.g. after context compression) + read_key = (path, offset, limit) + with _read_tracker_lock: + task_reads = _read_tracker.setdefault(task_id, {}) + task_reads[read_key] = task_reads.get(read_key, 0) + 1 + count = task_reads[read_key] + + if count > 1: + result_dict["_warning"] = ( + f"You have already read this exact file region {count} times in this session. " + "The content has not changed. Use the information you already have instead of re-reading. " + "If you are stuck in a loop, stop reading and proceed with writing or responding." + ) + + return json.dumps(result_dict, ensure_ascii=False) except Exception as e: return json.dumps({"error": str(e)}, ensure_ascii=False) +def get_read_files_summary(task_id: str = "default") -> list: + """Return a list of files read in this session for the given task. + + Used by context compression to preserve file-read history across + compression boundaries. + """ + with _read_tracker_lock: + task_reads = _read_tracker.get(task_id, {}) + seen_paths = {} + for (path, offset, limit), count in task_reads.items(): + if path not in seen_paths: + seen_paths[path] = [] + seen_paths[path].append(f"lines {offset}-{offset + limit - 1}") + return [ + {"path": p, "regions": regions} + for p, regions in sorted(seen_paths.items()) + ] + + +def clear_read_tracker(task_id: str = None): + """Clear the read tracker. Called when starting a new conversation.""" + with _read_tracker_lock: + if task_id: + _read_tracker.pop(task_id, None) + else: + _read_tracker.clear() + + def write_file_tool(path: str, content: str, task_id: str = "default") -> str: """Write content to a file.""" try: From e2fe1373f31f046683f3863be6045aa7e6fe7319 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Sun, 8 Mar 2026 23:01:21 +0300 Subject: [PATCH 2/4] fix: escalate read/search blocking, track search loops, filter completed todos - Block file reads after 3+ re-reads of same region (no content returned) - Track search_files calls and block repeated identical searches - Filter completed/cancelled todos from post-compression injection to prevent agent from re-doing finished work - Add 10 new tests covering all three fixes --- tests/tools/test_read_loop_detection.py | 113 +++++++++++++++++++++++- tools/code_execution_tool.py | 9 +- tools/file_tools.py | 41 ++++++++- tools/todo_tool.py | 13 ++- 4 files changed, 167 insertions(+), 9 deletions(-) diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py index 544a5fa1f..d5f38a3da 100644 --- a/tests/tools/test_read_loop_detection.py +++ b/tests/tools/test_read_loop_detection.py @@ -19,6 +19,7 @@ from unittest.mock import patch, MagicMock from tools.file_tools import ( read_file_tool, + search_tool, get_read_files_summary, clear_read_tracker, _read_tracker, @@ -39,9 +40,16 @@ def _fake_read_file(path, offset=1, limit=500): return _FakeReadResult(content=f"content of {path}", total_lines=10) +class _FakeSearchResult: + """Minimal stand-in for FileOperations.search return value.""" + def to_dict(self): + return {"matches": [{"file": "test.py", "line": 1, "text": "match"}]} + + def _make_fake_file_ops(): fake = MagicMock() fake.read_file = _fake_read_file + fake.search = lambda **kw: _FakeSearchResult() return fake @@ -71,11 +79,23 @@ class TestReadLoopDetection(unittest.TestCase): self.assertIn("2 times", result["_warning"]) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_third_read_increments_count(self, _mock_ops): + def test_third_read_is_blocked(self, _mock_ops): + """3rd read of the same region returns error, no content.""" for _ in range(2): read_file_tool("/tmp/test.py", task_id="t1") result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) - self.assertIn("3 times", result["_warning"]) + self.assertIn("error", result) + self.assertIn("BLOCKED", result["error"]) + self.assertNotIn("content", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_fourth_read_still_blocked(self, _mock_ops): + """Subsequent reads remain blocked with incrementing count.""" + for _ in range(3): + read_file_tool("/tmp/test.py", task_id="t1") + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertIn("BLOCKED", result["error"]) + self.assertIn("4 times", result["error"]) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) def test_different_region_no_warning(self, _mock_ops): @@ -267,5 +287,94 @@ class TestCompressionFileHistory(unittest.TestCase): self.assertIn("do NOT re-read", history_content) +class TestSearchLoopDetection(unittest.TestCase): + """Verify that search_tool detects and blocks repeated searches.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_first_search_no_warning(self, _mock_ops): + result = json.loads(search_tool("def main", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_second_search_has_warning(self, _mock_ops): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("def main", task_id="t1")) + self.assertIn("_warning", result) + self.assertIn("2 times", result["_warning"]) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_third_search_is_blocked(self, _mock_ops): + for _ in range(2): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("def main", task_id="t1")) + self.assertIn("error", result) + self.assertIn("BLOCKED", result["error"]) + self.assertNotIn("matches", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_pattern_no_warning(self, _mock_ops): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("class Foo", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_task_isolated(self, _mock_ops): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("def main", task_id="t2")) + self.assertNotIn("_warning", result) + + +class TestTodoInjectionFiltering(unittest.TestCase): + """Verify that format_for_injection filters completed/cancelled todos.""" + + def test_filters_completed_and_cancelled(self): + from tools.todo_tool import TodoStore + store = TodoStore() + store.write([ + {"id": "1", "content": "Read codebase", "status": "completed"}, + {"id": "2", "content": "Write fix", "status": "in_progress"}, + {"id": "3", "content": "Run tests", "status": "pending"}, + {"id": "4", "content": "Abandoned", "status": "cancelled"}, + ]) + injection = store.format_for_injection() + self.assertNotIn("Read codebase", injection) + self.assertNotIn("Abandoned", injection) + self.assertIn("Write fix", injection) + self.assertIn("Run tests", injection) + + def test_all_completed_returns_none(self): + from tools.todo_tool import TodoStore + store = TodoStore() + store.write([ + {"id": "1", "content": "Done", "status": "completed"}, + {"id": "2", "content": "Also done", "status": "cancelled"}, + ]) + self.assertIsNone(store.format_for_injection()) + + def test_empty_store_returns_none(self): + from tools.todo_tool import TodoStore + store = TodoStore() + self.assertIsNone(store.format_for_injection()) + + def test_all_active_included(self): + from tools.todo_tool import TodoStore + store = TodoStore() + store.write([ + {"id": "1", "content": "Task A", "status": "pending"}, + {"id": "2", "content": "Task B", "status": "in_progress"}, + ]) + injection = store.format_for_injection() + self.assertIn("Task A", injection) + self.assertIn("Task B", injection) + + if __name__ == "__main__": unittest.main() diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 0d3f17609..ea02cc819 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -78,7 +78,7 @@ _TOOL_STUBS = { "web_extract": ( "web_extract", "urls: list", - '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""', + '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""', '{"urls": urls}', ), "read_file": ( @@ -605,7 +605,7 @@ _TOOL_DOC_LINES = [ " Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"), ("web_extract", " web_extract(urls: list[str]) -> dict\n" - " Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"), + " Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"), ("read_file", " read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n" " Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"), @@ -643,7 +643,10 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict: import_examples = [n for n in ("web_search", "terminal") if n in enabled_sandbox_tools] if not import_examples: import_examples = sorted(enabled_sandbox_tools)[:2] - import_str = ", ".join(import_examples) + ", ..." + if import_examples: + import_str = ", ".join(import_examples) + ", ..." + else: + import_str = "..." description = ( "Run a Python script that can call Hermes tools programmatically. " diff --git a/tools/file_tools.py b/tools/file_tools.py index b34a27a3f..1a8bdcf25 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -142,7 +142,18 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = task_reads[read_key] = task_reads.get(read_key, 0) + 1 count = task_reads[read_key] - if count > 1: + if count >= 3: + # Hard block: stop returning content to break the loop + return json.dumps({ + "error": ( + f"BLOCKED: You have read this exact file region {count} times. " + "The content has NOT changed. You already have this information. " + "STOP re-reading and proceed with your task." + ), + "path": path, + "already_read": count, + }, ensure_ascii=False) + elif count > 1: result_dict["_warning"] = ( f"You have already read this exact file region {count} times in this session. " "The content has not changed. Use the information you already have instead of re-reading. " @@ -224,12 +235,38 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", task_id: str = "default") -> str: """Search for content or files.""" try: + # Track searches to detect repeated search loops + search_key = ("search", pattern, target, path, file_glob or "") + with _read_tracker_lock: + task_reads = _read_tracker.setdefault(task_id, {}) + task_reads[search_key] = task_reads.get(search_key, 0) + 1 + count = task_reads[search_key] + + if count >= 3: + return json.dumps({ + "error": ( + f"BLOCKED: You have run this exact search {count} times. " + "The results have NOT changed. You already have this information. " + "STOP re-searching and proceed with your task." + ), + "pattern": pattern, + "already_searched": count, + }, ensure_ascii=False) + file_ops = _get_file_ops(task_id) result = file_ops.search( pattern=pattern, path=path, target=target, file_glob=file_glob, limit=limit, offset=offset, output_mode=output_mode, context=context ) - return json.dumps(result.to_dict(), ensure_ascii=False) + result_dict = result.to_dict() + + if count > 1: + result_dict["_warning"] = ( + f"You have run this exact search {count} times in this session. " + "The results have not changed. Use the information you already have." + ) + + return json.dumps(result_dict, ensure_ascii=False) except Exception as e: return json.dumps({"error": str(e)}, ensure_ascii=False) diff --git a/tools/todo_tool.py b/tools/todo_tool.py index a4853ac3b..7b74d01ea 100644 --- a/tools/todo_tool.py +++ b/tools/todo_tool.py @@ -105,8 +105,17 @@ class TodoStore: "cancelled": "[~]", } - lines = ["[Your task list was preserved across context compression]"] - for item in self._items: + # Only inject pending/in_progress items — completed/cancelled ones + # cause the model to re-do finished work after compression. + active_items = [ + item for item in self._items + if item["status"] in ("pending", "in_progress") + ] + if not active_items: + return None + + lines = ["[Your active task list was preserved across context compression]"] + for item in active_items: marker = markers.get(item["status"], "[?]") lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})") From 67421ed74f2e5cc1e7ac619e12b56519cfeae088 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Sun, 8 Mar 2026 23:07:38 +0300 Subject: [PATCH 3/4] fix: update test_non_empty_has_markers to match todo filtering behavior Completed/cancelled items are now filtered from format_for_injection() output. Update the existing test to verify active items appear and completed items are excluded. --- tests/tools/test_todo_tool.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_todo_tool.py b/tests/tools/test_todo_tool.py index b0f694d72..d4fd03baf 100644 --- a/tests/tools/test_todo_tool.py +++ b/tests/tools/test_todo_tool.py @@ -46,11 +46,17 @@ class TestFormatForInjection: store.write([ {"id": "1", "content": "Do thing", "status": "completed"}, {"id": "2", "content": "Next", "status": "pending"}, + {"id": "3", "content": "Working", "status": "in_progress"}, ]) text = store.format_for_injection() - assert "[x]" in text + # Completed items are filtered out of injection + assert "[x]" not in text + assert "Do thing" not in text + # Active items are included assert "[ ]" in text - assert "Do thing" in text + assert "[>]" in text + assert "Next" in text + assert "Working" in text assert "context compression" in text.lower() From 912efe11b57bade7586c9caf484747914d2da692 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:25:52 +0300 Subject: [PATCH 4/4] fix(tests): add content attribute to fake result objects _FakeReadResult and _FakeSearchResult now expose the attributes that read_file_tool/search_tool access after the redact_sensitive_text integration from main. --- tests/tools/test_read_loop_detection.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py index d5f38a3da..dfa1c1ab3 100644 --- a/tests/tools/test_read_loop_detection.py +++ b/tests/tools/test_read_loop_detection.py @@ -29,11 +29,11 @@ from tools.file_tools import ( class _FakeReadResult: """Minimal stand-in for FileOperations.read_file return value.""" def __init__(self, content="line1\nline2\n", total_lines=2): - self._content = content + self.content = content self._total_lines = total_lines def to_dict(self): - return {"content": self._content, "total_lines": self._total_lines} + return {"content": self.content, "total_lines": self._total_lines} def _fake_read_file(path, offset=1, limit=500): @@ -42,6 +42,9 @@ def _fake_read_file(path, offset=1, limit=500): class _FakeSearchResult: """Minimal stand-in for FileOperations.search return value.""" + def __init__(self): + self.matches = [] + def to_dict(self): return {"matches": [{"file": "test.py", "line": 1, "text": "match"}]}