diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py index 544a5fa1f..d5f38a3da 100644 --- a/tests/tools/test_read_loop_detection.py +++ b/tests/tools/test_read_loop_detection.py @@ -19,6 +19,7 @@ from unittest.mock import patch, MagicMock from tools.file_tools import ( read_file_tool, + search_tool, get_read_files_summary, clear_read_tracker, _read_tracker, @@ -39,9 +40,16 @@ def _fake_read_file(path, offset=1, limit=500): return _FakeReadResult(content=f"content of {path}", total_lines=10) +class _FakeSearchResult: + """Minimal stand-in for FileOperations.search return value.""" + def to_dict(self): + return {"matches": [{"file": "test.py", "line": 1, "text": "match"}]} + + def _make_fake_file_ops(): fake = MagicMock() fake.read_file = _fake_read_file + fake.search = lambda **kw: _FakeSearchResult() return fake @@ -71,11 +79,23 @@ class TestReadLoopDetection(unittest.TestCase): self.assertIn("2 times", result["_warning"]) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_third_read_increments_count(self, _mock_ops): + def test_third_read_is_blocked(self, _mock_ops): + """3rd read of the same region returns error, no content.""" for _ in range(2): read_file_tool("/tmp/test.py", task_id="t1") result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) - self.assertIn("3 times", result["_warning"]) + self.assertIn("error", result) + self.assertIn("BLOCKED", result["error"]) + self.assertNotIn("content", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_fourth_read_still_blocked(self, _mock_ops): + """Subsequent reads remain blocked with incrementing count.""" + for _ in range(3): + read_file_tool("/tmp/test.py", task_id="t1") + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertIn("BLOCKED", result["error"]) + self.assertIn("4 times", result["error"]) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) def test_different_region_no_warning(self, _mock_ops): @@ -267,5 +287,94 @@ class TestCompressionFileHistory(unittest.TestCase): self.assertIn("do NOT re-read", history_content) +class TestSearchLoopDetection(unittest.TestCase): + """Verify that search_tool detects and blocks repeated searches.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_first_search_no_warning(self, _mock_ops): + result = json.loads(search_tool("def main", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_second_search_has_warning(self, _mock_ops): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("def main", task_id="t1")) + self.assertIn("_warning", result) + self.assertIn("2 times", result["_warning"]) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_third_search_is_blocked(self, _mock_ops): + for _ in range(2): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("def main", task_id="t1")) + self.assertIn("error", result) + self.assertIn("BLOCKED", result["error"]) + self.assertNotIn("matches", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_pattern_no_warning(self, _mock_ops): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("class Foo", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_different_task_isolated(self, _mock_ops): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("def main", task_id="t2")) + self.assertNotIn("_warning", result) + + +class TestTodoInjectionFiltering(unittest.TestCase): + """Verify that format_for_injection filters completed/cancelled todos.""" + + def test_filters_completed_and_cancelled(self): + from tools.todo_tool import TodoStore + store = TodoStore() + store.write([ + {"id": "1", "content": "Read codebase", "status": "completed"}, + {"id": "2", "content": "Write fix", "status": "in_progress"}, + {"id": "3", "content": "Run tests", "status": "pending"}, + {"id": "4", "content": "Abandoned", "status": "cancelled"}, + ]) + injection = store.format_for_injection() + self.assertNotIn("Read codebase", injection) + self.assertNotIn("Abandoned", injection) + self.assertIn("Write fix", injection) + self.assertIn("Run tests", injection) + + def test_all_completed_returns_none(self): + from tools.todo_tool import TodoStore + store = TodoStore() + store.write([ + {"id": "1", "content": "Done", "status": "completed"}, + {"id": "2", "content": "Also done", "status": "cancelled"}, + ]) + self.assertIsNone(store.format_for_injection()) + + def test_empty_store_returns_none(self): + from tools.todo_tool import TodoStore + store = TodoStore() + self.assertIsNone(store.format_for_injection()) + + def test_all_active_included(self): + from tools.todo_tool import TodoStore + store = TodoStore() + store.write([ + {"id": "1", "content": "Task A", "status": "pending"}, + {"id": "2", "content": "Task B", "status": "in_progress"}, + ]) + injection = store.format_for_injection() + self.assertIn("Task A", injection) + self.assertIn("Task B", injection) + + if __name__ == "__main__": unittest.main() diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 0d3f17609..ea02cc819 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -78,7 +78,7 @@ _TOOL_STUBS = { "web_extract": ( "web_extract", "urls: list", - '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""', + '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""', '{"urls": urls}', ), "read_file": ( @@ -605,7 +605,7 @@ _TOOL_DOC_LINES = [ " Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"), ("web_extract", " web_extract(urls: list[str]) -> dict\n" - " Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"), + " Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"), ("read_file", " read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n" " Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"), @@ -643,7 +643,10 @@ def build_execute_code_schema(enabled_sandbox_tools: set = None) -> dict: import_examples = [n for n in ("web_search", "terminal") if n in enabled_sandbox_tools] if not import_examples: import_examples = sorted(enabled_sandbox_tools)[:2] - import_str = ", ".join(import_examples) + ", ..." + if import_examples: + import_str = ", ".join(import_examples) + ", ..." + else: + import_str = "..." description = ( "Run a Python script that can call Hermes tools programmatically. " diff --git a/tools/file_tools.py b/tools/file_tools.py index b34a27a3f..1a8bdcf25 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -142,7 +142,18 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = task_reads[read_key] = task_reads.get(read_key, 0) + 1 count = task_reads[read_key] - if count > 1: + if count >= 3: + # Hard block: stop returning content to break the loop + return json.dumps({ + "error": ( + f"BLOCKED: You have read this exact file region {count} times. " + "The content has NOT changed. You already have this information. " + "STOP re-reading and proceed with your task." + ), + "path": path, + "already_read": count, + }, ensure_ascii=False) + elif count > 1: result_dict["_warning"] = ( f"You have already read this exact file region {count} times in this session. " "The content has not changed. Use the information you already have instead of re-reading. " @@ -224,12 +235,38 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", task_id: str = "default") -> str: """Search for content or files.""" try: + # Track searches to detect repeated search loops + search_key = ("search", pattern, target, path, file_glob or "") + with _read_tracker_lock: + task_reads = _read_tracker.setdefault(task_id, {}) + task_reads[search_key] = task_reads.get(search_key, 0) + 1 + count = task_reads[search_key] + + if count >= 3: + return json.dumps({ + "error": ( + f"BLOCKED: You have run this exact search {count} times. " + "The results have NOT changed. You already have this information. " + "STOP re-searching and proceed with your task." + ), + "pattern": pattern, + "already_searched": count, + }, ensure_ascii=False) + file_ops = _get_file_ops(task_id) result = file_ops.search( pattern=pattern, path=path, target=target, file_glob=file_glob, limit=limit, offset=offset, output_mode=output_mode, context=context ) - return json.dumps(result.to_dict(), ensure_ascii=False) + result_dict = result.to_dict() + + if count > 1: + result_dict["_warning"] = ( + f"You have run this exact search {count} times in this session. " + "The results have not changed. Use the information you already have." + ) + + return json.dumps(result_dict, ensure_ascii=False) except Exception as e: return json.dumps({"error": str(e)}, ensure_ascii=False) diff --git a/tools/todo_tool.py b/tools/todo_tool.py index a4853ac3b..7b74d01ea 100644 --- a/tools/todo_tool.py +++ b/tools/todo_tool.py @@ -105,8 +105,17 @@ class TodoStore: "cancelled": "[~]", } - lines = ["[Your task list was preserved across context compression]"] - for item in self._items: + # Only inject pending/in_progress items — completed/cancelled ones + # cause the model to re-do finished work after compression. + active_items = [ + item for item in self._items + if item["status"] in ("pending", "in_progress") + ] + if not active_items: + return None + + lines = ["[Your active task list was preserved across context compression]"] + for item in active_items: marker = markers.get(item["status"], "[?]") lines.append(f"- {marker} {item['id']}. {item['content']} ({item['status']})")