diff --git a/model_tools.py b/model_tools.py index a2fd68c4d..2139eb080 100644 --- a/model_tools.py +++ b/model_tools.py @@ -284,6 +284,16 @@ def handle_function_call( Returns: Function result as a JSON string. """ + # Notify the read-loop tracker when a non-read/search tool runs, + # so the *consecutive* counter resets (reads after other work are fine). + _READ_SEARCH_TOOLS = {"read_file", "search_files"} + if function_name not in _READ_SEARCH_TOOLS: + try: + from tools.file_tools import notify_other_tool_call + notify_other_tool_call(task_id or "default") + except Exception: + pass # file_tools may not be loaded yet + try: if function_name in _AGENT_LOOP_TOOLS: return json.dumps({"error": f"{function_name} must be handled by the agent loop"}) diff --git a/tests/tools/test_read_loop_detection.py b/tests/tools/test_read_loop_detection.py index dfa1c1ab3..a7c01170f 100644 --- a/tests/tools/test_read_loop_detection.py +++ b/tests/tools/test_read_loop_detection.py @@ -3,12 +3,14 @@ Tests for the read-loop detection mechanism in file_tools. Verifies that: -1. Re-reading the same file region produces a warning -2. Different regions/files don't trigger false warnings -3. Task isolation works (different tasks have separate trackers) -4. get_read_files_summary returns accurate history -5. clear_read_tracker resets state -6. Context compression injects file-read history +1. Only *consecutive* identical reads trigger warnings/blocks +2. Any other tool call in between resets the consecutive counter +3. Warn on 3rd consecutive, block on 4th+ +4. Different regions/files/tasks don't trigger false warnings +5. get_read_files_summary returns accurate history (unaffected by search keys) +6. clear_read_tracker resets state +7. notify_other_tool_call resets consecutive counters +8. Context compression injects file-read history Run with: python -m pytest tests/tools/test_read_loop_detection.py -v """ @@ -22,6 +24,7 @@ from tools.file_tools import ( search_tool, get_read_files_summary, clear_read_tracker, + notify_other_tool_call, _read_tracker, ) @@ -57,7 +60,7 @@ def _make_fake_file_ops(): class TestReadLoopDetection(unittest.TestCase): - """Verify that read_file_tool detects and warns on re-reads.""" + """Verify that read_file_tool detects and warns on consecutive re-reads.""" def setUp(self): clear_read_tracker() @@ -72,50 +75,68 @@ class TestReadLoopDetection(unittest.TestCase): self.assertIn("content", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_second_read_same_region_has_warning(self, _mock_ops): + def test_second_consecutive_read_no_warning(self, _mock_ops): + """2nd consecutive read should NOT warn (threshold is 3).""" read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") result = json.loads( read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") ) - self.assertIn("_warning", result) - self.assertIn("already read", result["_warning"]) - self.assertIn("2 times", result["_warning"]) + self.assertNotIn("_warning", result) + self.assertIn("content", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_third_read_is_blocked(self, _mock_ops): - """3rd read of the same region returns error, no content.""" + def test_third_consecutive_read_has_warning(self, _mock_ops): + """3rd consecutive read of the same region triggers a warning.""" for _ in range(2): read_file_tool("/tmp/test.py", task_id="t1") result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertIn("_warning", result) + self.assertIn("3 times", result["_warning"]) + # Warning still returns content + self.assertIn("content", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_fourth_consecutive_read_is_blocked(self, _mock_ops): + """4th consecutive read of the same region is BLOCKED — no content.""" + for _ in range(3): + read_file_tool("/tmp/test.py", task_id="t1") + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) self.assertIn("error", result) self.assertIn("BLOCKED", result["error"]) + self.assertIn("4 times", result["error"]) self.assertNotIn("content", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_fourth_read_still_blocked(self, _mock_ops): + def test_fifth_consecutive_read_still_blocked(self, _mock_ops): """Subsequent reads remain blocked with incrementing count.""" - for _ in range(3): + for _ in range(4): read_file_tool("/tmp/test.py", task_id="t1") result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) self.assertIn("BLOCKED", result["error"]) - self.assertIn("4 times", result["error"]) + self.assertIn("5 times", result["error"]) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_different_region_no_warning(self, _mock_ops): + def test_different_region_resets_consecutive(self, _mock_ops): + """Reading a different region of the same file resets consecutive count.""" read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") + read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1") + # Now read a different region — this resets the consecutive counter result = json.loads( read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1") ) self.assertNotIn("_warning", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_different_file_no_warning(self, _mock_ops): + def test_different_file_resets_consecutive(self, _mock_ops): + """Reading a different file resets the consecutive counter.""" + read_file_tool("/tmp/a.py", task_id="t1") read_file_tool("/tmp/a.py", task_id="t1") result = json.loads(read_file_tool("/tmp/b.py", task_id="t1")) self.assertNotIn("_warning", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) def test_different_tasks_isolated(self, _mock_ops): + """Different task_ids have separate consecutive counters.""" read_file_tool("/tmp/test.py", task_id="task_a") result = json.loads( read_file_tool("/tmp/test.py", task_id="task_b") @@ -124,14 +145,63 @@ class TestReadLoopDetection(unittest.TestCase): @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) def test_warning_still_returns_content(self, _mock_ops): - """Even with a warning, the file content is still returned.""" - read_file_tool("/tmp/test.py", task_id="t1") + """Even with a warning (3rd read), the file content is still returned.""" + for _ in range(2): + read_file_tool("/tmp/test.py", task_id="t1") result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) self.assertIn("_warning", result) self.assertIn("content", result) self.assertIn("content of /tmp/test.py", result["content"]) +class TestNotifyOtherToolCall(unittest.TestCase): + """Verify that notify_other_tool_call resets the consecutive counter.""" + + def setUp(self): + clear_read_tracker() + + def tearDown(self): + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_other_tool_resets_consecutive(self, _mock_ops): + """After another tool runs, re-reading the same file is NOT consecutive.""" + read_file_tool("/tmp/test.py", task_id="t1") + read_file_tool("/tmp/test.py", task_id="t1") + # Simulate a different tool being called + notify_other_tool_call("t1") + # This should be treated as a fresh read (consecutive reset) + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertIn("content", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_other_tool_prevents_block(self, _mock_ops): + """Agent can keep reading if other tools are used in between.""" + for i in range(10): + read_file_tool("/tmp/test.py", task_id="t1") + notify_other_tool_call("t1") + # After 10 reads interleaved with other tools, still no warning + result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) + self.assertIn("content", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_notify_on_unknown_task_is_safe(self, _mock_ops): + """notify_other_tool_call on a task that hasn't read anything is a no-op.""" + notify_other_tool_call("nonexistent_task") # Should not raise + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_history_survives_notify(self, _mock_ops): + """notify_other_tool_call resets consecutive but preserves read_history.""" + read_file_tool("/tmp/test.py", offset=1, limit=100, task_id="t1") + notify_other_tool_call("t1") + summary = get_read_files_summary("t1") + self.assertEqual(len(summary), 1) + self.assertEqual(summary[0]["path"], "/tmp/test.py") + + class TestReadFilesSummary(unittest.TestCase): """Verify get_read_files_summary returns accurate file-read history.""" @@ -183,6 +253,15 @@ class TestReadFilesSummary(unittest.TestCase): self.assertEqual(len(summary_b), 1) self.assertEqual(summary_b[0]["path"], "/tmp/b.py") + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_summary_unaffected_by_searches(self, _mock_ops): + """Searches should NOT appear in the file-read summary.""" + read_file_tool("/tmp/test.py", task_id="t1") + search_tool("def main", task_id="t1") + summary = get_read_files_summary("t1") + self.assertEqual(len(summary), 1) + self.assertEqual(summary[0]["path"], "/tmp/test.py") + class TestClearReadTracker(unittest.TestCase): """Verify clear_read_tracker resets state properly.""" @@ -211,10 +290,12 @@ class TestClearReadTracker(unittest.TestCase): @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) def test_clear_then_reread_no_warning(self, _mock_ops): - read_file_tool("/tmp/test.py", task_id="t1") + for _ in range(3): + read_file_tool("/tmp/test.py", task_id="t1") clear_read_tracker("t1") result = json.loads(read_file_tool("/tmp/test.py", task_id="t1")) self.assertNotIn("_warning", result) + self.assertNotIn("error", result) class TestCompressionFileHistory(unittest.TestCase): @@ -256,7 +337,7 @@ class TestCompressionFileHistory(unittest.TestCase): {"role": "user", "content": "[CONTEXT SUMMARY]: Files were analyzed."}, messages[-1], # last user ] - mock_compressor.last_prompt_tokens = 5000 + mock_compressor.last_prompt_tokens = 1000 # Mock the agent's _compress_context dependencies mock_agent = MagicMock() @@ -272,7 +353,7 @@ class TestCompressionFileHistory(unittest.TestCase): from run_agent import AIAgent result, _ = AIAgent._compress_context( mock_agent, messages, "system prompt", - approx_tokens=5000, task_id="compress_test", + approx_tokens=1000, task_id="compress_test", ) # Find the injected file-read history message @@ -291,7 +372,7 @@ class TestCompressionFileHistory(unittest.TestCase): class TestSearchLoopDetection(unittest.TestCase): - """Verify that search_tool detects and blocks repeated searches.""" + """Verify that search_tool detects and blocks consecutive repeated searches.""" def setUp(self): clear_read_tracker() @@ -306,23 +387,38 @@ class TestSearchLoopDetection(unittest.TestCase): self.assertNotIn("error", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_second_search_has_warning(self, _mock_ops): + def test_second_consecutive_search_no_warning(self, _mock_ops): + """2nd consecutive search should NOT warn (threshold is 3).""" search_tool("def main", task_id="t1") result = json.loads(search_tool("def main", task_id="t1")) - self.assertIn("_warning", result) - self.assertIn("2 times", result["_warning"]) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_third_search_is_blocked(self, _mock_ops): + def test_third_consecutive_search_has_warning(self, _mock_ops): + """3rd consecutive identical search triggers a warning.""" for _ in range(2): search_tool("def main", task_id="t1") result = json.loads(search_tool("def main", task_id="t1")) + self.assertIn("_warning", result) + self.assertIn("3 times", result["_warning"]) + # Warning still returns results + self.assertIn("matches", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_fourth_consecutive_search_is_blocked(self, _mock_ops): + """4th consecutive identical search is BLOCKED.""" + for _ in range(3): + search_tool("def main", task_id="t1") + result = json.loads(search_tool("def main", task_id="t1")) self.assertIn("error", result) self.assertIn("BLOCKED", result["error"]) self.assertNotIn("matches", result) @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) - def test_different_pattern_no_warning(self, _mock_ops): + def test_different_pattern_resets_consecutive(self, _mock_ops): + """A different search pattern resets the consecutive counter.""" + search_tool("def main", task_id="t1") search_tool("def main", task_id="t1") result = json.loads(search_tool("class Foo", task_id="t1")) self.assertNotIn("_warning", result) @@ -330,10 +426,32 @@ class TestSearchLoopDetection(unittest.TestCase): @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) def test_different_task_isolated(self, _mock_ops): + """Different tasks have separate consecutive counters.""" search_tool("def main", task_id="t1") result = json.loads(search_tool("def main", task_id="t2")) self.assertNotIn("_warning", result) + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_other_tool_resets_search_consecutive(self, _mock_ops): + """notify_other_tool_call resets search consecutive counter too.""" + search_tool("def main", task_id="t1") + search_tool("def main", task_id="t1") + notify_other_tool_call("t1") + result = json.loads(search_tool("def main", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) + + @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops()) + def test_read_between_searches_resets_consecutive(self, _mock_ops): + """A read_file call between searches resets search consecutive counter.""" + search_tool("def main", task_id="t1") + search_tool("def main", task_id="t1") + # A read changes the last_key, resetting consecutive for the search + read_file_tool("/tmp/test.py", task_id="t1") + result = json.loads(search_tool("def main", task_id="t1")) + self.assertNotIn("_warning", result) + self.assertNotIn("error", result) + class TestTodoInjectionFiltering(unittest.TestCase): """Verify that format_for_injection filters completed/cancelled todos.""" diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 401fc6ac7..76a12dff8 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -78,7 +78,7 @@ _TOOL_STUBS = { "web_extract": ( "web_extract", "urls: list", - '"""Extract content from URLs. Returns dict with results list of {url, content, error}."""', + '"""Extract content from URLs. Returns dict with results list of {url, title, content, error}."""', '{"urls": urls}', ), "read_file": ( @@ -616,7 +616,7 @@ _TOOL_DOC_LINES = [ " Returns {\"data\": {\"web\": [{\"url\", \"title\", \"description\"}, ...]}}"), ("web_extract", " web_extract(urls: list[str]) -> dict\n" - " Returns {\"results\": [{\"url\", \"content\", \"error\"}, ...]} where content is markdown"), + " Returns {\"results\": [{\"url\", \"title\", \"content\", \"error\"}, ...]} where content is markdown"), ("read_file", " read_file(path: str, offset: int = 1, limit: int = 500) -> dict\n" " Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}"), diff --git a/tools/file_tools.py b/tools/file_tools.py index cf5c12680..f498da251 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -15,7 +15,10 @@ _file_ops_lock = threading.Lock() _file_ops_cache: dict = {} # Track files read per task to detect re-read loops after context compression. -# Key: task_id, Value: dict mapping (path, offset, limit) -> read count +# Per task_id we store: +# "last_key": the key of the most recent read/search call (or None) +# "consecutive": how many times that exact call has been repeated in a row +# "read_history": set of (path, offset, limit) tuples for get_read_files_summary _read_tracker_lock = threading.Lock() _read_tracker: dict = {} @@ -139,28 +142,37 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = result.content = redact_sensitive_text(result.content) result_dict = result.to_dict() - # Track reads to detect re-read loops (e.g. after context compression) - read_key = (path, offset, limit) + # Track reads to detect *consecutive* re-read loops. + # The counter resets whenever any other tool is called in between, + # so only truly back-to-back identical reads trigger warnings/blocks. + read_key = ("read", path, offset, limit) with _read_tracker_lock: - task_reads = _read_tracker.setdefault(task_id, {}) - task_reads[read_key] = task_reads.get(read_key, 0) + 1 - count = task_reads[read_key] + task_data = _read_tracker.setdefault(task_id, { + "last_key": None, "consecutive": 0, "read_history": set(), + }) + task_data["read_history"].add((path, offset, limit)) + if task_data["last_key"] == read_key: + task_data["consecutive"] += 1 + else: + task_data["last_key"] = read_key + task_data["consecutive"] = 1 + count = task_data["consecutive"] - if count >= 3: + if count >= 4: # Hard block: stop returning content to break the loop return json.dumps({ "error": ( - f"BLOCKED: You have read this exact file region {count} times. " + f"BLOCKED: You have read this exact file region {count} times in a row. " "The content has NOT changed. You already have this information. " "STOP re-reading and proceed with your task." ), "path": path, "already_read": count, }, ensure_ascii=False) - elif count > 1: + elif count >= 3: result_dict["_warning"] = ( - f"You have already read this exact file region {count} times in this session. " - "The content has not changed. Use the information you already have instead of re-reading. " + f"You have read this exact file region {count} times consecutively. " + "The content has not changed since your last read. Use the information you already have. " "If you are stuck in a loop, stop reading and proceed with writing or responding." ) @@ -176,9 +188,10 @@ def get_read_files_summary(task_id: str = "default") -> list: compression boundaries. """ with _read_tracker_lock: - task_reads = _read_tracker.get(task_id, {}) - seen_paths = {} - for (path, offset, limit), count in task_reads.items(): + task_data = _read_tracker.get(task_id, {}) + read_history = task_data.get("read_history", set()) + seen_paths: dict = {} + for (path, offset, limit) in read_history: if path not in seen_paths: seen_paths[path] = [] seen_paths[path].append(f"lines {offset}-{offset + limit - 1}") @@ -189,7 +202,12 @@ def get_read_files_summary(task_id: str = "default") -> list: def clear_read_tracker(task_id: str = None): - """Clear the read tracker. Called when starting a new conversation.""" + """Clear the read tracker. + + Call with a task_id to clear just that task, or without to clear all. + Should be called when a session is destroyed to prevent memory leaks + in long-running gateway processes. + """ with _read_tracker_lock: if task_id: _read_tracker.pop(task_id, None) @@ -197,6 +215,22 @@ def clear_read_tracker(task_id: str = None): _read_tracker.clear() +def notify_other_tool_call(task_id: str = "default"): + """Reset consecutive read/search counter for a task. + + Called by the tool dispatcher (model_tools.py) whenever a tool OTHER + than read_file / search_files is executed. This ensures we only warn + or block on *truly consecutive* repeated reads — if the agent does + anything else in between (write, patch, terminal, etc.) the counter + resets and the next read is treated as fresh. + """ + with _read_tracker_lock: + task_data = _read_tracker.get(task_id) + if task_data: + task_data["last_key"] = None + task_data["consecutive"] = 0 + + def write_file_tool(path: str, content: str, task_id: str = "default") -> str: """Write content to a file.""" try: @@ -245,17 +279,23 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", task_id: str = "default") -> str: """Search for content or files.""" try: - # Track searches to detect repeated search loops - search_key = ("search", pattern, target, path, file_glob or "") + # Track searches to detect *consecutive* repeated search loops. + search_key = ("search", pattern, target, str(path), file_glob or "") with _read_tracker_lock: - task_reads = _read_tracker.setdefault(task_id, {}) - task_reads[search_key] = task_reads.get(search_key, 0) + 1 - count = task_reads[search_key] + task_data = _read_tracker.setdefault(task_id, { + "last_key": None, "consecutive": 0, "read_history": set(), + }) + if task_data["last_key"] == search_key: + task_data["consecutive"] += 1 + else: + task_data["last_key"] = search_key + task_data["consecutive"] = 1 + count = task_data["consecutive"] - if count >= 3: + if count >= 4: return json.dumps({ "error": ( - f"BLOCKED: You have run this exact search {count} times. " + f"BLOCKED: You have run this exact search {count} times in a row. " "The results have NOT changed. You already have this information. " "STOP re-searching and proceed with your task." ), @@ -274,9 +314,9 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", m.content = redact_sensitive_text(m.content) result_dict = result.to_dict() - if count > 1: + if count >= 3: result_dict["_warning"] = ( - f"You have run this exact search {count} times in this session. " + f"You have run this exact search {count} times consecutively. " "The results have not changed. Use the information you already have." )