fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.
This commit is contained in:
0xbyt4
2026-03-08 20:44:42 +03:00
parent cd77c7100c
commit 9eee529a7f
3 changed files with 349 additions and 6 deletions

View File

@@ -2463,7 +2463,7 @@ class AIAgent:
if messages and messages[-1].get("_flush_sentinel") == _sentinel:
messages.pop()
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple:
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
"""Compress conversation context and split the session in SQLite.
Returns:
@@ -2478,6 +2478,25 @@ class AIAgent:
if todo_snapshot:
compressed.append({"role": "user", "content": todo_snapshot})
# Preserve file-read history so the model doesn't re-read files
# it already examined before compression.
try:
from tools.file_tools import get_read_files_summary
read_files = get_read_files_summary(task_id)
if read_files:
file_list = "\n".join(
f" - {f['path']} ({', '.join(f['regions'])})"
for f in read_files
)
compressed.append({"role": "user", "content": (
"[Files already read in this session — do NOT re-read these]\n"
f"{file_list}\n"
"Use the information from the context summary above. "
"Proceed with writing, editing, or responding."
)})
except Exception:
pass # Don't break compression if file tracking fails
self._invalidate_system_prompt()
new_system_prompt = self._build_system_prompt(system_message)
self._cached_system_prompt = new_system_prompt
@@ -2999,7 +3018,8 @@ class AIAgent:
for _pass in range(3):
_orig_len = len(messages)
messages, active_system_prompt = self._compress_context(
messages, system_message, approx_tokens=_preflight_tokens
messages, system_message, approx_tokens=_preflight_tokens,
task_id=effective_task_id,
)
if len(messages) >= _orig_len:
break # Cannot compress further
@@ -3461,7 +3481,8 @@ class AIAgent:
original_len = len(messages)
messages, active_system_prompt = self._compress_context(
messages, system_message, approx_tokens=approx_tokens
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
if len(messages) < original_len:
@@ -3528,7 +3549,8 @@ class AIAgent:
original_len = len(messages)
messages, active_system_prompt = self._compress_context(
messages, system_message, approx_tokens=approx_tokens
messages, system_message, approx_tokens=approx_tokens,
task_id=effective_task_id,
)
if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
@@ -3848,7 +3870,8 @@ class AIAgent:
if self.compression_enabled and self.context_compressor.should_compress():
messages, active_system_prompt = self._compress_context(
messages, system_message,
approx_tokens=self.context_compressor.last_prompt_tokens
approx_tokens=self.context_compressor.last_prompt_tokens,
task_id=effective_task_id,
)
# Save session log incrementally (so progress is visible even if interrupted)

View File

@@ -0,0 +1,271 @@
#!/usr/bin/env python3
"""
Tests for the read-loop detection mechanism in file_tools.
Verifies that:
1. Re-reading the same file region produces a warning
2. Different regions/files don't trigger false warnings
3. Task isolation works (different tasks have separate trackers)
4. get_read_files_summary returns accurate history
5. clear_read_tracker resets state
6. Context compression injects file-read history
Run with: python -m pytest tests/tools/test_read_loop_detection.py -v
"""
import json
import unittest
from unittest.mock import patch, MagicMock
from tools.file_tools import (
read_file_tool,
get_read_files_summary,
clear_read_tracker,
_read_tracker,
)
class _FakeReadResult:
"""Minimal stand-in for FileOperations.read_file return value."""
def __init__(self, content="line1\nline2\n", total_lines=2):
self._content = content
self._total_lines = total_lines
def to_dict(self):
return {"content": self._content, "total_lines": self._total_lines}
def _fake_read_file(path, offset=1, limit=500):
    """Deterministic stand-in for FileOperations.read_file.

    Ignores offset/limit and returns a fixed-shape result whose
    content embeds the path, so tests can assert on it.
    """
    fake_content = f"content of {path}"
    return _FakeReadResult(content=fake_content, total_lines=10)
def _make_fake_file_ops():
    """Build a MagicMock file-ops object whose read_file delegates to the fake."""
    ops = MagicMock()
    ops.read_file = _fake_read_file
    return ops
class TestReadLoopDetection(unittest.TestCase):
    """read_file_tool must warn when the same file region is read repeatedly."""

    def setUp(self):
        # Start every test from an empty tracker.
        clear_read_tracker()

    def tearDown(self):
        clear_read_tracker()

    @staticmethod
    def _read(path, **kwargs):
        """Invoke the tool and decode its JSON payload."""
        return json.loads(read_file_tool(path, **kwargs))

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_first_read_has_no_warning(self, _ops):
        payload = self._read("/tmp/test.py", task_id="t1")
        self.assertIn("content", payload)
        self.assertNotIn("_warning", payload)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_second_read_same_region_has_warning(self, _ops):
        self._read("/tmp/test.py", offset=1, limit=500, task_id="t1")
        payload = self._read("/tmp/test.py", offset=1, limit=500, task_id="t1")
        self.assertIn("_warning", payload)
        self.assertIn("already read", payload["_warning"])
        self.assertIn("2 times", payload["_warning"])

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_third_read_increments_count(self, _ops):
        for _ in range(2):
            self._read("/tmp/test.py", task_id="t1")
        payload = self._read("/tmp/test.py", task_id="t1")
        self.assertIn("3 times", payload["_warning"])

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_region_no_warning(self, _ops):
        self._read("/tmp/test.py", offset=1, limit=500, task_id="t1")
        payload = self._read("/tmp/test.py", offset=501, limit=500, task_id="t1")
        self.assertNotIn("_warning", payload)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_file_no_warning(self, _ops):
        self._read("/tmp/a.py", task_id="t1")
        payload = self._read("/tmp/b.py", task_id="t1")
        self.assertNotIn("_warning", payload)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_tasks_isolated(self, _ops):
        self._read("/tmp/test.py", task_id="task_a")
        payload = self._read("/tmp/test.py", task_id="task_b")
        self.assertNotIn("_warning", payload)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_warning_still_returns_content(self, _ops):
        """Even with a warning, the file content is still returned."""
        self._read("/tmp/test.py", task_id="t1")
        payload = self._read("/tmp/test.py", task_id="t1")
        self.assertIn("_warning", payload)
        self.assertIn("content", payload)
        self.assertIn("content of /tmp/test.py", payload["content"])
class TestReadFilesSummary(unittest.TestCase):
    """get_read_files_summary must mirror exactly the reads performed per task."""

    def setUp(self):
        clear_read_tracker()

    def tearDown(self):
        clear_read_tracker()

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_empty_when_no_reads(self, _ops):
        self.assertEqual([], get_read_files_summary("t1"))

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_single_file_single_region(self, _ops):
        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
        summary = get_read_files_summary("t1")
        self.assertEqual(1, len(summary))
        entry = summary[0]
        self.assertEqual("/tmp/test.py", entry["path"])
        self.assertIn("lines 1-500", entry["regions"])

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_single_file_multiple_regions(self, _ops):
        # Two disjoint regions of one file collapse into a single entry
        # carrying both region strings.
        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
        read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1")
        summary = get_read_files_summary("t1")
        self.assertEqual(1, len(summary))
        self.assertEqual(2, len(summary[0]["regions"]))

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_multiple_files(self, _ops):
        read_file_tool("/tmp/a.py", task_id="t1")
        read_file_tool("/tmp/b.py", task_id="t1")
        summary = get_read_files_summary("t1")
        self.assertEqual(2, len(summary))
        self.assertEqual({"/tmp/a.py", "/tmp/b.py"},
                         {entry["path"] for entry in summary})

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_task_has_separate_summary(self, _ops):
        read_file_tool("/tmp/a.py", task_id="task_a")
        read_file_tool("/tmp/b.py", task_id="task_b")
        for task, expected_path in (("task_a", "/tmp/a.py"),
                                    ("task_b", "/tmp/b.py")):
            summary = get_read_files_summary(task)
            self.assertEqual(1, len(summary))
            self.assertEqual(expected_path, summary[0]["path"])
class TestClearReadTracker(unittest.TestCase):
    """clear_read_tracker must wipe per-task or global state as requested."""

    def setUp(self):
        clear_read_tracker()

    def tearDown(self):
        clear_read_tracker()

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_clear_specific_task(self, _ops):
        for task in ("t1", "t2"):
            read_file_tool("/tmp/test.py", task_id=task)
        clear_read_tracker("t1")
        # Only t1 is wiped; t2's history survives.
        self.assertEqual([], get_read_files_summary("t1"))
        self.assertEqual(1, len(get_read_files_summary("t2")))

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_clear_all(self, _ops):
        for task in ("t1", "t2"):
            read_file_tool("/tmp/test.py", task_id=task)
        clear_read_tracker()
        for task in ("t1", "t2"):
            self.assertEqual([], get_read_files_summary(task))

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_clear_then_reread_no_warning(self, _ops):
        read_file_tool("/tmp/test.py", task_id="t1")
        clear_read_tracker("t1")
        # After a reset the next read counts as the first again.
        payload = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertNotIn("_warning", payload)
class TestCompressionFileHistory(unittest.TestCase):
    """Verify that _compress_context injects file-read history.

    Calls the unbound AIAgent._compress_context with a MagicMock standing
    in for the agent, so no real agent needs to be constructed.
    """

    def setUp(self):
        # Each test starts from an empty read tracker.
        clear_read_tracker()

    def tearDown(self):
        clear_read_tracker()

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_compress_context_includes_read_files(self, _mock_ops):
        """After reading files, _compress_context should inject a message
        listing which files were already read."""
        # Simulate reads under the same task id later passed to _compress_context.
        read_file_tool("/tmp/foo.py", offset=1, limit=100, task_id="compress_test")
        read_file_tool("/tmp/bar.py", offset=1, limit=200, task_id="compress_test")
        # Build minimal messages for compression (need enough messages)
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Analyze the codebase."},
            {"role": "assistant", "content": "I'll read the files."},
            {"role": "user", "content": "Continue."},
            {"role": "assistant", "content": "Reading more files."},
            {"role": "user", "content": "What did you find?"},
            {"role": "assistant", "content": "Here are my findings."},
            {"role": "user", "content": "Great, write the fix."},
            {"role": "assistant", "content": "Working on it."},
            {"role": "user", "content": "Status?"},
        ]
        # Mock the compressor to return a simple compression
        mock_compressor = MagicMock()
        mock_compressor.compress.return_value = [
            messages[0],  # system
            messages[1],  # first user
            {"role": "user", "content": "[CONTEXT SUMMARY]: Files were analyzed."},
            messages[-1],  # last user
        ]
        mock_compressor.last_prompt_tokens = 5000
        # Mock the agent's _compress_context dependencies
        mock_agent = MagicMock()
        mock_agent.context_compressor = mock_compressor
        mock_agent._todo_store.format_for_injection.return_value = None  # no TODO snapshot
        mock_agent._session_db = None
        mock_agent.quiet_mode = True
        mock_agent._invalidate_system_prompt = MagicMock()
        mock_agent._build_system_prompt = MagicMock(return_value="system prompt")
        mock_agent._cached_system_prompt = None
        # Call the real _compress_context (unbound, mock_agent acts as self).
        from run_agent import AIAgent
        result, _ = AIAgent._compress_context(
            mock_agent, messages, "system prompt",
            approx_tokens=5000, task_id="compress_test",
        )
        # Find the injected file-read history message
        file_history_msgs = [
            m for m in result
            if isinstance(m.get("content"), str)
            and "already read" in m.get("content", "").lower()
        ]
        self.assertEqual(len(file_history_msgs), 1,
                         "Should inject exactly one file-read history message")
        history_content = file_history_msgs[0]["content"]
        # Both files read above must be listed, with the explicit instruction.
        self.assertIn("/tmp/foo.py", history_content)
        self.assertIn("/tmp/bar.py", history_content)
        self.assertIn("do NOT re-read", history_content)
# Allow running this test module directly, outside of pytest.
if __name__ == "__main__":
    unittest.main()

View File

@@ -13,6 +13,11 @@ logger = logging.getLogger(__name__)
_file_ops_lock = threading.Lock()
_file_ops_cache: dict = {}
# Track files read per task so re-read loops can be detected (e.g. after
# context compression discards the model's memory of earlier reads).
# Layout: {task_id: {(path, offset, limit): read_count}}.
# All access to _read_tracker goes through _read_tracker_lock.
_read_tracker_lock = threading.Lock()
_read_tracker: dict = {}
def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
"""Get or create ShellFileOperations for a terminal environment.
@@ -128,11 +133,55 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
try:
file_ops = _get_file_ops(task_id)
result = file_ops.read_file(path, offset, limit)
return json.dumps(result.to_dict(), ensure_ascii=False)
result_dict = result.to_dict()
# Track reads to detect re-read loops (e.g. after context compression)
read_key = (path, offset, limit)
with _read_tracker_lock:
task_reads = _read_tracker.setdefault(task_id, {})
task_reads[read_key] = task_reads.get(read_key, 0) + 1
count = task_reads[read_key]
if count > 1:
result_dict["_warning"] = (
f"You have already read this exact file region {count} times in this session. "
"The content has not changed. Use the information you already have instead of re-reading. "
"If you are stuck in a loop, stop reading and proceed with writing or responding."
)
return json.dumps(result_dict, ensure_ascii=False)
except Exception as e:
return json.dumps({"error": str(e)}, ensure_ascii=False)
def get_read_files_summary(task_id: str = "default") -> list:
    """Summarize which files (and line regions) were read for *task_id*.

    Context compression uses this to re-inject read history into the
    rebuilt conversation across compression boundaries.

    Returns:
        A list of {"path": ..., "regions": [...]} dicts sorted by path;
        regions appear in the order the reads happened.
    """
    with _read_tracker_lock:
        regions_by_path: dict = {}
        for (path, offset, limit), _count in _read_tracker.get(task_id, {}).items():
            region = f"lines {offset}-{offset + limit - 1}"
            regions_by_path.setdefault(path, []).append(region)
        return [
            {"path": path, "regions": regions}
            for path, regions in sorted(regions_by_path.items())
        ]
def clear_read_tracker(task_id: str = None):
    """Clear the read tracker. Called when starting a new conversation.

    Args:
        task_id: When given, drop only that task's read history;
            when None, wipe the tracker for every task.
    """
    with _read_tracker_lock:
        # Compare against None explicitly: the original truthiness check
        # (`if task_id:`) would treat an explicit empty-string task id as
        # "clear everything" instead of clearing just that task.
        if task_id is not None:
            _read_tracker.pop(task_id, None)
        else:
            _read_tracker.clear()
def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
"""Write content to a file."""
try: