fix: remove stale test skips, fix regex backtracking, file search bug, and test flakiness

Bug fixes: - agent/redact.py: catastrophic regex backtracking in _ENV_ASSIGN_RE — removed re.IGNORECASE and changed [A-Z_]* to [A-Z0-9_]* to restrict matching to actual env var name chars. Without this, the pattern backtracks exponentially on large strings (e.g. 100K tool output), causing test_file_read_guards to time out. - tools/file_operations.py: over-escaped newline in find -printf format string produced literal backslash-n instead of a real newline, breaking file search result parsing (total_count always 1, paths concatenated). Test fixes: - Remove stale pytestmark.skip from 4 test modules that were blanket-skipped as 'Hangs in non-interactive environments' but actually run fine: - test_413_compression.py (12 tests, 25s) - test_file_tools_live.py (71 tests, 24s) - test_code_execution.py (61 tests, 99s) - test_agent_loop_tool_calling.py (has proper OPENROUTER_API_KEY skip already) - test_413_compression.py: fix threshold values in 2 preflight compression tests where context_length was too small for the compressed output to fit in one pass. - test_mcp_probe.py: add missing _MCP_AVAILABLE mock so tests work without MCP SDK. - test_mcp_tool_issue_948.py: inject MCP symbols (StdioServerParameters etc.) when SDK is not installed so patch() targets exist. - test_approve_deny_commands.py: replace time.sleep(0.3) with deterministic polling of _gateway_queues — fixes race condition where resolve fires before threads register their approval entries, causing the test to hang indefinitely. Net effect: +256 tests recovered from skip, 8 real failures fixed.
2026-04-03 13:35:17 +02:00
parent 77a2aad771
commit 6367e1c4c0
9 changed files with 45 additions and 20 deletions
--- a/tests/test_413_compression.py
+++ b/tests/test_413_compression.py
@@ -7,7 +7,7 @@ Verifies that:
 """

 import pytest
-pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
+#pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")



@@ -318,12 +318,13 @@ class TestPreflightCompression:
    def test_preflight_compresses_oversized_history(self, agent):
        """When loaded history exceeds the model's context threshold, compress before API call."""
        agent.compression_enabled = True
-        # Set a very small context so the history is "oversized"
-        agent.context_compressor.context_length = 100
-        agent.context_compressor.threshold_tokens = 85  # 85% of 100
+        # Set a small context so the history is "oversized", but large enough
+        # that the compressed result (2 short messages) fits in a single pass.
+        agent.context_compressor.context_length = 2000
+        agent.context_compressor.threshold_tokens = 200

        # Build a history that will be large enough to trigger preflight
-        # (each message ~20 chars = ~5 tokens, 20 messages = ~100 tokens > 85 threshold)
+        # (each message ~50 chars ≈ 13 tokens, 40 messages ≈ 520 tokens > 200 threshold)
        big_history = []
        for i in range(20):
            big_history.append({"role": "user", "content": f"Message number {i} with some extra text padding"})
@@ -338,7 +339,7 @@ class TestPreflightCompression:
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
-            # Simulate compression reducing messages
+            # Simulate compression reducing messages to a small set that fits
            mock_compress.return_value = (
                [
                    {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
@@ -411,7 +412,7 @@ class TestToolResultPreflightCompression:
        """When tool results push estimated tokens past threshold, compress before next call."""
        agent.compression_enabled = True
        agent.context_compressor.context_length = 200_000
-        agent.context_compressor.threshold_tokens = 140_000
+        agent.context_compressor.threshold_tokens = 130_000  # below the 135k reported usage
        agent.context_compressor.last_prompt_tokens = 130_000
        agent.context_compressor.last_completion_tokens = 5_000