fix: remove stale test skips, fix regex backtracking, file search bug, and test flakiness
Bug fixes:
- agent/redact.py: catastrophic regex backtracking in _ENV_ASSIGN_RE — removed re.IGNORECASE and changed [A-Z_]* to [A-Z0-9_]* to restrict matching to actual env var name chars. Without this, the pattern backtracks exponentially on large strings (e.g. 100K tool output), causing test_file_read_guards to time out.
- tools/file_operations.py: over-escaped newline in find -printf format string produced a literal backslash-n instead of a real newline, breaking file search result parsing (total_count always 1, paths concatenated).

Test fixes:
- Remove stale pytestmark.skip from 4 test modules that were blanket-skipped as 'Hangs in non-interactive environments' but actually run fine:
  - test_413_compression.py (12 tests, 25s)
  - test_file_tools_live.py (71 tests, 24s)
  - test_code_execution.py (61 tests, 99s)
  - test_agent_loop_tool_calling.py (has proper OPENROUTER_API_KEY skip already)
- test_413_compression.py: fix threshold values in 2 preflight compression tests where context_length was too small for the compressed output to fit in one pass.
- test_mcp_probe.py: add missing _MCP_AVAILABLE mock so tests work without MCP SDK.
- test_mcp_tool_issue_948.py: inject MCP symbols (StdioServerParameters etc.) when SDK is not installed so patch() targets exist.
- test_approve_deny_commands.py: replace time.sleep(0.3) with deterministic polling of _gateway_queues — fixes race condition where resolve fires before threads register their approval entries, causing the test to hang indefinitely.

Net effect: +256 tests recovered from skip, 8 real failures fixed.
This commit is contained in:
@@ -7,7 +7,7 @@ Verifies that:
|
||||
"""
|
||||
|
||||
import pytest
|
||||
pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
|
||||
#pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
|
||||
|
||||
|
||||
|
||||
@@ -318,12 +318,13 @@ class TestPreflightCompression:
|
||||
def test_preflight_compresses_oversized_history(self, agent):
|
||||
"""When loaded history exceeds the model's context threshold, compress before API call."""
|
||||
agent.compression_enabled = True
|
||||
# Set a very small context so the history is "oversized"
|
||||
agent.context_compressor.context_length = 100
|
||||
agent.context_compressor.threshold_tokens = 85 # 85% of 100
|
||||
# Set a small context so the history is "oversized", but large enough
|
||||
# that the compressed result (2 short messages) fits in a single pass.
|
||||
agent.context_compressor.context_length = 2000
|
||||
agent.context_compressor.threshold_tokens = 200
|
||||
|
||||
# Build a history that will be large enough to trigger preflight
|
||||
# (each message ~20 chars = ~5 tokens, 20 messages = ~100 tokens > 85 threshold)
|
||||
# (each message ~50 chars ≈ 13 tokens, 40 messages ≈ 520 tokens > 200 threshold)
|
||||
big_history = []
|
||||
for i in range(20):
|
||||
big_history.append({"role": "user", "content": f"Message number {i} with some extra text padding"})
|
||||
@@ -338,7 +339,7 @@ class TestPreflightCompression:
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
# Simulate compression reducing messages
|
||||
# Simulate compression reducing messages to a small set that fits
|
||||
mock_compress.return_value = (
|
||||
[
|
||||
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
|
||||
@@ -411,7 +412,7 @@ class TestToolResultPreflightCompression:
|
||||
"""When tool results push estimated tokens past threshold, compress before next call."""
|
||||
agent.compression_enabled = True
|
||||
agent.context_compressor.context_length = 200_000
|
||||
agent.context_compressor.threshold_tokens = 140_000
|
||||
agent.context_compressor.threshold_tokens = 130_000 # below the 135k reported usage
|
||||
agent.context_compressor.last_prompt_tokens = 130_000
|
||||
agent.context_compressor.last_completion_tokens = 5_000
|
||||
|
||||
|
||||
Reference in New Issue
Block a user