test: add reproducible perf benchmark for file sync overhead

Direct env.execute() timing — no LLM in the loop. Measures per-command wall-clock including sync check. Results on SSH: - echo median: 617ms (pure SSH round-trip + spawn overhead) - sync-triggered after 6s wait: 621ms (mtime skip adds ~0ms) - within-interval (no sync): 618ms Confirms mtime skip makes sync overhead unmeasurable.
2026-04-08 15:01:45 -07:00
parent 1f1f297528
commit 41c233cb99
1 changed files with 127 additions and 0 deletions
--- a/tests/tools/test_file_sync_perf.py
+++ b/tests/tools/test_file_sync_perf.py
@@ -0,0 +1,127 @@
+"""Reproducible perf benchmark for file sync overhead.
+
+Measures actual env.execute() wall-clock time, no LLM in the loop.
+Run with: uv run pytest tests/tools/test_file_sync_perf.py -v -o "addopts=" -s
+
+Requires backends to be configured (SSH host, Modal creds, etc).
+Skip markers gate each backend.
+"""
+
+import statistics
+import time
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Backend fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def local_env():
+    from tools.environments.local import LocalEnvironment
+    env = LocalEnvironment(cwd="/tmp", timeout=30)
+    yield env
+    env.cleanup()
+
+
+@pytest.fixture
+def ssh_env():
+    import os
+    host = os.environ.get("TERMINAL_SSH_HOST")
+    user = os.environ.get("TERMINAL_SSH_USER")
+    if not host or not user:
+        pytest.skip("TERMINAL_SSH_HOST and TERMINAL_SSH_USER required")
+    from tools.environments.ssh import SSHEnvironment
+    env = SSHEnvironment(host=host, user=user, cwd="/tmp", timeout=30)
+    yield env
+    env.cleanup()
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _time_executions(env, command: str, n: int = 10) -> list[float]:
+    """Run *command* n times and return per-call wall-clock durations."""
+    durations = []
+    for _ in range(n):
+        t0 = time.monotonic()
+        result = env.execute(command, timeout=10)
+        elapsed = time.monotonic() - t0
+        durations.append(elapsed)
+        assert result.get("returncode", result.get("exit_code", -1)) == 0, \
+            f"command failed: {result}"
+    return durations
+
+
+def _report(label: str, durations: list[float]):
+    """Print timing stats."""
+    med = statistics.median(durations)
+    mean = statistics.mean(durations)
+    p95 = sorted(durations)[int(len(durations) * 0.95)]
+    print(f"\n  {label}:")
+    print(f"    n={len(durations)}  median={med*1000:.0f}ms  mean={mean*1000:.0f}ms  p95={p95*1000:.0f}ms")
+    print(f"    raw: {[f'{d*1000:.0f}ms' for d in durations]}")
+    return med
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestLocalPerf:
+    """Local baseline — no file sync, no network. Sets the floor."""
+
+    def test_echo_latency(self, local_env):
+        durations = _time_executions(local_env, "echo hello", n=20)
+        med = _report("local echo", durations)
+        # Spawn-per-call overhead should be < 500ms
+        assert med < 0.5, f"local echo median {med*1000:.0f}ms exceeds 500ms"
+
+
+@pytest.mark.ssh
+class TestSSHPerf:
+    """SSH with FileSyncManager — mtime skip should make sync ~0ms."""
+
+    def test_echo_latency(self, ssh_env):
+        """Sequential echo commands — measures per-command overhead including sync check."""
+        durations = _time_executions(ssh_env, "echo hello", n=20)
+        med = _report("ssh echo (with sync check)", durations)
+        # SSH round-trip + spawn-per-call, but sync should be ~0ms (rate limited)
+        assert med < 2.0, f"ssh echo median {med*1000:.0f}ms exceeds 2000ms"
+
+    def test_sync_overhead_after_interval(self, ssh_env):
+        """Measure sync cost when the rate-limit window has expired.
+
+        Sleep past the 5s interval, then time the next command which
+        triggers a real sync cycle (but with mtime skip, should be fast).
+        """
+        # Warm up
+        ssh_env.execute("echo warmup", timeout=10)
+
+        # Wait for sync interval to expire
+        time.sleep(6)
+
+        # This command will trigger a real sync cycle
+        t0 = time.monotonic()
+        result = ssh_env.execute("echo after-interval", timeout=10)
+        elapsed = time.monotonic() - t0
+
+        print(f"\n  ssh echo after 6s wait (sync triggered): {elapsed*1000:.0f}ms")
+        assert result.get("returncode", result.get("exit_code", -1)) == 0
+
+        # Even with sync triggered, mtime skip should keep it fast
+        # Old rsync approach: ~2-3s. New mtime skip: should be < 1.5s
+        assert elapsed < 1.5, f"sync-triggered command took {elapsed*1000:.0f}ms (expected < 1500ms)"
+
+    def test_no_sync_within_interval(self, ssh_env):
+        """Rapid sequential commands within 5s window — no sync at all."""
+        # First command triggers sync
+        ssh_env.execute("echo prime", timeout=10)
+
+        # Immediately run 10 more — all within rate-limit window
+        durations = _time_executions(ssh_env, "echo rapid", n=10)
+        med = _report("ssh echo (within interval, no sync)", durations)
+
+        # Should be pure SSH overhead, no sync
+        assert med < 1.5, f"within-interval median {med*1000:.0f}ms exceeds 1500ms"