perf(daytona): bulk upload files in single HTTP call
FileSyncManager now accepts an optional bulk_upload_fn callback. When provided, all changed files are uploaded in one call instead of iterating one-by-one with individual HTTP POSTs. DaytonaEnvironment wires this to sandbox.fs.upload_files() which batches everything into a single multipart POST — ~580 files goes from ~5 min to <2s on init. Parent directories are pre-created in one mkdir -p call. Fixes #7362 (item 1).
This commit is contained in:
@@ -16,7 +16,7 @@ from tools.environments.base import (
|
||||
BaseEnvironment,
|
||||
_ThreadedProcessHandle,
|
||||
)
|
||||
from tools.environments.file_sync import FileSyncManager, iter_sync_files, quoted_rm_command
|
||||
from tools.environments.file_sync import BulkUploadFn, FileSyncManager, iter_sync_files, quoted_rm_command
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -129,6 +129,7 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
get_files_fn=lambda: iter_sync_files(f"{self._remote_home}/.hermes"),
|
||||
upload_fn=self._daytona_upload,
|
||||
delete_fn=self._daytona_delete,
|
||||
bulk_upload_fn=self._daytona_bulk_upload,
|
||||
)
|
||||
self._sync_manager.sync(force=True)
|
||||
self.init_session()
|
||||
@@ -139,6 +140,30 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
self._sandbox.process.exec(f"mkdir -p {parent}")
|
||||
self._sandbox.fs.upload_file(host_path, remote_path)
|
||||
|
||||
def _daytona_bulk_upload(self, files: list[tuple[str, str]]) -> None:
|
||||
"""Upload many files in a single HTTP call via Daytona SDK.
|
||||
|
||||
Uses ``sandbox.fs.upload_files()`` which batches all files into one
|
||||
multipart POST, avoiding per-file TLS/HTTP overhead (~580 files
|
||||
goes from ~5 min to <2 s).
|
||||
"""
|
||||
from daytona.common.filesystem import FileUpload
|
||||
|
||||
if not files:
|
||||
return
|
||||
|
||||
# Pre-create all unique parent directories in one shell call
|
||||
parents = sorted({str(Path(remote).parent) for _, remote in files})
|
||||
if parents:
|
||||
mkdir_cmd = "mkdir -p " + " ".join(shlex.quote(p) for p in parents)
|
||||
self._sandbox.process.exec(mkdir_cmd)
|
||||
|
||||
uploads = [
|
||||
FileUpload(source=host_path, destination=remote_path)
|
||||
for host_path, remote_path in files
|
||||
]
|
||||
self._sandbox.fs.upload_files(uploads)
|
||||
|
||||
def _daytona_delete(self, remote_paths: list[str]) -> None:
|
||||
"""Batch-delete remote files via SDK exec."""
|
||||
self._sandbox.process.exec(quoted_rm_command(remote_paths))
|
||||
|
||||
@@ -21,6 +21,7 @@ _FORCE_SYNC_ENV = "HERMES_FORCE_FILE_SYNC"
|
||||
|
||||
# Transport callbacks provided by each backend
|
||||
UploadFn = Callable[[str, str], None] # (host_path, remote_path) -> raises on failure
|
||||
BulkUploadFn = Callable[[list[tuple[str, str]]], None] # [(host_path, remote_path), ...] -> raises on failure
|
||||
DeleteFn = Callable[[list[str]], None] # (remote_paths) -> raises on failure
|
||||
GetFilesFn = Callable[[], list[tuple[str, str]]] # () -> [(host_path, remote_path), ...]
|
||||
|
||||
@@ -76,9 +77,11 @@ class FileSyncManager:
|
||||
upload_fn: UploadFn,
|
||||
delete_fn: DeleteFn,
|
||||
sync_interval: float = _SYNC_INTERVAL_SECONDS,
|
||||
bulk_upload_fn: BulkUploadFn | None = None,
|
||||
):
|
||||
self._get_files_fn = get_files_fn
|
||||
self._upload_fn = upload_fn
|
||||
self._bulk_upload_fn = bulk_upload_fn
|
||||
self._delete_fn = delete_fn
|
||||
self._synced_files: dict[str, tuple[float, int]] = {} # remote_path -> (mtime, size)
|
||||
self._last_sync_time: float = 0.0 # monotonic; 0 ensures first sync runs
|
||||
@@ -129,9 +132,13 @@ class FileSyncManager:
|
||||
logger.debug("file_sync: deleting %d stale remote file(s)", len(to_delete))
|
||||
|
||||
try:
|
||||
for host_path, remote_path in to_upload:
|
||||
self._upload_fn(host_path, remote_path)
|
||||
logger.debug("file_sync: uploaded %s -> %s", host_path, remote_path)
|
||||
if to_upload and self._bulk_upload_fn is not None:
|
||||
self._bulk_upload_fn(to_upload)
|
||||
logger.debug("file_sync: bulk-uploaded %d file(s)", len(to_upload))
|
||||
else:
|
||||
for host_path, remote_path in to_upload:
|
||||
self._upload_fn(host_path, remote_path)
|
||||
logger.debug("file_sync: uploaded %s -> %s", host_path, remote_path)
|
||||
|
||||
if to_delete:
|
||||
self._delete_fn(to_delete)
|
||||
|
||||
Reference in New Issue
Block a user