feat: mount skills directory into all remote backends with live sync (#3890)

Skills with scripts/, templates/, and references/ subdirectories need those files available inside sandboxed execution environments. Previously the skills directory was missing entirely from remote backends.

Live sync — files stay current as credentials refresh and skills update:
- Docker/Singularity: bind mounts are inherently live (host changes visible immediately)
- Modal: _sync_files() runs before each command with mtime+size caching, pushing only changed credential and skill files (~13μs no-op overhead)
- SSH: rsync --safe-links before each command (naturally incremental)
- Daytona: _upload_if_changed() with mtime+size caching before each command

Security — symlink filtering:
- Docker/Singularity: sanitized temp copy when symlinks detected
- Modal/Daytona: iter_skills_files() skips symlinks
- SSH: rsync --safe-links skips symlinks pointing outside source tree
- Temp dir cleanup via atexit + reuse across calls

Non-root user support:
- SSH: detects remote home via echo $HOME, syncs to $HOME/.hermes/
- Daytona: detects sandbox home before sync, uploads to $HOME/.hermes/
- Docker/Modal/Singularity: run as root, /root/.hermes/ is correct

Also:
- credential_files.py: fix name/path key fallback in required_credential_files
- Singularity, SSH, Daytona: gained credential file support
- 14 tests covering symlink filtering, name/path fallback, iter_skills_files
2026-03-30 02:45:41 -07:00
parent 791f4e94b2
commit 5148682b43
8 changed files with 494 additions and 179 deletions
--- a/tools/credential_files.py
+++ b/tools/credential_files.py
@@ -83,7 +83,7 @@ def register_credential_files(
        if isinstance(entry, str):
            rel_path = entry.strip()
        elif isinstance(entry, dict):
-            rel_path = (entry.get("path") or "").strip()
+            rel_path = (entry.get("path") or entry.get("name") or "").strip()
        else:
            continue
        if not rel_path:
@@ -152,6 +152,107 @@ def get_credential_file_mounts() -> List[Dict[str, str]]:
    ]


+def get_skills_directory_mount(
+    container_base: str = "/root/.hermes",
+) -> Dict[str, str] | None:
+    """Return mount info for the skills directory, or a symlink-safe copy of it.
+
+    Skills may include ``scripts/``, ``templates/``, and ``references/``
+    subdirectories that the agent needs to execute inside remote sandboxes.
+
+    **Security:** Bind mounts follow symlinks, so a malicious symlink inside
+    the skills tree could expose arbitrary host files to the container.  When
+    symlinks are detected, this function creates a sanitized copy (directories
+    and regular files only) in a temp directory and returns that path instead.
+    When no symlinks are present, the original directory is returned without
+    copying; the tree is still walked once per call to look for symlinks.
+
+    Returns a dict with ``host_path``/``container_path`` keys, or None if the skills dir is absent.
+    """
+    hermes_home = _resolve_hermes_home()
+    skills_dir = hermes_home / "skills"
+    if not skills_dir.is_dir():
+        return None
+
+    host_path = _safe_skills_path(skills_dir)
+    return {
+        "host_path": host_path,
+        "container_path": f"{container_base.rstrip('/')}/skills",
+    }
+
+
+_safe_skills_tempdir: Path | None = None
+
+
+def _safe_skills_path(skills_dir: Path) -> str:
+    """Return *skills_dir* if symlink-free, else a sanitized temp copy.
+
+    The copy is rebuilt in place inside one reusable temp dir, removed at exit.
+    """
+    global _safe_skills_tempdir
+
+    symlinks = [p for p in skills_dir.rglob("*") if p.is_symlink()]
+    if not symlinks:
+        return str(skills_dir)
+
+    for link in symlinks:
+        logger.warning("credential_files: skipping symlink in skills dir: %s -> %s",
+                       link, os.readlink(link))
+
+    import atexit
+    import shutil
+    import tempfile
+
+    # Create the temp dir and its exit hook once; a path already bind-mounted stays valid.
+    if _safe_skills_tempdir is None or not _safe_skills_tempdir.is_dir():
+        _safe_skills_tempdir = Path(tempfile.mkdtemp(prefix="hermes-skills-safe-"))
+        atexit.register(shutil.rmtree, str(_safe_skills_tempdir), ignore_errors=True)
+    safe_dir = _safe_skills_tempdir
+    for stale in safe_dir.iterdir():  # drop the previous snapshot's contents
+        if stale.is_dir():
+            shutil.rmtree(stale, ignore_errors=True)
+        else:
+            stale.unlink()
+
+    for item in skills_dir.rglob("*"):
+        if item.is_symlink():
+            continue
+        target = safe_dir / item.relative_to(skills_dir)
+        if item.is_dir():
+            target.mkdir(parents=True, exist_ok=True)
+        elif item.is_file():
+            target.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(str(item), str(target))
+    logger.info("credential_files: created symlink-safe skills copy at %s", safe_dir)
+    return str(safe_dir)
+
+
+def iter_skills_files(
+    container_base: str = "/root/.hermes",
+) -> List[Dict[str, str]]:
+    """Return a list of ``host_path``/``container_path`` dicts, one per skills file.
+
+    Skips symlinks; container paths always use ``/`` separators.  Preferred for
+    backends that upload files individually (Daytona, Modal) instead of mounting.
+    """
+    hermes_home = _resolve_hermes_home()
+    skills_dir = hermes_home / "skills"
+    if not skills_dir.is_dir():
+        return []
+
+    container_root = f"{container_base.rstrip('/')}/skills"
+    result: List[Dict[str, str]] = []
+    for item in skills_dir.rglob("*"):
+        if item.is_symlink() or not item.is_file():
+            continue
+        rel = item.relative_to(skills_dir)
+        result.append({
+            "host_path": str(item),
+            "container_path": f"{container_root}/{rel.as_posix()}",  # never the host separator
+        })
+    return result
+
+
 def clear_credential_files() -> None:
    """Reset the skill-scoped registry (e.g. on session reset)."""
    _registered_files.clear()
--- a/tools/environments/daytona.py
+++ b/tools/environments/daytona.py
@@ -113,15 +113,61 @@ class DaytonaEnvironment(BaseEnvironment):
            logger.info("Daytona: created sandbox %s for task %s",
                        self._sandbox.id, task_id)

-        # Resolve cwd: detect actual home dir inside the sandbox
-        if self._requested_cwd in ("~", "/home/daytona"):
-            try:
-                home = self._sandbox.process.exec("echo $HOME").result.strip()
-                if home:
+        # Detect remote home dir first so mounts go to the right place.
+        self._remote_home = "/root"
+        try:
+            home = self._sandbox.process.exec("echo $HOME").result.strip()
+            if home:
+                self._remote_home = home
+                if self._requested_cwd in ("~", "/home/daytona"):
                    self.cwd = home
-            except Exception:
-                pass  # leave cwd as-is; sandbox will use its own default
-            logger.info("Daytona: resolved cwd to %s", self.cwd)
+        except Exception:
+            pass
+        logger.info("Daytona: resolved home to %s, cwd to %s", self._remote_home, self.cwd)
+
+        # Track synced files to avoid redundant uploads.
+        # Key: remote_path, Value: (mtime, size)
+        self._synced_files: Dict[str, tuple] = {}
+
+        # Upload credential files and skills directory into the sandbox.
+        self._sync_skills_and_credentials()
+
+    def _upload_if_changed(self, host_path: str, remote_path: str) -> bool:
+        """Upload *host_path* if its mtime/size changed; return True only if it was sent."""
+        import shlex  # local import: the remote dir is interpolated into a shell command
+        try:
+            stat = Path(host_path).stat()
+            file_key = (stat.st_mtime, stat.st_size)
+        except OSError:
+            return False
+        if self._synced_files.get(remote_path) == file_key:
+            return False
+        try:
+            parent = str(Path(remote_path).parent)
+            self._sandbox.process.exec(f"mkdir -p {shlex.quote(parent)}")
+            self._sandbox.fs.upload_file(host_path, remote_path)
+            self._synced_files[remote_path] = file_key
+            return True
+        except Exception as e:
+            logger.debug("Daytona: upload failed %s: %s", host_path, e)
+            return False
+
+    def _sync_skills_and_credentials(self) -> None:
+        """Upload changed credential and skill files; sync errors are only debug-logged."""
+        container_base = f"{self._remote_home}/.hermes"
+        try:
+            from tools.credential_files import get_credential_file_mounts, iter_skills_files
+
+            for mount_entry in get_credential_file_mounts():  # remap /root/.hermes to the detected home
+                remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
+                if self._upload_if_changed(mount_entry["host_path"], remote_path):
+                    logger.debug("Daytona: synced credential %s", remote_path)
+
+            for entry in iter_skills_files(container_base=container_base):
+                if self._upload_if_changed(entry["host_path"], entry["container_path"]):
+                    logger.debug("Daytona: synced skill %s", entry["container_path"])
+        except Exception as e:
+            logger.debug("Daytona: could not sync skills/credentials: %s", e)

    def _ensure_sandbox_ready(self):
        """Restart sandbox if it was stopped (e.g., by a previous interrupt)."""
@@ -191,6 +237,9 @@ class DaytonaEnvironment(BaseEnvironment):
                stdin_data: Optional[str] = None) -> dict:
        with self._lock:
            self._ensure_sandbox_ready()
+        # Incremental sync before each command so mid-session credential
+        # refreshes and skill updates are picked up.
+        self._sync_skills_and_credentials()

        if stdin_data is not None:
            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@@ -315,7 +315,7 @@ class DockerEnvironment(BaseEnvironment):
        # Mount credential files (OAuth tokens, etc.) declared by skills.
        # Read-only so the container can authenticate but not modify host creds.
        try:
-            from tools.credential_files import get_credential_file_mounts
+            from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount

            for mount_entry in get_credential_file_mounts():
                volume_args.extend([
@@ -327,6 +327,20 @@ class DockerEnvironment(BaseEnvironment):
                    mount_entry["host_path"],
                    mount_entry["container_path"],
                )
+
+            # Mount the skills directory so skill scripts/templates are
+            # available inside the container at the same relative path.
+            skills_mount = get_skills_directory_mount()
+            if skills_mount:
+                volume_args.extend([
+                    "-v",
+                    f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro",
+                ])
+                logger.info(
+                    "Docker: mounting skills dir %s -> %s",
+                    skills_mount["host_path"],
+                    skills_mount["container_path"],
+                )
        except Exception as e:
            logger.debug("Docker: could not load credential file mounts: %s", e)

--- a/tools/environments/modal.py
+++ b/tools/environments/modal.py
@@ -142,7 +142,7 @@ class ModalEnvironment(BaseEnvironment):
        # external services but can't modify the host's credentials.
        cred_mounts = []
        try:
-            from tools.credential_files import get_credential_file_mounts
+            from tools.credential_files import get_credential_file_mounts, iter_skills_files

            for mount_entry in get_credential_file_mounts():
                cred_mounts.append(
@@ -156,6 +156,18 @@ class ModalEnvironment(BaseEnvironment):
                    mount_entry["host_path"],
                    mount_entry["container_path"],
                )
+
+            # Mount individual skill files (symlinks filtered out).
+            skills_files = iter_skills_files()
+            for entry in skills_files:
+                cred_mounts.append(
+                    _modal.Mount.from_local_file(
+                        entry["host_path"],
+                        remote_path=entry["container_path"],
+                    )
+                )
+            if skills_files:
+                logger.info("Modal: mounting %d skill files", len(skills_files))
        except Exception as e:
            logger.debug("Modal: could not load credential file mounts: %s", e)

@@ -184,72 +196,69 @@ class ModalEnvironment(BaseEnvironment):
        self._app, self._sandbox = self._worker.run_coroutine(
            _create_sandbox(), timeout=300
        )
-        # Track synced credential files to avoid redundant pushes.
+        # Track synced files to avoid redundant pushes.
        # Key: container_path, Value: (mtime, size) of last synced version.
-        self._synced_creds: Dict[str, tuple] = {}
+        self._synced_files: Dict[str, tuple] = {}
        logger.info("Modal: sandbox created (task=%s)", self._task_id)

-    def _sync_credential_files(self) -> None:
-        """Push credential files into the running sandbox.
+    def _push_file_to_sandbox(self, host_path: str, container_path: str) -> bool:
+        """Push a single file into the sandbox if changed. Returns True if synced."""
+        hp = Path(host_path)
+        try:
+            stat = hp.stat()
+            file_key = (stat.st_mtime, stat.st_size)
+        except OSError:
+            return False

-        Mounts are set at sandbox creation, but credentials may be created
-        later (e.g. OAuth setup mid-session).  This writes the current file
-        content into the sandbox via exec(), so new/updated credentials are
-        available without recreating the sandbox.
+        if self._synced_files.get(container_path) == file_key:
+            return False
+
+        try:
+            content = hp.read_bytes()
+        except Exception:
+            return False
+
+        import base64
+        b64 = base64.b64encode(content).decode("ascii")
+        container_dir = str(Path(container_path).parent)
+        cmd = (
+            f"mkdir -p {shlex.quote(container_dir)} && "
+            f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
+        )
+
+        async def _write():
+            proc = await self._sandbox.exec.aio("bash", "-c", cmd)
+            await proc.wait.aio()
+
+        self._worker.run_coroutine(_write(), timeout=15)
+        self._synced_files[container_path] = file_key
+        return True
+
+    def _sync_files(self) -> None:
+        """Push credential files and skill files into the running sandbox.
+
+        Runs before each command. Uses mtime+size caching so only changed
+        files are pushed (~13μs overhead in the no-op case).
        """
        try:
-            from tools.credential_files import get_credential_file_mounts
+            from tools.credential_files import get_credential_file_mounts, iter_skills_files

-            mounts = get_credential_file_mounts()
-            if not mounts:
-                return
+            for entry in get_credential_file_mounts():
+                if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
+                    logger.debug("Modal: synced credential %s", entry["container_path"])

-            for entry in mounts:
-                host_path = entry["host_path"]
-                container_path = entry["container_path"]
-                hp = Path(host_path)
-                try:
-                    stat = hp.stat()
-                    file_key = (stat.st_mtime, stat.st_size)
-                except OSError:
-                    continue
-
-                # Skip if already synced with same mtime+size
-                if self._synced_creds.get(container_path) == file_key:
-                    continue
-
-                try:
-                    content = hp.read_text(encoding="utf-8")
-                except Exception:
-                    continue
-
-                # Write via base64 to avoid shell escaping issues with JSON
-                import base64
-                b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
-                container_dir = str(Path(container_path).parent)
-                cmd = (
-                    f"mkdir -p {shlex.quote(container_dir)} && "
-                    f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
-                )
-
-                _cp = container_path  # capture for closure
-
-                async def _write():
-                    proc = await self._sandbox.exec.aio("bash", "-c", cmd)
-                    await proc.wait.aio()
-
-                self._worker.run_coroutine(_write(), timeout=15)
-                self._synced_creds[container_path] = file_key
-                logger.debug("Modal: synced credential %s -> %s", host_path, container_path)
+            for entry in iter_skills_files():
+                if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
+                    logger.debug("Modal: synced skill file %s", entry["container_path"])
        except Exception as e:
-            logger.debug("Modal: credential file sync failed: %s", e)
+            logger.debug("Modal: file sync failed: %s", e)

    def execute(self, command: str, cwd: str = "", *,
                timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        # Sync credential files before each command so mid-session
        # OAuth setups are picked up without requiring a restart.
-        self._sync_credential_files()
+        self._sync_files()

        if stdin_data is not None:
            marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
--- a/tools/environments/singularity.py
+++ b/tools/environments/singularity.py
@@ -254,6 +254,28 @@ class SingularityEnvironment(BaseEnvironment):
        else:
            cmd.append("--writable-tmpfs")

+        # Mount credential files and skills directory (read-only).
+        try:
+            from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
+
+            for mount_entry in get_credential_file_mounts():
+                cmd.extend(["--bind", f"{mount_entry['host_path']}:{mount_entry['container_path']}:ro"])
+                logger.info(
+                    "Singularity: binding credential %s -> %s",
+                    mount_entry["host_path"],
+                    mount_entry["container_path"],
+                )
+            skills_mount = get_skills_directory_mount()
+            if skills_mount:
+                cmd.extend(["--bind", f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro"])
+                logger.info(
+                    "Singularity: binding skills dir %s -> %s",
+                    skills_mount["host_path"],
+                    skills_mount["container_path"],
+                )
+        except Exception as e:
+            logger.debug("Singularity: could not load credential/skills mounts: %s", e)
+
        # Resource limits (cgroup-based, may require root or appropriate config)
        if self._memory > 0:
            cmd.extend(["--memory", f"{self._memory}M"])
--- a/tools/environments/ssh.py
+++ b/tools/environments/ssh.py
@@ -55,6 +55,8 @@ class SSHEnvironment(PersistentShellMixin, BaseEnvironment):
        self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
        _ensure_ssh_available()
        self._establish_connection()
+        self._remote_home = self._detect_remote_home()
+        self._sync_skills_and_credentials()

        if self.persistent:
            self._init_persistent_shell()
@@ -87,6 +89,79 @@ class SSHEnvironment(PersistentShellMixin, BaseEnvironment):
        except subprocess.TimeoutExpired:
            raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")

+    def _detect_remote_home(self) -> str:
+        """Return the remote user's ``$HOME``, falling back to a username-based guess."""
+        try:
+            cmd = self._build_ssh_command()
+            cmd.append("echo $HOME")
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
+            home = result.stdout.strip()
+            if home and result.returncode == 0:
+                logger.debug("SSH: remote home = %s", home)
+                return home
+        except Exception as e:
+            logger.debug("SSH: could not detect remote home: %s", e)
+        # Fallback: guess from username
+        if self.user == "root":
+            return "/root"
+        return f"/home/{self.user}"
+
+    def _sync_skills_and_credentials(self) -> None:
+        """Rsync skills directory and credential files to the remote host (best effort)."""
+        try:
+            import shlex  # local import: quote paths interpolated into shell strings
+            container_base = f"{self._remote_home}/.hermes"
+            from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
+
+            rsync_base = ["rsync", "-az", "--timeout=30", "--safe-links"]
+            ssh_opts = f"ssh -o ControlPath={shlex.quote(str(self.control_socket))} -o ControlMaster=auto"
+            if self.port != 22:
+                ssh_opts += f" -p {self.port}"
+            if self.key_path:
+                ssh_opts += f" -i {shlex.quote(str(self.key_path))}"
+            rsync_base.extend(["-e", ssh_opts])
+            dest_prefix = f"{self.user}@{self.host}"
+
+            # Sync individual credential files (remap /root/.hermes to detected home)
+            for mount_entry in get_credential_file_mounts():
+                remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
+                parent_dir = str(Path(remote_path).parent)
+                mkdir_cmd = self._build_ssh_command()
+                mkdir_cmd.append(f"mkdir -p {shlex.quote(parent_dir)}")
+                subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
+                cmd = rsync_base + [mount_entry["host_path"], f"{dest_prefix}:{remote_path}"]
+                result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+                if result.returncode == 0:
+                    logger.info("SSH: synced credential %s -> %s", mount_entry["host_path"], remote_path)
+                else:
+                    logger.debug("SSH: rsync credential failed: %s", result.stderr.strip())
+
+            # Sync skills directory (remap to detected home)
+            skills_mount = get_skills_directory_mount(container_base=container_base)
+            if skills_mount:
+                remote_path = skills_mount["container_path"]
+                mkdir_cmd = self._build_ssh_command()
+                mkdir_cmd.append(f"mkdir -p {shlex.quote(remote_path)}")
+                subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
+                # Trailing slashes make rsync copy the directory's contents, not the dir itself.
+                src = skills_mount["host_path"].rstrip("/") + "/"
+                cmd = rsync_base + [src, f"{dest_prefix}:{remote_path}/"]
+                result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+                if result.returncode == 0:
+                    logger.info("SSH: synced skills dir %s -> %s", skills_mount["host_path"], remote_path)
+                else:
+                    logger.debug("SSH: rsync skills dir failed: %s", result.stderr.strip())
+        except Exception as e:
+            logger.debug("SSH: could not sync skills/credentials: %s", e)
+
+    def execute(self, command: str, cwd: str = "", *,
+                timeout: int | None = None,
+                stdin_data: str | None = None) -> dict:
+        # Re-sync before every command so mid-session credential refreshes
+        # and skill updates reach the remote host before *command* runs.
+        self._sync_skills_and_credentials()
+        return super().execute(command, cwd, timeout=timeout, stdin_data=stdin_data)
+
    _poll_interval_start: float = 0.15  # SSH: higher initial interval (150ms) for network latency

    @property