feat: mount skills directory into all remote backends with live sync (#3890)
Skills with scripts/, templates/, and references/ subdirectories need those files available inside sandboxed execution environments. Previously the skills directory was missing entirely from remote backends.

Live sync — files stay current as credentials refresh and skills update:
- Docker/Singularity: bind mounts are inherently live (host changes visible immediately)
- Modal: _sync_files() runs before each command with mtime+size caching, pushing only changed credential and skill files (~13μs no-op overhead)
- SSH: rsync --safe-links before each command (naturally incremental)
- Daytona: _upload_if_changed() with mtime+size caching before each command

Security — symlink filtering:
- Docker/Singularity: sanitized temp copy when symlinks detected
- Modal/Daytona: iter_skills_files() skips symlinks
- SSH: rsync --safe-links skips symlinks pointing outside source tree
- Temp dir cleanup via atexit + reuse across calls

Non-root user support:
- SSH: detects remote home via echo $HOME, syncs to $HOME/.hermes/
- Daytona: detects sandbox home before sync, uploads to $HOME/.hermes/
- Docker/Modal/Singularity: run as root, /root/.hermes/ is correct

Also:
- credential_files.py: fix name/path key fallback in required_credential_files
- Singularity, SSH, Daytona: gained credential file support
- 14 tests covering symlink filtering, name/path fallback, iter_skills_files
This commit is contained in:
@@ -83,7 +83,7 @@ def register_credential_files(
|
||||
if isinstance(entry, str):
|
||||
rel_path = entry.strip()
|
||||
elif isinstance(entry, dict):
|
||||
rel_path = (entry.get("path") or "").strip()
|
||||
rel_path = (entry.get("path") or entry.get("name") or "").strip()
|
||||
else:
|
||||
continue
|
||||
if not rel_path:
|
||||
@@ -152,6 +152,107 @@ def get_credential_file_mounts() -> List[Dict[str, str]]:
|
||||
]
|
||||
|
||||
|
||||
def get_skills_directory_mount(
    container_base: str = "/root/.hermes",
) -> Dict[str, str] | None:
    """Describe how to mount a symlink-safe view of the skills directory.

    Skill packages may ship ``scripts/``, ``templates/``, and
    ``references/`` trees that the agent needs to execute inside remote
    sandboxes, so backends bind-mount the whole directory.

    **Security:** bind mounts follow symlinks, so a malicious symlink
    inside the skills tree could expose arbitrary host files to the
    container.  ``_safe_skills_path`` therefore substitutes a sanitized
    copy (regular files only) in a temp directory whenever symlinks are
    detected; the common symlink-free case hands back the original
    directory with zero overhead.

    Returns:
        A dict with ``host_path`` and ``container_path`` keys, or ``None``
        when no skills directory exists on the host.
    """
    skills_root = _resolve_hermes_home() / "skills"
    if not skills_root.is_dir():
        return None

    destination = container_base.rstrip("/") + "/skills"
    return {
        "host_path": _safe_skills_path(skills_root),
        "container_path": destination,
    }
|
||||
|
||||
|
||||
_safe_skills_tempdir: Path | None = None
|
||||
|
||||
|
||||
def _safe_skills_path(skills_dir: Path) -> str:
|
||||
"""Return *skills_dir* if symlink-free, else a sanitized temp copy."""
|
||||
global _safe_skills_tempdir
|
||||
|
||||
symlinks = [p for p in skills_dir.rglob("*") if p.is_symlink()]
|
||||
if not symlinks:
|
||||
return str(skills_dir)
|
||||
|
||||
for link in symlinks:
|
||||
logger.warning("credential_files: skipping symlink in skills dir: %s -> %s",
|
||||
link, os.readlink(link))
|
||||
|
||||
import atexit
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
# Reuse the same temp dir across calls to avoid accumulation.
|
||||
if _safe_skills_tempdir and _safe_skills_tempdir.is_dir():
|
||||
shutil.rmtree(_safe_skills_tempdir, ignore_errors=True)
|
||||
|
||||
safe_dir = Path(tempfile.mkdtemp(prefix="hermes-skills-safe-"))
|
||||
_safe_skills_tempdir = safe_dir
|
||||
|
||||
for item in skills_dir.rglob("*"):
|
||||
if item.is_symlink():
|
||||
continue
|
||||
rel = item.relative_to(skills_dir)
|
||||
target = safe_dir / rel
|
||||
if item.is_dir():
|
||||
target.mkdir(parents=True, exist_ok=True)
|
||||
elif item.is_file():
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(str(item), str(target))
|
||||
|
||||
def _cleanup():
|
||||
if safe_dir.is_dir():
|
||||
shutil.rmtree(safe_dir, ignore_errors=True)
|
||||
|
||||
atexit.register(_cleanup)
|
||||
logger.info("credential_files: created symlink-safe skills copy at %s", safe_dir)
|
||||
return str(safe_dir)
|
||||
|
||||
|
||||
def iter_skills_files(
    container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
    """Return individual (host_path, container_path) entries for skill files.

    Skips symlinks entirely.  Preferred for backends that upload files
    individually (Daytona, Modal) rather than mounting a directory.

    Args:
        container_base: Remote base directory (``/root/.hermes`` by
            default; pass a non-root user's ``$HOME/.hermes`` to remap).

    Returns:
        A list of dicts with ``host_path`` and ``container_path`` keys;
        empty when no skills directory exists.
    """
    hermes_home = _resolve_hermes_home()
    skills_dir = hermes_home / "skills"
    if not skills_dir.is_dir():
        return []

    container_root = f"{container_base.rstrip('/')}/skills"
    result: List[Dict[str, str]] = []
    for item in skills_dir.rglob("*"):
        if item.is_symlink() or not item.is_file():
            continue
        # as_posix() keeps container paths slash-separated even when the
        # host is Windows (str() of a relative path would use backslashes).
        rel = item.relative_to(skills_dir).as_posix()
        result.append({
            "host_path": str(item),
            "container_path": f"{container_root}/{rel}",
        })
    return result
|
||||
|
||||
|
||||
def clear_credential_files() -> None:
    """Reset the skill-scoped registry (e.g. on session reset)."""
    # Clear in place (rather than rebinding) so any existing references to
    # the registry container remain valid.
    _registered_files.clear()
|
||||
|
||||
@@ -113,15 +113,61 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
logger.info("Daytona: created sandbox %s for task %s",
|
||||
self._sandbox.id, task_id)
|
||||
|
||||
# Resolve cwd: detect actual home dir inside the sandbox
|
||||
if self._requested_cwd in ("~", "/home/daytona"):
|
||||
try:
|
||||
home = self._sandbox.process.exec("echo $HOME").result.strip()
|
||||
if home:
|
||||
# Detect remote home dir first so mounts go to the right place.
|
||||
self._remote_home = "/root"
|
||||
try:
|
||||
home = self._sandbox.process.exec("echo $HOME").result.strip()
|
||||
if home:
|
||||
self._remote_home = home
|
||||
if self._requested_cwd in ("~", "/home/daytona"):
|
||||
self.cwd = home
|
||||
except Exception:
|
||||
pass # leave cwd as-is; sandbox will use its own default
|
||||
logger.info("Daytona: resolved cwd to %s", self.cwd)
|
||||
except Exception:
|
||||
pass
|
||||
logger.info("Daytona: resolved home to %s, cwd to %s", self._remote_home, self.cwd)
|
||||
|
||||
# Track synced files to avoid redundant uploads.
|
||||
# Key: remote_path, Value: (mtime, size)
|
||||
self._synced_files: Dict[str, tuple] = {}
|
||||
|
||||
# Upload credential files and skills directory into the sandbox.
|
||||
self._sync_skills_and_credentials()
|
||||
|
||||
def _upload_if_changed(self, host_path: str, remote_path: str) -> bool:
    """Upload a file into the sandbox if its mtime/size changed since last sync.

    Args:
        host_path: Local file to upload.
        remote_path: Absolute destination path inside the sandbox.

    Returns:
        True when the file was uploaded; False when it is already current,
        unreadable on the host, or the upload failed.
    """
    import shlex

    hp = Path(host_path)
    try:
        stat = hp.stat()
        file_key = (stat.st_mtime, stat.st_size)
    except OSError:
        # Host file vanished or is unreadable; nothing to sync.
        return False
    if self._synced_files.get(remote_path) == file_key:
        return False  # already up to date — skip the round trip
    try:
        parent = str(Path(remote_path).parent)
        # Quote the directory: the argument is re-parsed by the sandbox
        # shell, so unquoted paths with spaces/metacharacters would break
        # (or be interpreted as extra commands).
        self._sandbox.process.exec(f"mkdir -p {shlex.quote(parent)}")
        self._sandbox.fs.upload_file(host_path, remote_path)
        self._synced_files[remote_path] = file_key
        return True
    except Exception as e:
        logger.debug("Daytona: upload failed %s: %s", host_path, e)
        return False
|
||||
|
||||
def _sync_skills_and_credentials(self) -> None:
    """Upload changed credential files and skill files into the sandbox."""
    base = f"{self._remote_home}/.hermes"
    try:
        from tools.credential_files import get_credential_file_mounts, iter_skills_files

        # Credential mounts are declared against /root/.hermes; remap that
        # prefix to the sandbox's detected home directory.
        for cred in get_credential_file_mounts():
            dest = cred["container_path"].replace("/root/.hermes", base, 1)
            if self._upload_if_changed(cred["host_path"], dest):
                logger.debug("Daytona: synced credential %s", dest)

        for skill in iter_skills_files(container_base=base):
            dest = skill["container_path"]
            if self._upload_if_changed(skill["host_path"], dest):
                logger.debug("Daytona: synced skill %s", dest)
    except Exception as e:
        # Best-effort: a failed sync should never break command execution.
        logger.debug("Daytona: could not sync skills/credentials: %s", e)
|
||||
|
||||
def _ensure_sandbox_ready(self):
|
||||
"""Restart sandbox if it was stopped (e.g., by a previous interrupt)."""
|
||||
@@ -191,6 +237,9 @@ class DaytonaEnvironment(BaseEnvironment):
|
||||
stdin_data: Optional[str] = None) -> dict:
|
||||
with self._lock:
|
||||
self._ensure_sandbox_ready()
|
||||
# Incremental sync before each command so mid-session credential
|
||||
# refreshes and skill updates are picked up.
|
||||
self._sync_skills_and_credentials()
|
||||
|
||||
if stdin_data is not None:
|
||||
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
@@ -315,7 +315,7 @@ class DockerEnvironment(BaseEnvironment):
|
||||
# Mount credential files (OAuth tokens, etc.) declared by skills.
|
||||
# Read-only so the container can authenticate but not modify host creds.
|
||||
try:
|
||||
from tools.credential_files import get_credential_file_mounts
|
||||
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
|
||||
|
||||
for mount_entry in get_credential_file_mounts():
|
||||
volume_args.extend([
|
||||
@@ -327,6 +327,20 @@ class DockerEnvironment(BaseEnvironment):
|
||||
mount_entry["host_path"],
|
||||
mount_entry["container_path"],
|
||||
)
|
||||
|
||||
# Mount the skills directory so skill scripts/templates are
|
||||
# available inside the container at the same relative path.
|
||||
skills_mount = get_skills_directory_mount()
|
||||
if skills_mount:
|
||||
volume_args.extend([
|
||||
"-v",
|
||||
f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro",
|
||||
])
|
||||
logger.info(
|
||||
"Docker: mounting skills dir %s -> %s",
|
||||
skills_mount["host_path"],
|
||||
skills_mount["container_path"],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Docker: could not load credential file mounts: %s", e)
|
||||
|
||||
|
||||
@@ -142,7 +142,7 @@ class ModalEnvironment(BaseEnvironment):
|
||||
# external services but can't modify the host's credentials.
|
||||
cred_mounts = []
|
||||
try:
|
||||
from tools.credential_files import get_credential_file_mounts
|
||||
from tools.credential_files import get_credential_file_mounts, iter_skills_files
|
||||
|
||||
for mount_entry in get_credential_file_mounts():
|
||||
cred_mounts.append(
|
||||
@@ -156,6 +156,18 @@ class ModalEnvironment(BaseEnvironment):
|
||||
mount_entry["host_path"],
|
||||
mount_entry["container_path"],
|
||||
)
|
||||
|
||||
# Mount individual skill files (symlinks filtered out).
|
||||
skills_files = iter_skills_files()
|
||||
for entry in skills_files:
|
||||
cred_mounts.append(
|
||||
_modal.Mount.from_local_file(
|
||||
entry["host_path"],
|
||||
remote_path=entry["container_path"],
|
||||
)
|
||||
)
|
||||
if skills_files:
|
||||
logger.info("Modal: mounting %d skill files", len(skills_files))
|
||||
except Exception as e:
|
||||
logger.debug("Modal: could not load credential file mounts: %s", e)
|
||||
|
||||
@@ -184,72 +196,69 @@ class ModalEnvironment(BaseEnvironment):
|
||||
self._app, self._sandbox = self._worker.run_coroutine(
|
||||
_create_sandbox(), timeout=300
|
||||
)
|
||||
# Track synced credential files to avoid redundant pushes.
|
||||
# Track synced files to avoid redundant pushes.
|
||||
# Key: container_path, Value: (mtime, size) of last synced version.
|
||||
self._synced_creds: Dict[str, tuple] = {}
|
||||
self._synced_files: Dict[str, tuple] = {}
|
||||
logger.info("Modal: sandbox created (task=%s)", self._task_id)
|
||||
|
||||
def _sync_credential_files(self) -> None:
|
||||
"""Push credential files into the running sandbox.
|
||||
def _push_file_to_sandbox(self, host_path: str, container_path: str) -> bool:
|
||||
"""Push a single file into the sandbox if changed. Returns True if synced."""
|
||||
hp = Path(host_path)
|
||||
try:
|
||||
stat = hp.stat()
|
||||
file_key = (stat.st_mtime, stat.st_size)
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
Mounts are set at sandbox creation, but credentials may be created
|
||||
later (e.g. OAuth setup mid-session). This writes the current file
|
||||
content into the sandbox via exec(), so new/updated credentials are
|
||||
available without recreating the sandbox.
|
||||
if self._synced_files.get(container_path) == file_key:
|
||||
return False
|
||||
|
||||
try:
|
||||
content = hp.read_bytes()
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
import base64
|
||||
b64 = base64.b64encode(content).decode("ascii")
|
||||
container_dir = str(Path(container_path).parent)
|
||||
cmd = (
|
||||
f"mkdir -p {shlex.quote(container_dir)} && "
|
||||
f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
|
||||
)
|
||||
|
||||
async def _write():
|
||||
proc = await self._sandbox.exec.aio("bash", "-c", cmd)
|
||||
await proc.wait.aio()
|
||||
|
||||
self._worker.run_coroutine(_write(), timeout=15)
|
||||
self._synced_files[container_path] = file_key
|
||||
return True
|
||||
|
||||
def _sync_files(self) -> None:
|
||||
"""Push credential files and skill files into the running sandbox.
|
||||
|
||||
Runs before each command. Uses mtime+size caching so only changed
|
||||
files are pushed (~13μs overhead in the no-op case).
|
||||
"""
|
||||
try:
|
||||
from tools.credential_files import get_credential_file_mounts
|
||||
from tools.credential_files import get_credential_file_mounts, iter_skills_files
|
||||
|
||||
mounts = get_credential_file_mounts()
|
||||
if not mounts:
|
||||
return
|
||||
for entry in get_credential_file_mounts():
|
||||
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
|
||||
logger.debug("Modal: synced credential %s", entry["container_path"])
|
||||
|
||||
for entry in mounts:
|
||||
host_path = entry["host_path"]
|
||||
container_path = entry["container_path"]
|
||||
hp = Path(host_path)
|
||||
try:
|
||||
stat = hp.stat()
|
||||
file_key = (stat.st_mtime, stat.st_size)
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
# Skip if already synced with same mtime+size
|
||||
if self._synced_creds.get(container_path) == file_key:
|
||||
continue
|
||||
|
||||
try:
|
||||
content = hp.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Write via base64 to avoid shell escaping issues with JSON
|
||||
import base64
|
||||
b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
|
||||
container_dir = str(Path(container_path).parent)
|
||||
cmd = (
|
||||
f"mkdir -p {shlex.quote(container_dir)} && "
|
||||
f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
|
||||
)
|
||||
|
||||
_cp = container_path # capture for closure
|
||||
|
||||
async def _write():
|
||||
proc = await self._sandbox.exec.aio("bash", "-c", cmd)
|
||||
await proc.wait.aio()
|
||||
|
||||
self._worker.run_coroutine(_write(), timeout=15)
|
||||
self._synced_creds[container_path] = file_key
|
||||
logger.debug("Modal: synced credential %s -> %s", host_path, container_path)
|
||||
for entry in iter_skills_files():
|
||||
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
|
||||
logger.debug("Modal: synced skill file %s", entry["container_path"])
|
||||
except Exception as e:
|
||||
logger.debug("Modal: credential file sync failed: %s", e)
|
||||
logger.debug("Modal: file sync failed: %s", e)
|
||||
|
||||
def execute(self, command: str, cwd: str = "", *,
|
||||
timeout: int | None = None,
|
||||
stdin_data: str | None = None) -> dict:
|
||||
# Sync credential files before each command so mid-session
|
||||
# OAuth setups are picked up without requiring a restart.
|
||||
self._sync_credential_files()
|
||||
self._sync_files()
|
||||
|
||||
if stdin_data is not None:
|
||||
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
@@ -254,6 +254,28 @@ class SingularityEnvironment(BaseEnvironment):
|
||||
else:
|
||||
cmd.append("--writable-tmpfs")
|
||||
|
||||
# Mount credential files and skills directory (read-only).
|
||||
try:
|
||||
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
|
||||
|
||||
for mount_entry in get_credential_file_mounts():
|
||||
cmd.extend(["--bind", f"{mount_entry['host_path']}:{mount_entry['container_path']}:ro"])
|
||||
logger.info(
|
||||
"Singularity: binding credential %s -> %s",
|
||||
mount_entry["host_path"],
|
||||
mount_entry["container_path"],
|
||||
)
|
||||
skills_mount = get_skills_directory_mount()
|
||||
if skills_mount:
|
||||
cmd.extend(["--bind", f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro"])
|
||||
logger.info(
|
||||
"Singularity: binding skills dir %s -> %s",
|
||||
skills_mount["host_path"],
|
||||
skills_mount["container_path"],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Singularity: could not load credential/skills mounts: %s", e)
|
||||
|
||||
# Resource limits (cgroup-based, may require root or appropriate config)
|
||||
if self._memory > 0:
|
||||
cmd.extend(["--memory", f"{self._memory}M"])
|
||||
|
||||
@@ -55,6 +55,8 @@ class SSHEnvironment(PersistentShellMixin, BaseEnvironment):
|
||||
self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
|
||||
_ensure_ssh_available()
|
||||
self._establish_connection()
|
||||
self._remote_home = self._detect_remote_home()
|
||||
self._sync_skills_and_credentials()
|
||||
|
||||
if self.persistent:
|
||||
self._init_persistent_shell()
|
||||
@@ -87,6 +89,79 @@ class SSHEnvironment(PersistentShellMixin, BaseEnvironment):
|
||||
except subprocess.TimeoutExpired:
|
||||
raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")
|
||||
|
||||
def _detect_remote_home(self) -> str:
    """Detect the remote user's home directory.

    Runs ``echo $HOME`` over the SSH connection; if that fails or returns
    nothing, falls back to a conventional guess based on the username.
    """
    try:
        probe = self._build_ssh_command()
        probe.append("echo $HOME")
        result = subprocess.run(probe, capture_output=True, text=True, timeout=10)
        home = result.stdout.strip()
        if result.returncode == 0 and home:
            logger.debug("SSH: remote home = %s", home)
            return home
    except Exception:
        pass
    # Fallback: guess from username.
    return "/root" if self.user == "root" else f"/home/{self.user}"
|
||||
|
||||
def _sync_skills_and_credentials(self) -> None:
    """Rsync skills directory and credential files to the remote host.

    Uses ``rsync --safe-links`` so symlinks pointing outside the source
    tree are skipped, and remaps the default ``/root/.hermes`` base to the
    detected remote home so non-root users are supported.  Best-effort:
    failures are logged at debug level and never raised.
    """
    import shlex

    try:
        container_base = f"{self._remote_home}/.hermes"
        from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount

        rsync_base = ["rsync", "-az", "--timeout=30", "--safe-links"]
        # Reuse the established control socket so rsync piggybacks on the
        # existing SSH connection instead of re-authenticating.
        ssh_opts = f"ssh -o ControlPath={self.control_socket} -o ControlMaster=auto"
        if self.port != 22:
            ssh_opts += f" -p {self.port}"
        if self.key_path:
            ssh_opts += f" -i {self.key_path}"
        rsync_base.extend(["-e", ssh_opts])
        dest_prefix = f"{self.user}@{self.host}"

        # Sync individual credential files (remap /root/.hermes to detected home).
        for mount_entry in get_credential_file_mounts():
            remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
            parent_dir = str(Path(remote_path).parent)
            mkdir_cmd = self._build_ssh_command()
            # Quote: this argument is re-parsed by the remote shell, so an
            # unquoted path with spaces/metacharacters would break or be
            # interpreted as extra commands.
            mkdir_cmd.append(f"mkdir -p {shlex.quote(parent_dir)}")
            subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
            cmd = rsync_base + [mount_entry["host_path"], f"{dest_prefix}:{remote_path}"]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
            if result.returncode == 0:
                logger.info("SSH: synced credential %s -> %s", mount_entry["host_path"], remote_path)
            else:
                logger.debug("SSH: rsync credential failed: %s", result.stderr.strip())

        # Sync skills directory (remap to detected home).
        skills_mount = get_skills_directory_mount(container_base=container_base)
        if skills_mount:
            remote_path = skills_mount["container_path"]
            mkdir_cmd = self._build_ssh_command()
            mkdir_cmd.append(f"mkdir -p {shlex.quote(remote_path)}")
            subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
            # Trailing slashes: copy the *contents* of the skills dir into
            # the remote skills dir rather than nesting a subdirectory.
            cmd = rsync_base + [
                skills_mount["host_path"].rstrip("/") + "/",
                f"{dest_prefix}:{remote_path}/",
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            if result.returncode == 0:
                logger.info("SSH: synced skills dir %s -> %s", skills_mount["host_path"], remote_path)
            else:
                logger.debug("SSH: rsync skills dir failed: %s", result.stderr.strip())
    except Exception as e:
        logger.debug("SSH: could not sync skills/credentials: %s", e)
|
||||
|
||||
def execute(self, command: str, cwd: str = "", *,
            timeout: int | None = None,
            stdin_data: str | None = None) -> dict:
    """Run *command* on the remote host, syncing skills/credentials first."""
    # Incremental sync before each command so mid-session credential
    # refreshes and skill updates are picked up.
    self._sync_skills_and_credentials()
    return super().execute(command, cwd, timeout=timeout, stdin_data=stdin_data)
|
||||
|
||||
_poll_interval_start: float = 0.15 # SSH: higher initial interval (150ms) for network latency
|
||||
|
||||
@property
|
||||
|
||||
Reference in New Issue
Block a user