feat: mount skills directory into all remote backends with live sync (#3890)

Skills with scripts/, templates/, and references/ subdirectories need
those files available inside sandboxed execution environments. Previously
the skills directory was missing entirely from remote backends.

Live sync — files stay current as credentials refresh and skills update:
- Docker/Singularity: bind mounts are inherently live (host changes
  visible immediately)
- Modal: _sync_files() runs before each command with mtime+size caching,
  pushing only changed credential and skill files (~13μs no-op overhead)
- SSH: rsync --safe-links before each command (naturally incremental)
- Daytona: _upload_if_changed() with mtime+size caching before each command

Security — symlink filtering:
- Docker/Singularity: sanitized temp copy when symlinks detected
- Modal/Daytona: iter_skills_files() skips symlinks
- SSH: rsync --safe-links skips symlinks pointing outside source tree
- Temp dir cleanup via atexit + reuse across calls

Non-root user support:
- SSH: detects remote home via echo $HOME, syncs to $HOME/.hermes/
- Daytona: detects sandbox home before sync, uploads to $HOME/.hermes/
- Docker/Modal/Singularity: run as root, /root/.hermes/ is correct

Also:
- credential_files.py: fix name/path key fallback in required_credential_files
- Singularity, SSH, Daytona: gained credential file support
- 14 tests covering symlink filtering, name/path fallback, iter_skills_files
This commit is contained in:
Teknium
2026-03-30 02:45:41 -07:00
committed by GitHub
parent 791f4e94b2
commit 5148682b43
8 changed files with 494 additions and 179 deletions

View File

@@ -83,7 +83,7 @@ def register_credential_files(
if isinstance(entry, str):
rel_path = entry.strip()
elif isinstance(entry, dict):
rel_path = (entry.get("path") or "").strip()
rel_path = (entry.get("path") or entry.get("name") or "").strip()
else:
continue
if not rel_path:
@@ -152,6 +152,107 @@ def get_credential_file_mounts() -> List[Dict[str, str]]:
]
def get_skills_directory_mount(
container_base: str = "/root/.hermes",
) -> Dict[str, str] | None:
"""Return mount info for a symlink-safe copy of the skills directory.
Skills may include ``scripts/``, ``templates/``, and ``references/``
subdirectories that the agent needs to execute inside remote sandboxes.
**Security:** Bind mounts follow symlinks, so a malicious symlink inside
the skills tree could expose arbitrary host files to the container. When
symlinks are detected, this function creates a sanitized copy (regular
files only) in a temp directory and returns that path instead. When no
symlinks are present (the common case), the original directory is returned
directly with zero overhead.
Returns a dict with ``host_path`` and ``container_path`` keys, or None.
"""
hermes_home = _resolve_hermes_home()
skills_dir = hermes_home / "skills"
if not skills_dir.is_dir():
return None
host_path = _safe_skills_path(skills_dir)
return {
"host_path": host_path,
"container_path": f"{container_base.rstrip('/')}/skills",
}
_safe_skills_tempdir: Path | None = None
def _safe_skills_path(skills_dir: Path) -> str:
"""Return *skills_dir* if symlink-free, else a sanitized temp copy."""
global _safe_skills_tempdir
symlinks = [p for p in skills_dir.rglob("*") if p.is_symlink()]
if not symlinks:
return str(skills_dir)
for link in symlinks:
logger.warning("credential_files: skipping symlink in skills dir: %s -> %s",
link, os.readlink(link))
import atexit
import shutil
import tempfile
# Reuse the same temp dir across calls to avoid accumulation.
if _safe_skills_tempdir and _safe_skills_tempdir.is_dir():
shutil.rmtree(_safe_skills_tempdir, ignore_errors=True)
safe_dir = Path(tempfile.mkdtemp(prefix="hermes-skills-safe-"))
_safe_skills_tempdir = safe_dir
for item in skills_dir.rglob("*"):
if item.is_symlink():
continue
rel = item.relative_to(skills_dir)
target = safe_dir / rel
if item.is_dir():
target.mkdir(parents=True, exist_ok=True)
elif item.is_file():
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(item), str(target))
def _cleanup():
if safe_dir.is_dir():
shutil.rmtree(safe_dir, ignore_errors=True)
atexit.register(_cleanup)
logger.info("credential_files: created symlink-safe skills copy at %s", safe_dir)
return str(safe_dir)
def iter_skills_files(
container_base: str = "/root/.hermes",
) -> List[Dict[str, str]]:
"""Yield individual (host_path, container_path) entries for skills files.
Skips symlinks entirely. Preferred for backends that upload files
individually (Daytona, Modal) rather than mounting a directory.
"""
hermes_home = _resolve_hermes_home()
skills_dir = hermes_home / "skills"
if not skills_dir.is_dir():
return []
container_root = f"{container_base.rstrip('/')}/skills"
result: List[Dict[str, str]] = []
for item in skills_dir.rglob("*"):
if item.is_symlink() or not item.is_file():
continue
rel = item.relative_to(skills_dir)
result.append({
"host_path": str(item),
"container_path": f"{container_root}/{rel}",
})
return result
def clear_credential_files() -> None:
"""Reset the skill-scoped registry (e.g. on session reset)."""
_registered_files.clear()

View File

@@ -113,15 +113,61 @@ class DaytonaEnvironment(BaseEnvironment):
logger.info("Daytona: created sandbox %s for task %s",
self._sandbox.id, task_id)
# Resolve cwd: detect actual home dir inside the sandbox
if self._requested_cwd in ("~", "/home/daytona"):
try:
home = self._sandbox.process.exec("echo $HOME").result.strip()
if home:
# Detect remote home dir first so mounts go to the right place.
self._remote_home = "/root"
try:
home = self._sandbox.process.exec("echo $HOME").result.strip()
if home:
self._remote_home = home
if self._requested_cwd in ("~", "/home/daytona"):
self.cwd = home
except Exception:
pass # leave cwd as-is; sandbox will use its own default
logger.info("Daytona: resolved cwd to %s", self.cwd)
except Exception:
pass
logger.info("Daytona: resolved home to %s, cwd to %s", self._remote_home, self.cwd)
# Track synced files to avoid redundant uploads.
# Key: remote_path, Value: (mtime, size)
self._synced_files: Dict[str, tuple] = {}
# Upload credential files and skills directory into the sandbox.
self._sync_skills_and_credentials()
def _upload_if_changed(self, host_path: str, remote_path: str) -> bool:
"""Upload a file if its mtime/size changed since last sync."""
hp = Path(host_path)
try:
stat = hp.stat()
file_key = (stat.st_mtime, stat.st_size)
except OSError:
return False
if self._synced_files.get(remote_path) == file_key:
return False
try:
parent = str(Path(remote_path).parent)
self._sandbox.process.exec(f"mkdir -p {parent}")
self._sandbox.fs.upload_file(host_path, remote_path)
self._synced_files[remote_path] = file_key
return True
except Exception as e:
logger.debug("Daytona: upload failed %s: %s", host_path, e)
return False
def _sync_skills_and_credentials(self) -> None:
"""Upload changed credential files and skill files into the sandbox."""
container_base = f"{self._remote_home}/.hermes"
try:
from tools.credential_files import get_credential_file_mounts, iter_skills_files
for mount_entry in get_credential_file_mounts():
remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
if self._upload_if_changed(mount_entry["host_path"], remote_path):
logger.debug("Daytona: synced credential %s", remote_path)
for entry in iter_skills_files(container_base=container_base):
if self._upload_if_changed(entry["host_path"], entry["container_path"]):
logger.debug("Daytona: synced skill %s", entry["container_path"])
except Exception as e:
logger.debug("Daytona: could not sync skills/credentials: %s", e)
def _ensure_sandbox_ready(self):
"""Restart sandbox if it was stopped (e.g., by a previous interrupt)."""
@@ -191,6 +237,9 @@ class DaytonaEnvironment(BaseEnvironment):
stdin_data: Optional[str] = None) -> dict:
with self._lock:
self._ensure_sandbox_ready()
# Incremental sync before each command so mid-session credential
# refreshes and skill updates are picked up.
self._sync_skills_and_credentials()
if stdin_data is not None:
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"

View File

@@ -315,7 +315,7 @@ class DockerEnvironment(BaseEnvironment):
# Mount credential files (OAuth tokens, etc.) declared by skills.
# Read-only so the container can authenticate but not modify host creds.
try:
from tools.credential_files import get_credential_file_mounts
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
for mount_entry in get_credential_file_mounts():
volume_args.extend([
@@ -327,6 +327,20 @@ class DockerEnvironment(BaseEnvironment):
mount_entry["host_path"],
mount_entry["container_path"],
)
# Mount the skills directory so skill scripts/templates are
# available inside the container at the same relative path.
skills_mount = get_skills_directory_mount()
if skills_mount:
volume_args.extend([
"-v",
f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro",
])
logger.info(
"Docker: mounting skills dir %s -> %s",
skills_mount["host_path"],
skills_mount["container_path"],
)
except Exception as e:
logger.debug("Docker: could not load credential file mounts: %s", e)

View File

@@ -142,7 +142,7 @@ class ModalEnvironment(BaseEnvironment):
# external services but can't modify the host's credentials.
cred_mounts = []
try:
from tools.credential_files import get_credential_file_mounts
from tools.credential_files import get_credential_file_mounts, iter_skills_files
for mount_entry in get_credential_file_mounts():
cred_mounts.append(
@@ -156,6 +156,18 @@ class ModalEnvironment(BaseEnvironment):
mount_entry["host_path"],
mount_entry["container_path"],
)
# Mount individual skill files (symlinks filtered out).
skills_files = iter_skills_files()
for entry in skills_files:
cred_mounts.append(
_modal.Mount.from_local_file(
entry["host_path"],
remote_path=entry["container_path"],
)
)
if skills_files:
logger.info("Modal: mounting %d skill files", len(skills_files))
except Exception as e:
logger.debug("Modal: could not load credential file mounts: %s", e)
@@ -184,72 +196,69 @@ class ModalEnvironment(BaseEnvironment):
self._app, self._sandbox = self._worker.run_coroutine(
_create_sandbox(), timeout=300
)
# Track synced credential files to avoid redundant pushes.
# Track synced files to avoid redundant pushes.
# Key: container_path, Value: (mtime, size) of last synced version.
self._synced_creds: Dict[str, tuple] = {}
self._synced_files: Dict[str, tuple] = {}
logger.info("Modal: sandbox created (task=%s)", self._task_id)
def _sync_credential_files(self) -> None:
"""Push credential files into the running sandbox.
def _push_file_to_sandbox(self, host_path: str, container_path: str) -> bool:
"""Push a single file into the sandbox if changed. Returns True if synced."""
hp = Path(host_path)
try:
stat = hp.stat()
file_key = (stat.st_mtime, stat.st_size)
except OSError:
return False
Mounts are set at sandbox creation, but credentials may be created
later (e.g. OAuth setup mid-session). This writes the current file
content into the sandbox via exec(), so new/updated credentials are
available without recreating the sandbox.
if self._synced_files.get(container_path) == file_key:
return False
try:
content = hp.read_bytes()
except Exception:
return False
import base64
b64 = base64.b64encode(content).decode("ascii")
container_dir = str(Path(container_path).parent)
cmd = (
f"mkdir -p {shlex.quote(container_dir)} && "
f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
)
async def _write():
proc = await self._sandbox.exec.aio("bash", "-c", cmd)
await proc.wait.aio()
self._worker.run_coroutine(_write(), timeout=15)
self._synced_files[container_path] = file_key
return True
def _sync_files(self) -> None:
"""Push credential files and skill files into the running sandbox.
Runs before each command. Uses mtime+size caching so only changed
files are pushed (~13μs overhead in the no-op case).
"""
try:
from tools.credential_files import get_credential_file_mounts
from tools.credential_files import get_credential_file_mounts, iter_skills_files
mounts = get_credential_file_mounts()
if not mounts:
return
for entry in get_credential_file_mounts():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced credential %s", entry["container_path"])
for entry in mounts:
host_path = entry["host_path"]
container_path = entry["container_path"]
hp = Path(host_path)
try:
stat = hp.stat()
file_key = (stat.st_mtime, stat.st_size)
except OSError:
continue
# Skip if already synced with same mtime+size
if self._synced_creds.get(container_path) == file_key:
continue
try:
content = hp.read_text(encoding="utf-8")
except Exception:
continue
# Write via base64 to avoid shell escaping issues with JSON
import base64
b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
container_dir = str(Path(container_path).parent)
cmd = (
f"mkdir -p {shlex.quote(container_dir)} && "
f"echo {shlex.quote(b64)} | base64 -d > {shlex.quote(container_path)}"
)
_cp = container_path # capture for closure
async def _write():
proc = await self._sandbox.exec.aio("bash", "-c", cmd)
await proc.wait.aio()
self._worker.run_coroutine(_write(), timeout=15)
self._synced_creds[container_path] = file_key
logger.debug("Modal: synced credential %s -> %s", host_path, container_path)
for entry in iter_skills_files():
if self._push_file_to_sandbox(entry["host_path"], entry["container_path"]):
logger.debug("Modal: synced skill file %s", entry["container_path"])
except Exception as e:
logger.debug("Modal: credential file sync failed: %s", e)
logger.debug("Modal: file sync failed: %s", e)
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
# Sync credential files before each command so mid-session
# OAuth setups are picked up without requiring a restart.
self._sync_credential_files()
self._sync_files()
if stdin_data is not None:
marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"

View File

@@ -254,6 +254,28 @@ class SingularityEnvironment(BaseEnvironment):
else:
cmd.append("--writable-tmpfs")
# Mount credential files and skills directory (read-only).
try:
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
for mount_entry in get_credential_file_mounts():
cmd.extend(["--bind", f"{mount_entry['host_path']}:{mount_entry['container_path']}:ro"])
logger.info(
"Singularity: binding credential %s -> %s",
mount_entry["host_path"],
mount_entry["container_path"],
)
skills_mount = get_skills_directory_mount()
if skills_mount:
cmd.extend(["--bind", f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro"])
logger.info(
"Singularity: binding skills dir %s -> %s",
skills_mount["host_path"],
skills_mount["container_path"],
)
except Exception as e:
logger.debug("Singularity: could not load credential/skills mounts: %s", e)
# Resource limits (cgroup-based, may require root or appropriate config)
if self._memory > 0:
cmd.extend(["--memory", f"{self._memory}M"])

View File

@@ -55,6 +55,8 @@ class SSHEnvironment(PersistentShellMixin, BaseEnvironment):
self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
_ensure_ssh_available()
self._establish_connection()
self._remote_home = self._detect_remote_home()
self._sync_skills_and_credentials()
if self.persistent:
self._init_persistent_shell()
@@ -87,6 +89,79 @@ class SSHEnvironment(PersistentShellMixin, BaseEnvironment):
except subprocess.TimeoutExpired:
raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")
def _detect_remote_home(self) -> str:
"""Detect the remote user's home directory."""
try:
cmd = self._build_ssh_command()
cmd.append("echo $HOME")
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
home = result.stdout.strip()
if home and result.returncode == 0:
logger.debug("SSH: remote home = %s", home)
return home
except Exception:
pass
# Fallback: guess from username
if self.user == "root":
return "/root"
return f"/home/{self.user}"
def _sync_skills_and_credentials(self) -> None:
"""Rsync skills directory and credential files to the remote host."""
try:
container_base = f"{self._remote_home}/.hermes"
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
rsync_base = ["rsync", "-az", "--timeout=30", "--safe-links"]
ssh_opts = f"ssh -o ControlPath={self.control_socket} -o ControlMaster=auto"
if self.port != 22:
ssh_opts += f" -p {self.port}"
if self.key_path:
ssh_opts += f" -i {self.key_path}"
rsync_base.extend(["-e", ssh_opts])
dest_prefix = f"{self.user}@{self.host}"
# Sync individual credential files (remap /root/.hermes to detected home)
for mount_entry in get_credential_file_mounts():
remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
parent_dir = str(Path(remote_path).parent)
mkdir_cmd = self._build_ssh_command()
mkdir_cmd.append(f"mkdir -p {parent_dir}")
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
cmd = rsync_base + [mount_entry["host_path"], f"{dest_prefix}:{remote_path}"]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode == 0:
logger.info("SSH: synced credential %s -> %s", mount_entry["host_path"], remote_path)
else:
logger.debug("SSH: rsync credential failed: %s", result.stderr.strip())
# Sync skills directory (remap to detected home)
skills_mount = get_skills_directory_mount(container_base=container_base)
if skills_mount:
remote_path = skills_mount["container_path"]
mkdir_cmd = self._build_ssh_command()
mkdir_cmd.append(f"mkdir -p {remote_path}")
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
cmd = rsync_base + [
skills_mount["host_path"].rstrip("/") + "/",
f"{dest_prefix}:{remote_path}/",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if result.returncode == 0:
logger.info("SSH: synced skills dir %s -> %s", skills_mount["host_path"], remote_path)
else:
logger.debug("SSH: rsync skills dir failed: %s", result.stderr.strip())
except Exception as e:
logger.debug("SSH: could not sync skills/credentials: %s", e)
def execute(self, command: str, cwd: str = "", *,
timeout: int | None = None,
stdin_data: str | None = None) -> dict:
# Incremental sync before each command so mid-session credential
# refreshes and skill updates are picked up.
self._sync_skills_and_credentials()
return super().execute(command, cwd, timeout=timeout, stdin_data=stdin_data)
_poll_interval_start: float = 0.15 # SSH: higher initial interval (150ms) for network latency
@property