From 36dd7a3e8db3ec186102a7c5f102b4b0342a1073 Mon Sep 17 00:00:00 2001 From: ygd58 Date: Mon, 16 Mar 2026 00:18:30 -0700 Subject: [PATCH 01/29] fix(setup): defer config.yaml write until after model selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _update_config_for_provider() was called immediately after provider selection for zai, kimi-coding, minimax, minimax-cn, and anthropic — before model selection happened. Since the gateway re-reads config.yaml per-message, this created a race where the gateway would pick up the new provider but still use the old (incompatible) model name. Capture selected_base_url in each provider block, then call _update_config_for_provider() once, after model selection completes, right before save_config(). The in-memory _set_model_provider() calls stay in place so the config object remains consistent during setup. Closes #1182 --- hermes_cli/setup.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 1f57d86d0..e751811a1 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -743,6 +743,7 @@ def setup_model_provider(config: dict): selected_provider = ( None # "nous", "openai-codex", "openrouter", "custom", or None (keep) ) + selected_base_url = None # deferred until after model selection nous_models = [] # populated if Nous login succeeds if provider_idx == 0: # Nous Portal (OAuth) @@ -1025,8 +1026,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("zai", zai_base_url, default_model="glm-5") _set_model_provider(config, "zai", zai_base_url) + selected_base_url = zai_base_url elif provider_idx == 5: # Kimi / Moonshot selected_provider = "kimi-coding" @@ -1058,8 +1059,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") 
save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("kimi-coding", pconfig.inference_base_url, default_model="kimi-k2.5") _set_model_provider(config, "kimi-coding", pconfig.inference_base_url) + selected_base_url = pconfig.inference_base_url elif provider_idx == 6: # MiniMax selected_provider = "minimax" @@ -1091,8 +1092,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("minimax", pconfig.inference_base_url, default_model="MiniMax-M2.5") _set_model_provider(config, "minimax", pconfig.inference_base_url) + selected_base_url = pconfig.inference_base_url elif provider_idx == 7: # MiniMax China selected_provider = "minimax-cn" @@ -1124,8 +1125,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("minimax-cn", pconfig.inference_base_url, default_model="MiniMax-M2.5") _set_model_provider(config, "minimax-cn", pconfig.inference_base_url) + selected_base_url = pconfig.inference_base_url elif provider_idx == 8: # Anthropic selected_provider = "anthropic" @@ -1228,8 +1229,8 @@ def setup_model_provider(config: dict): save_env_value("OPENAI_API_KEY", "") # Don't save base_url for Anthropic — resolve_runtime_provider() # always hardcodes it. Stale base_urls contaminate other providers. - _update_config_for_provider("anthropic", "", default_model="claude-opus-4-6") _set_model_provider(config, "anthropic") + selected_base_url = "" # else: provider_idx == 9 (Keep current) — only shown when a provider already exists # Normalize "keep current" to an explicit provider so downstream logic @@ -1459,6 +1460,12 @@ def setup_model_provider(config: dict): ) print_success(f"Model set to: {_display}") + # Write provider+base_url to config.yaml only after model selection is complete. 
+ # This prevents a race condition where the gateway picks up a new provider + # before the model name has been updated to match. + if selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "anthropic") and selected_base_url is not None: + _update_config_for_provider(selected_provider, selected_base_url) + save_config(config) From e7d3f1f3bab68794fcb7b05039970429d40b4bc1 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 04:35:34 -0700 Subject: [PATCH 02/29] fix(update): kill gateway via PID file before restart cmd_update only ran 'systemctl --user restart hermes-gateway', which left manually-started gateway processes alive, causing duplicates. Now uses get_running_pid() from gateway/status.py (scoped to HERMES_HOME) to find and SIGTERM this installation's gateway before restarting. Safe with multiple Hermes installations since each HERMES_HOME has its own PID file. If no systemd service exists, informs the user to restart manually. Based on PR #1131 by teknium1. Dropped the cli.py Rich from_ansi changes (already on main). --- hermes_cli/main.py | 68 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b835efb0f..0a16c32d6 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2301,26 +2301,58 @@ def cmd_update(args): print() print("✓ Update complete!") - # Auto-restart gateway if it's running as a systemd service + # Auto-restart gateway if it's running. + # Uses the PID file (scoped to HERMES_HOME) to find this + # installation's gateway — safe with multiple installations. 
try: - check = subprocess.run( - ["systemctl", "--user", "is-active", "hermes-gateway"], - capture_output=True, text=True, timeout=5, - ) - if check.stdout.strip() == "active": - print() - print("→ Gateway service is running — restarting to pick up changes...") - restart = subprocess.run( - ["systemctl", "--user", "restart", "hermes-gateway"], - capture_output=True, text=True, timeout=15, + from gateway.status import get_running_pid, remove_pid_file + import signal as _signal + + existing_pid = get_running_pid() + has_systemd_service = False + + try: + check = subprocess.run( + ["systemctl", "--user", "is-active", "hermes-gateway"], + capture_output=True, text=True, timeout=5, ) - if restart.returncode == 0: - print("✓ Gateway restarted.") - else: - print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") - print(" Try manually: hermes gateway restart") - except (FileNotFoundError, subprocess.TimeoutExpired): - pass # No systemd (macOS, WSL1, etc.) — skip silently + has_systemd_service = check.stdout.strip() == "active" + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + if existing_pid or has_systemd_service: + print() + + # Kill the PID-file-tracked process (may be manual or systemd) + if existing_pid: + try: + os.kill(existing_pid, _signal.SIGTERM) + print(f"→ Stopped gateway process (PID {existing_pid})") + except ProcessLookupError: + pass # Already gone + except PermissionError: + print(f"⚠ Permission denied killing gateway PID {existing_pid}") + remove_pid_file() + + # Restart the systemd service (starts a fresh process) + if has_systemd_service: + import time as _time + _time.sleep(1) # Brief pause for port/socket release + print("→ Restarting gateway service...") + restart = subprocess.run( + ["systemctl", "--user", "restart", "hermes-gateway"], + capture_output=True, text=True, timeout=15, + ) + if restart.returncode == 0: + print("✓ Gateway restarted.") + else: + print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") + print(" 
Try manually: hermes gateway restart") + elif existing_pid: + print(" ℹ️ Gateway was running manually (not as a service).") + print(" Restart it with: hermes gateway run") + except Exception as e: + logger.debug("Gateway restart during update failed: %s", e) print() print("Tip: You can now select a provider and model:") From 30da22e1c117c0ddafdb13096b12ff7202e725f2 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 04:42:46 -0700 Subject: [PATCH 03/29] feat(gateway): scope systemd service name to HERMES_HOME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple Hermes installations on the same machine now get unique systemd service names: - Default ~/.hermes → hermes-gateway (backward compatible) - Custom HERMES_HOME → hermes-gateway-<8-char-hash> Changes: - Add get_service_name() in hermes_cli/gateway.py that derives a deterministic service name from HERMES_HOME via SHA256 - Replace all hardcoded 'hermes-gateway' systemd references with get_service_name() across gateway.py, main.py, status.py, uninstall.py - Add HERMES_HOME env var to both user and system systemd unit templates so the gateway process uses the correct installation - Update tests to use get_service_name() in assertions --- hermes_cli/gateway.py | 57 +++++++++++++++++------- hermes_cli/main.py | 6 ++- hermes_cli/status.py | 7 ++- hermes_cli/uninstall.py | 12 +++-- tests/hermes_cli/test_gateway.py | 6 +-- tests/hermes_cli/test_gateway_linger.py | 2 +- tests/hermes_cli/test_gateway_service.py | 8 ++-- 7 files changed, 68 insertions(+), 30 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index df9694843..10bf2c7ce 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -119,14 +119,35 @@ def is_windows() -> bool: # Service Configuration # ============================================================================= -SERVICE_NAME = "hermes-gateway" +_SERVICE_BASE = "hermes-gateway" SERVICE_DESCRIPTION = "Hermes 
Agent Gateway - Messaging Platform Integration" +def get_service_name() -> str: + """Derive a systemd service name scoped to this HERMES_HOME. + + Default ``~/.hermes`` returns ``hermes-gateway`` (backward compatible). + Any other HERMES_HOME appends a short hash so multiple installations + can each have their own systemd service without conflicting. + """ + import hashlib + from pathlib import Path as _Path # local import to avoid monkeypatch interference + home = _Path(os.getenv("HERMES_HOME", _Path.home() / ".hermes")).resolve() + default = (_Path.home() / ".hermes").resolve() + if home == default: + return _SERVICE_BASE + suffix = hashlib.sha256(str(home).encode()).hexdigest()[:8] + return f"{_SERVICE_BASE}-{suffix}" + + +SERVICE_NAME = _SERVICE_BASE # backward-compat for external importers; prefer get_service_name() + + def get_systemd_unit_path(system: bool = False) -> Path: + name = get_service_name() if system: - return Path("/etc/systemd/system") / f"{SERVICE_NAME}.service" - return Path.home() / ".config" / "systemd" / "user" / f"{SERVICE_NAME}.service" + return Path("/etc/systemd/system") / f"{name}.service" + return Path.home() / ".config" / "systemd" / "user" / f"{name}.service" def _systemctl_cmd(system: bool = False) -> list[str]: @@ -362,6 +383,8 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main" + hermes_home = str(Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")).resolve()) + if system: username, group_name, home_dir = _system_service_identity(run_as_user) return f"""[Unit] @@ -380,6 +403,7 @@ Environment="USER={username}" Environment="LOGNAME={username}" Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" +Environment="HERMES_HOME={hermes_home}" Restart=on-failure RestartSec=10 KillMode=mixed @@ -403,6 +427,7 @@ 
ExecStop={hermes_cli} gateway stop WorkingDirectory={working_dir} Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" +Environment="HERMES_HOME={hermes_home}" Restart=on-failure RestartSec=10 KillMode=mixed @@ -455,7 +480,7 @@ def _print_linger_enable_warning(username: str, detail: str | None = None) -> No print(f" sudo loginctl enable-linger {username}") print() print(" Then restart the gateway:") - print(f" systemctl --user restart {SERVICE_NAME}.service") + print(f" systemctl --user restart {get_service_name()}.service") print() @@ -526,7 +551,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8") subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True) - subprocess.run(_systemctl_cmd(system) + ["enable", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True) print() print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!") @@ -534,7 +559,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str print("Next steps:") print(f" {'sudo ' if system else ''}hermes gateway start{scope_flag} # Start the service") print(f" {'sudo ' if system else ''}hermes gateway status{scope_flag} # Check status") - print(f" {'journalctl' if system else 'journalctl --user'} -u {SERVICE_NAME} -f # View logs") + print(f" {'journalctl' if system else 'journalctl --user'} -u {get_service_name()} -f # View logs") print() if system: @@ -552,8 +577,8 @@ def systemd_uninstall(system: bool = False): if system: _require_root_for_system_service("uninstall") - subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=False) - subprocess.run(_systemctl_cmd(system) + ["disable", SERVICE_NAME], check=False) + subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=False) + 
subprocess.run(_systemctl_cmd(system) + ["disable", get_service_name()], check=False) unit_path = get_systemd_unit_path(system=system) if unit_path.exists(): @@ -569,7 +594,7 @@ def systemd_start(system: bool = False): if system: _require_root_for_system_service("start") refresh_systemd_unit_if_needed(system=system) - subprocess.run(_systemctl_cmd(system) + ["start", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["start", get_service_name()], check=True) print(f"✓ {_service_scope_label(system).capitalize()} service started") @@ -578,7 +603,7 @@ def systemd_stop(system: bool = False): system = _select_systemd_scope(system) if system: _require_root_for_system_service("stop") - subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=True) print(f"✓ {_service_scope_label(system).capitalize()} service stopped") @@ -588,7 +613,7 @@ def systemd_restart(system: bool = False): if system: _require_root_for_system_service("restart") refresh_systemd_unit_if_needed(system=system) - subprocess.run(_systemctl_cmd(system) + ["restart", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["restart", get_service_name()], check=True) print(f"✓ {_service_scope_label(system).capitalize()} service restarted") @@ -613,12 +638,12 @@ def systemd_status(deep: bool = False, system: bool = False): print() subprocess.run( - _systemctl_cmd(system) + ["status", SERVICE_NAME, "--no-pager"], + _systemctl_cmd(system) + ["status", get_service_name(), "--no-pager"], capture_output=False, ) result = subprocess.run( - _systemctl_cmd(system) + ["is-active", SERVICE_NAME], + _systemctl_cmd(system) + ["is-active", get_service_name()], capture_output=True, text=True, ) @@ -657,7 +682,7 @@ def systemd_status(deep: bool = False, system: bool = False): if deep: print() print("Recent logs:") - subprocess.run(_journalctl_cmd(system) + ["-u", SERVICE_NAME, "-n", "20", 
"--no-pager"]) + subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]) # ============================================================================= @@ -1118,7 +1143,7 @@ def _is_service_running() -> bool: if user_unit_exists: result = subprocess.run( - _systemctl_cmd(False) + ["is-active", SERVICE_NAME], + _systemctl_cmd(False) + ["is-active", get_service_name()], capture_output=True, text=True ) if result.stdout.strip() == "active": @@ -1126,7 +1151,7 @@ def _is_service_running() -> bool: if system_unit_exists: result = subprocess.run( - _systemctl_cmd(True) + ["is-active", SERVICE_NAME], + _systemctl_cmd(True) + ["is-active", get_service_name()], capture_output=True, text=True ) if result.stdout.strip() == "active": diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 0a16c32d6..2cb220df6 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2306,14 +2306,16 @@ def cmd_update(args): # installation's gateway — safe with multiple installations. 
try: from gateway.status import get_running_pid, remove_pid_file + from hermes_cli.gateway import get_service_name import signal as _signal + _gw_service_name = get_service_name() existing_pid = get_running_pid() has_systemd_service = False try: check = subprocess.run( - ["systemctl", "--user", "is-active", "hermes-gateway"], + ["systemctl", "--user", "is-active", _gw_service_name], capture_output=True, text=True, timeout=5, ) has_systemd_service = check.stdout.strip() == "active" @@ -2340,7 +2342,7 @@ def cmd_update(args): _time.sleep(1) # Brief pause for port/socket release print("→ Restarting gateway service...") restart = subprocess.run( - ["systemctl", "--user", "restart", "hermes-gateway"], + ["systemctl", "--user", "restart", _gw_service_name], capture_output=True, text=True, timeout=15, ) if restart.returncode == 0: diff --git a/hermes_cli/status.py b/hermes_cli/status.py index db7ce0641..be490e930 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -275,8 +275,13 @@ def show_status(args): print(color("◆ Gateway Service", Colors.CYAN, Colors.BOLD)) if sys.platform.startswith('linux'): + try: + from hermes_cli.gateway import get_service_name + _gw_svc = get_service_name() + except Exception: + _gw_svc = "hermes-gateway" result = subprocess.run( - ["systemctl", "--user", "is-active", "hermes-gateway"], + ["systemctl", "--user", "is-active", _gw_svc], capture_output=True, text=True ) diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py index d70405ce3..40ff75f16 100644 --- a/hermes_cli/uninstall.py +++ b/hermes_cli/uninstall.py @@ -133,7 +133,13 @@ def uninstall_gateway_service(): if platform.system() != "Linux": return False - service_file = Path.home() / ".config" / "systemd" / "user" / "hermes-gateway.service" + try: + from hermes_cli.gateway import get_service_name + svc_name = get_service_name() + except Exception: + svc_name = "hermes-gateway" + + service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service" if 
not service_file.exists(): return False @@ -141,14 +147,14 @@ def uninstall_gateway_service(): try: # Stop the service subprocess.run( - ["systemctl", "--user", "stop", "hermes-gateway"], + ["systemctl", "--user", "stop", svc_name], capture_output=True, check=False ) # Disable the service subprocess.run( - ["systemctl", "--user", "disable", "hermes-gateway"], + ["systemctl", "--user", "disable", svc_name], capture_output=True, check=False ) diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py index 29da657e2..52d43fd08 100644 --- a/tests/hermes_cli/test_gateway.py +++ b/tests/hermes_cli/test_gateway.py @@ -39,7 +39,7 @@ def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, "")) def fake_run(cmd, capture_output=False, text=False, check=False): - if cmd[:4] == ["systemctl", "--user", "status", gateway.SERVICE_NAME]: + if cmd[:4] == ["systemctl", "--user", "status", gateway.get_service_name()]: return SimpleNamespace(returncode=0, stdout="", stderr="") if cmd[:3] == ["systemctl", "--user", "is-active"]: return SimpleNamespace(returncode=0, stdout="active\n", stderr="") @@ -76,7 +76,7 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys): assert unit_path.exists() assert [cmd for cmd, _ in calls] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "enable", gateway.SERVICE_NAME], + ["systemctl", "--user", "enable", gateway.get_service_name()], ] assert helper_calls == [True] assert "User service installed and enabled" in out @@ -110,7 +110,7 @@ def test_systemd_install_system_scope_skips_linger_and_uses_systemctl(monkeypatc assert unit_path.read_text(encoding="utf-8") == "scope=True user=alice\n" assert [cmd for cmd, _ in calls] == [ ["systemctl", "daemon-reload"], - ["systemctl", "enable", gateway.SERVICE_NAME], + ["systemctl", "enable", gateway.get_service_name()], ] assert helper_calls == [] 
assert "Configured to run as: alice" not in out # generated test unit has no User= line diff --git a/tests/hermes_cli/test_gateway_linger.py b/tests/hermes_cli/test_gateway_linger.py index cdc07f95f..b21e3f762 100644 --- a/tests/hermes_cli/test_gateway_linger.py +++ b/tests/hermes_cli/test_gateway_linger.py @@ -114,7 +114,7 @@ def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys): assert unit_path.exists() assert [cmd for cmd, _ in calls] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "enable", gateway.SERVICE_NAME], + ["systemctl", "--user", "enable", gateway.get_service_name()], ] assert helper_calls == [True] assert "User service installed and enabled" in out diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index ce41a57a1..708d0ee8d 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -26,7 +26,7 @@ class TestSystemdServiceRefresh: assert unit_path.read_text(encoding="utf-8") == "new unit\n" assert calls[:2] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "start", gateway_cli.SERVICE_NAME], + ["systemctl", "--user", "start", gateway_cli.get_service_name()], ] def test_systemd_restart_refreshes_outdated_unit(self, tmp_path, monkeypatch): @@ -49,7 +49,7 @@ class TestSystemdServiceRefresh: assert unit_path.read_text(encoding="utf-8") == "new unit\n" assert calls[:2] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "restart", gateway_cli.SERVICE_NAME], + ["systemctl", "--user", "restart", gateway_cli.get_service_name()], ] @@ -92,9 +92,9 @@ class TestGatewayServiceDetection: ) def fake_run(cmd, capture_output=True, text=True, **kwargs): - if cmd == ["systemctl", "--user", "is-active", gateway_cli.SERVICE_NAME]: + if cmd == ["systemctl", "--user", "is-active", gateway_cli.get_service_name()]: return SimpleNamespace(returncode=0, stdout="inactive\n", stderr="") - if cmd == 
["systemctl", "is-active", gateway_cli.SERVICE_NAME]: + if cmd == ["systemctl", "is-active", gateway_cli.get_service_name()]: return SimpleNamespace(returncode=0, stdout="active\n", stderr="") raise AssertionError(f"Unexpected command: {cmd}") From 51185354dd00580cee3e89882a83c5b26331f01d Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 04:44:53 -0700 Subject: [PATCH 04/29] docs: document scoped systemd service names for multi-install - Update messaging guide to use 'hermes gateway' CLI commands instead of raw systemctl (auto-resolves the correct service name) - Add info callout explaining multi-install service name scoping - Update HERMES_HOME env var docs to mention PID + service name scoping --- website/docs/reference/environment-variables.md | 2 +- website/docs/user-guide/messaging/index.md | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 03e84d93f..2b462e186 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -34,7 +34,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `VOICE_TOOLS_OPENAI_KEY` | Preferred OpenAI key for OpenAI speech-to-text and text-to-speech providers | | `HERMES_LOCAL_STT_COMMAND` | Optional local speech-to-text command template. Supports `{input_path}`, `{output_dir}`, `{language}`, and `{model}` placeholders | | `HERMES_LOCAL_STT_LANGUAGE` | Default language passed to `HERMES_LOCAL_STT_COMMAND` or auto-detected local `whisper` CLI fallback (default: `en`) | -| `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`) | +| `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`). 
Also scopes the gateway PID file and systemd service name, so multiple installations can run concurrently |
 
 ## Provider Auth (OAuth)
 
diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md
index fea310d21..0c17e65e6 100644
--- a/website/docs/user-guide/messaging/index.md
+++ b/website/docs/user-guide/messaging/index.md
@@ -244,10 +244,10 @@ Background tasks on messaging platforms are fire-and-forget — you don't need t
 
 ```bash
 hermes gateway install # Install as user service
-systemctl --user start hermes-gateway
-systemctl --user stop hermes-gateway
-systemctl --user status hermes-gateway
-journalctl --user -u hermes-gateway -f
+hermes gateway start # Start the service
+hermes gateway stop # Stop the service
+hermes gateway status # Check status
+journalctl --user -u hermes-gateway -f # View logs
 
 # Enable lingering (keeps running after logout)
 sudo loginctl enable-linger $USER
@@ -263,6 +263,10 @@ Use the user service on laptops and dev boxes. Use the system service on VPS or
 
 Avoid keeping both the user and system gateway units installed at once unless you really mean to. Hermes will warn if it detects both because start/stop/status behavior gets ambiguous.
 
+:::info Multiple installations
+If you run multiple Hermes installations on the same machine (with different `HERMES_HOME` directories), each gets its own systemd service name. The default `~/.hermes` uses `hermes-gateway`; other installations use `hermes-gateway-<hash>`. The `hermes gateway` commands automatically target the correct service for your current `HERMES_HOME`.
+::: + ### macOS (launchd) ```bash From 00110fb3c3713a2f304be17df321db448b5b5cee Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 04:56:22 -0700 Subject: [PATCH 05/29] docs: update checkpoint/rollback docs for new features - Reflect that checkpoints are now enabled by default - Document /rollback diff for previewing changes - Document /rollback for single-file restore - Document automatic conversation undo on rollback - Document terminal command checkpoint coverage - Update listing example to show change stats - Fix config path (checkpoints.enabled, not agent.checkpoints_enabled) - Consolidate features/checkpoints.md to brief summary with link --- .../user-guide/checkpoints-and-rollback.md | 175 ++++++++++-------- .../docs/user-guide/features/checkpoints.md | 99 ++-------- 2 files changed, 115 insertions(+), 159 deletions(-) diff --git a/website/docs/user-guide/checkpoints-and-rollback.md b/website/docs/user-guide/checkpoints-and-rollback.md index a7a34115f..f81a7d4f8 100644 --- a/website/docs/user-guide/checkpoints-and-rollback.md +++ b/website/docs/user-guide/checkpoints-and-rollback.md @@ -6,10 +6,28 @@ description: "Filesystem safety nets for destructive operations using shadow git # Checkpoints and `/rollback` -Hermes Agent can automatically snapshot your project before **destructive operations** (like file write/patch tools) and restore it later with a single command. +Hermes Agent automatically snapshots your project before **destructive operations** and lets you restore it with a single command. Checkpoints are **enabled by default** — there's zero cost when no file-mutating tools fire. This safety net is powered by an internal **Checkpoint Manager** that keeps a separate shadow git repository under `~/.hermes/checkpoints/` — your real project `.git` is never touched. 
+## What Triggers a Checkpoint
+
+Checkpoints are taken automatically before:
+
+- **File tools** — `write_file` and `patch`
+- **Destructive terminal commands** — `rm`, `mv`, `sed -i`, `truncate`, `shred`, output redirects (`>`), and `git reset`/`clean`/`checkout`
+
+The agent creates **at most one checkpoint per directory per turn**, so long-running sessions don't spam snapshots.
+
+## Quick Reference
+
+| Command | Description |
+|---------|-------------|
+| `/rollback` | List all checkpoints with change stats |
+| `/rollback <n>` | Restore to checkpoint N (also undoes last chat turn) |
+| `/rollback diff <n>` | Preview diff between checkpoint N and current state |
+| `/rollback <n> <file>` | Restore a single file from checkpoint N |
+
 ## How Checkpoints Work
 
 At a high level:
@@ -21,24 +39,11 @@ At a high level:
 - Stages and commits the current state with a short, human‑readable reason.
 - These commits form a checkpoint history that you can inspect and restore via `/rollback`.
 
-Internally, the Checkpoint Manager:
-
-- Stores shadow repos under:
-  - `~/.hermes/checkpoints/<hash>/`
-- Keeps metadata about:
-  - The original working directory (`HERMES_WORKDIR` file in the shadow repo).
-  - Excluded paths such as:
-    - `node_modules/`, `dist/`, `build/`
-    - `.venv/`, `__pycache__/`, `*.pyc`
-    - `.git/`, `.cache/`, `.pytest_cache/`, etc.
-
-The agent creates **at most one checkpoint per directory per turn**, so long running sessions do not spam snapshots.
- ```mermaid flowchart LR user["User command\n(hermes, gateway)"] agent["AIAgent\n(run_agent.py)"] - tools["File tools\n(write/patch)"] + tools["File & terminal tools"] cpMgr["CheckpointManager"] shadowRepo["Shadow git repo\n~/.hermes/checkpoints/"] @@ -50,108 +55,128 @@ flowchart LR tools -->|"apply changes"| agent ``` -## Enabling Checkpoints +## Configuration -Checkpoints are controlled by a simple on/off flag and a maximum snapshot count **per directory**: - -- `checkpoints_enabled` – master switch -- `checkpoint_max_snapshots` – soft cap on history depth per directory - -You can configure these in `~/.hermes/config.yaml`: +Checkpoints are enabled by default. Configure in `~/.hermes/config.yaml`: ```yaml -agent: - checkpoints_enabled: true - checkpoint_max_snapshots: 50 +checkpoints: + enabled: true # master switch (default: true) + max_snapshots: 50 # max checkpoints per directory ``` -Or via CLI flags (exact wiring may depend on your version of the CLI): +To disable: -```bash -hermes --checkpoints -# or -hermes chat --checkpoints +```yaml +checkpoints: + enabled: false ``` When disabled, the Checkpoint Manager is a no‑op and never attempts git operations. ## Listing Checkpoints -Hermes exposes an interactive way to list checkpoints for the current working directory. +From a CLI session: -From the CLI session where you are working on a project: - -```bash -# Ask Hermes to show checkpoints for the current directory +``` /rollback ``` -Hermes responds with a formatted list similar to: +Hermes responds with a formatted list showing change statistics: ```text 📸 Checkpoints for /path/to/project: - 1. a1b2c3d 2026-03-13 10:24 auto: before apply_patch - 2. d4e5f6a 2026-03-13 10:15 pre-rollback snapshot (restoring to a1b2c3d0) + 1. 4270a8c 2026-03-16 04:36 before patch (1 file, +1/-0) + 2. eaf4c1f 2026-03-16 04:35 before write_file + 3. b3f9d2e 2026-03-16 04:34 before terminal: sed -i s/old/new/ config.py (1 file, +1/-1) -Use /rollback to restore, e.g. 
/rollback 1 + /rollback restore to checkpoint N + /rollback diff preview changes since checkpoint N + /rollback restore a single file from checkpoint N ``` Each entry shows: - Short hash - Timestamp -- Reason (commit message for the snapshot) +- Reason (what triggered the snapshot) +- Change summary (files changed, insertions/deletions) + +## Previewing Changes with `/rollback diff` + +Before committing to a restore, preview what has changed since a checkpoint: + +``` +/rollback diff 1 +``` + +This shows a git diff stat summary followed by the actual diff: + +```text +test.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/test.py b/test.py +--- a/test.py ++++ b/test.py +@@ -1 +1 @@ +-print('original content') ++print('modified content') +``` + +Long diffs are capped at 80 lines to avoid flooding the terminal. ## Restoring with `/rollback` -Once you have identified the snapshot you want to go back to, use `/rollback` with the number from the list: +Restore to a checkpoint by number: -```bash -# Restore to the most recent snapshot +``` /rollback 1 ``` Behind the scenes, Hermes: 1. Verifies the target commit exists in the shadow repo. -2. Takes a **pre‑rollback snapshot** of the current state so you can “undo the undo” later. -3. Runs `git checkout -- .` in the shadow repo, restoring tracked files in your working directory. +2. Takes a **pre‑rollback snapshot** of the current state so you can "undo the undo" later. +3. Restores tracked files in your working directory. +4. **Undoes the last conversation turn** so the agent's context matches the restored filesystem state. -On success, Hermes responds with a short summary like: +On success: ```text -✅ Restored /path/to/project to a1b2c3d -Reason: auto: before apply_patch +✅ Restored to checkpoint 4270a8c5: before patch +A pre-rollback snapshot was saved automatically. +(^_^)b Undid 4 message(s). Removed: "Now update test.py to ..." + 4 message(s) remaining in history. 
+ Chat turn undone to match restored file state. ``` -If something goes wrong (missing commit, git error), you will see a clear error message and details will be logged. +The conversation undo ensures the agent doesn't "remember" changes that have been rolled back, avoiding confusion on the next turn. + +## Single-File Restore + +Restore just one file from a checkpoint without affecting the rest of the directory: + +``` +/rollback 1 src/broken_file.py +``` + +This is useful when the agent made changes to multiple files but only one needs to be reverted. ## Safety and Performance Guards To keep checkpointing safe and fast, Hermes applies several guardrails: -- **Git availability** - - If `git` is not found on `PATH`, checkpoints are transparently disabled. - - A debug log entry is emitted, but your session continues normally. -- **Directory scope** - - Hermes skips overly broad directories such as: - - Root (`/`) - - Your home directory (`$HOME`) - - This prevents accidental snapshots of your entire filesystem. -- **Repository size** - - Before committing, Hermes performs a quick file count. - - If the directory has more than a configured threshold (e.g. `50,000` files), - checkpoints are skipped to avoid large git operations. -- **No‑change snapshots** - - If there are no changes since the last snapshot, the checkpoint is skipped - instead of committing an empty diff. - -All errors inside the Checkpoint Manager are treated as **non‑fatal**: they are logged at debug level and your tools continue to run. +- **Git availability** — if `git` is not found on `PATH`, checkpoints are transparently disabled. +- **Directory scope** — Hermes skips overly broad directories (root `/`, home `$HOME`). +- **Repository size** — directories with more than 50,000 files are skipped to avoid slow git operations. +- **No‑change snapshots** — if there are no changes since the last snapshot, the checkpoint is skipped. 
+- **Non‑fatal errors** — all errors inside the Checkpoint Manager are logged at debug level; your tools continue to run. ## Where Checkpoints Live -By default, all shadow repos live under: +All shadow repos live under: ```text ~/.hermes/checkpoints/ @@ -160,21 +185,19 @@ By default, all shadow repos live under: └── ... ``` -Each `` is derived from the absolute path of the working directory. Inside each shadow repo you will find: +Each `` is derived from the absolute path of the working directory. Inside each shadow repo you'll find: - Standard git internals (`HEAD`, `refs/`, `objects/`) - An `info/exclude` file containing a curated ignore list - A `HERMES_WORKDIR` file pointing back to the original project root -You normally never need to touch these manually; they are documented here so advanced users understand how the safety net works. +You normally never need to touch these manually. ## Best Practices -- **Keep checkpoints enabled** for interactive development and refactors. -- **Use `/rollback` instead of `git reset`** when you want to undo agent‑driven changes only. -- **Combine with Git branches and worktrees** for maximum safety: - - Keep each Hermes session in its own worktree/branch. - - Let checkpoints act as an extra layer of protection on top. - -For running multiple agents in parallel on the same repo without interfering with each other, see the dedicated guide on [Git worktrees](./git-worktrees.md). +- **Leave checkpoints enabled** — they're on by default and have zero cost when no files are modified. +- **Use `/rollback diff` before restoring** — preview what will change to pick the right checkpoint. +- **Use `/rollback` instead of `git reset`** when you want to undo agent-driven changes only. +- **Combine with Git worktrees** for maximum safety — keep each Hermes session in its own worktree/branch, with checkpoints as an extra layer. +For running multiple agents in parallel on the same repo, see the guide on [Git worktrees](./git-worktrees.md). 
diff --git a/website/docs/user-guide/features/checkpoints.md b/website/docs/user-guide/features/checkpoints.md index a50aca8ff..aed879fc2 100644 --- a/website/docs/user-guide/features/checkpoints.md +++ b/website/docs/user-guide/features/checkpoints.md @@ -1,97 +1,30 @@ # Filesystem Checkpoints -Hermes can automatically snapshot your working directory before making file changes, giving you a safety net to roll back if something goes wrong. +Hermes automatically snapshots your working directory before making file changes, giving you a safety net to roll back if something goes wrong. Checkpoints are **enabled by default**. -## How It Works +## Quick Reference -When enabled, Hermes takes a **one-time snapshot** at the start of each conversation turn before the first file-modifying operation (`write_file` or `patch`). This creates a point-in-time backup you can restore to at any time. +| Command | Description | +|---------|-------------| +| `/rollback` | List all checkpoints with change stats | +| `/rollback <n>` | Restore to checkpoint N (also undoes last chat turn) | +| `/rollback diff <n>` | Preview diff between checkpoint N and current state | +| `/rollback <n> <file>` | Restore a single file from checkpoint N | -Under the hood, checkpoints use a **shadow git repository** stored at `~/.hermes/checkpoints/`. This is completely separate from your project's git — no `.git` directory is created in your project, and your own git history is never touched.
+## What Triggers Checkpoints -## Enabling Checkpoints +- **File tools** — `write_file` and `patch` +- **Destructive terminal commands** — `rm`, `mv`, `sed -i`, output redirects (`>`), `git reset`/`clean` -### Per-session (CLI flag) - -```bash -hermes --checkpoints -``` - -### Permanently (config.yaml) +## Configuration ```yaml # ~/.hermes/config.yaml checkpoints: - enabled: true - max_snapshots: 50 # max checkpoints per directory (default: 50) + enabled: true # default: true + max_snapshots: 50 # max checkpoints per directory ``` -## Rolling Back +## Learn More -Use the `/rollback` slash command: - -``` -/rollback # List all available checkpoints -/rollback 1 # Restore to checkpoint #1 (most recent) -/rollback 3 # Restore to checkpoint #3 (further back) -/rollback abc1234 # Restore by git commit hash -``` - -Example output: - -``` -📸 Checkpoints for /home/user/project: - - 1. abc1234 2026-03-10 14:22 before write_file - 2. def5678 2026-03-10 14:15 before patch - 3. ghi9012 2026-03-10 14:08 before write_file - -Use /rollback to restore, e.g. /rollback 1 -``` - -When you restore, Hermes automatically takes a **pre-rollback snapshot** first — so you can always undo your undo. 
- -## What Gets Checkpointed - -Checkpoints capture the entire working directory (the project root), excluding common large/sensitive patterns: - -- `node_modules/`, `dist/`, `build/` -- `.env`, `.env.*` -- `__pycache__/`, `*.pyc` -- `.venv/`, `venv/` -- `.git/` -- `.DS_Store`, `*.log` - -## Performance - -Checkpoints are designed to be lightweight: - -- **Once per turn** — only the first file operation triggers a snapshot, not every write -- **Skips large directories** — directories with >50,000 files are skipped automatically -- **Skips when nothing changed** — if no files were modified since the last checkpoint, no commit is created -- **Non-blocking** — if a checkpoint fails for any reason, the file operation proceeds normally - -## How It Determines the Project Root - -When you write to a file like `src/components/Button.tsx`, Hermes walks up the directory tree looking for project markers (`.git`, `pyproject.toml`, `package.json`, `Cargo.toml`, etc.) to find the project root. This ensures the entire project is checkpointed, not just the file's parent directory. - -## Platforms - -Checkpoints work on both: -- **CLI** — uses your current working directory -- **Gateway** (Telegram, Discord, etc.) — uses `MESSAGING_CWD` - -The `/rollback` command is available on all platforms. - -## FAQ - -**Does this conflict with my project's git?** -No. Checkpoints use a completely separate shadow git repository via `GIT_DIR` environment variables. Your project's `.git/` is never touched. - -**How much disk space do checkpoints use?** -Git is very efficient at storing diffs. For most projects, checkpoint data is negligible. Old checkpoints are pruned when `max_snapshots` is exceeded. - -**Can I checkpoint without git installed?** -No — git must be available on your PATH. If it's not installed, checkpoints silently disable. - -**Can I roll back across sessions?** -Yes! Checkpoints persist in `~/.hermes/checkpoints/` and survive across sessions. 
You can roll back to a checkpoint from yesterday. +For the full guide — how shadow repos work, diff previews, file-level restore, conversation undo, safety guards, and best practices — see **[Checkpoints and /rollback](../checkpoints-and-rollback.md)**. From ee579af566f40680e6694f609e6686c761eff16e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 04:58:28 -0700 Subject: [PATCH 06/29] docs: add CLI status bar docs and update /usage reference (#1523) - Add Status Bar section to user-guide/cli.md with layout example, element descriptions, responsive width behavior, and color-coded context threshold table - Update /usage description in slash-commands reference to mention cost breakdown and session duration --- website/docs/reference/slash-commands.md | 4 ++-- website/docs/user-guide/cli.md | 29 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 83cbfeecf..9a27a7131 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -63,7 +63,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | Command | Description | |---------|-------------| | `/help` | Show this help message | -| `/usage` | Show token usage for the current session | +| `/usage` | Show token usage, cost breakdown, and session duration | | `/insights` | Show usage insights and analytics (last 30 days) | | `/platforms` | Show gateway/messaging platform status | | `/paste` | Check clipboard for an image and attach it | @@ -104,7 +104,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/compress` | Manually compress conversation context. | | `/title [name]` | Set or show the session title. | | `/resume [name]` | Resume a previously named session. | -| `/usage` | Show token usage for the current session. 
| +| `/usage` | Show token usage, estimated cost breakdown (input/output), context window state, and session duration. | | `/insights [days]` | Show usage analytics. | | `/reasoning [level\|show\|hide]` | Change reasoning effort or toggle reasoning display. | | `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. | diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index 0211ae36b..a33ed295e 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -50,6 +50,35 @@ hermes -w -q "Fix issue #123" # Single query in worktree The welcome banner shows your model, terminal backend, working directory, available tools, and installed skills at a glance. +### Status Bar + +A persistent status bar sits above the input area, updating in real time: + +``` + ⚕ claude-sonnet-4-20250514 │ 12.4K/200K │ [██████░░░░] 6% │ $0.06 │ 15m +``` + +| Element | Description | +|---------|-------------| +| Model name | Current model (truncated if longer than 26 chars) | +| Token count | Context tokens used / max context window | +| Context bar | Visual fill indicator with color-coded thresholds | +| Cost | Estimated session cost (or `n/a` for unknown/zero-priced models) | +| Duration | Elapsed session time | + +The bar adapts to terminal width — full layout at ≥ 76 columns, compact at 52–75, minimal (model + duration only) below 52. + +**Context color coding:** + +| Color | Threshold | Meaning | +|-------|-----------|---------| +| Green | < 50% | Plenty of room | +| Yellow | 50–80% | Getting full | +| Orange | 80–95% | Approaching limit | +| Red | ≥ 95% | Near overflow — consider `/compress` | + +Use `/usage` for a detailed breakdown including per-category costs (input vs output tokens). 
+ ### Session Resume Display When resuming a previous session (`hermes -c` or `hermes --resume `), a "Previous Conversation" panel appears between the banner and the input prompt, showing a compact recap of the conversation history. See [Sessions — Conversation Recap on Resume](sessions.md#conversation-recap-on-resume) for details and configuration. From ce660a4413254794baf5578060998f56451e7c55 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 05:02:58 -0700 Subject: [PATCH 07/29] fix(gateway): remove app-specific Athabasca references from vision enrichment (#1529) Salvaged from PR #1428 by jplew. Removes Athabasca-specific persistence guidance accidentally merged in PR #1422: - Drop Athabasca docstring and injected note from _enrich_message_with_vision - Delete tests/gateway/test_image_enrichment.py (asserted app-specific behavior) Co-authored-by: jplew --- gateway/run.py | 14 ++------------ tests/gateway/test_image_enrichment.py | 25 ------------------------- 2 files changed, 2 insertions(+), 37 deletions(-) delete mode 100644 tests/gateway/test_image_enrichment.py diff --git a/gateway/run.py b/gateway/run.py index a7e637ec6..ec293693e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3620,13 +3620,9 @@ class GatewayRunner: 1. Immediately understand what the user sent (no extra tool call). 2. Re-examine the image with vision_analyze if it needs more detail. - Athabasca persistence should happen through Athabasca's own POST - /api/uploads flow, using the returned asset.publicUrl rather than local - cache paths. - Args: - user_text: The user's original caption / message text. - image_paths: List of local file paths to cached images. + user_text: The user's original caption / message text. + image_paths: List of local file paths to cached images. Returns: The enriched message string with vision descriptions prepended. 
@@ -3651,16 +3647,10 @@ class GatewayRunner: result = _json.loads(result_json) if result.get("success"): description = result.get("analysis", "") - athabasca_note = ( - "\n[If this image needs to persist in Athabasca state, upload the cached file " - "through Athabasca POST /api/uploads and use the returned asset.publicUrl. " - "Do not store the local cache path as the canonical imageUrl.]" - ) enriched_parts.append( f"[The user sent an image~ Here's what I can see:\n{description}]\n" f"[If you need a closer look, use vision_analyze with " f"image_url: {path} ~]" - f"{athabasca_note}" ) else: enriched_parts.append( diff --git a/tests/gateway/test_image_enrichment.py b/tests/gateway/test_image_enrichment.py deleted file mode 100644 index d3c7b72c8..000000000 --- a/tests/gateway/test_image_enrichment.py +++ /dev/null @@ -1,25 +0,0 @@ -from unittest.mock import patch - -import pytest - - -@pytest.mark.asyncio -async def test_image_enrichment_uses_athabasca_upload_guidance_without_stale_r2_warning(): - from gateway.run import GatewayRunner - - runner = object.__new__(GatewayRunner) - - with patch( - "tools.vision_tools.vision_analyze_tool", - return_value='{"success": true, "analysis": "A painted serpent warrior."}', - ): - enriched = await runner._enrich_message_with_vision( - "caption", - ["/tmp/test.jpg"], - ) - - assert "R2 not configured" not in enriched - assert "Gateway media URL available for reference" not in enriched - assert "POST /api/uploads" in enriched - assert "Do not store the local cache path" in enriched - assert "caption" in enriched From 002c459981cb3a12aa284b2304bc87605f19552a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 05:03:11 -0700 Subject: [PATCH 08/29] fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The 
cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja --- hermes_cli/gateway.py | 8 ++------ tests/hermes_cli/test_gateway_service.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 10bf2c7ce..2399436c4 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -371,8 +371,6 @@ def get_hermes_cli_path() -> str: # ============================================================================= def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str: - import shutil - python_path = get_python_path() working_dir = str(PROJECT_ROOT) venv_dir = str(PROJECT_ROOT / "venv") @@ -381,7 +379,6 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) # Build a PATH that includes the venv, node_modules, and standard system dirs sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main" hermes_home = str(Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")).resolve()) @@ -408,7 +405,7 @@ Restart=on-failure RestartSec=10 KillMode=mixed KillSignal=SIGTERM -TimeoutStopSec=15 +TimeoutStopSec=60 StandardOutput=journal StandardError=journal @@ -423,7 +420,6 @@ After=network.target [Service] Type=simple ExecStart={python_path} -m hermes_cli.main gateway run --replace -ExecStop={hermes_cli} gateway stop WorkingDirectory={working_dir} Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" @@ -432,7 +428,7 @@ Restart=on-failure RestartSec=10 KillMode=mixed KillSignal=SIGTERM -TimeoutStopSec=15 +TimeoutStopSec=60 StandardOutput=journal StandardError=journal diff --git a/tests/hermes_cli/test_gateway_service.py 
b/tests/hermes_cli/test_gateway_service.py index 708d0ee8d..ffd381788 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -53,6 +53,23 @@ class TestSystemdServiceRefresh: ] +class TestGeneratedSystemdUnits: + def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): + unit = gateway_cli.generate_systemd_unit(system=False) + + assert "ExecStart=" in unit + assert "ExecStop=" not in unit + assert "TimeoutStopSec=60" in unit + + def test_system_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): + unit = gateway_cli.generate_systemd_unit(system=True) + + assert "ExecStart=" in unit + assert "ExecStop=" not in unit + assert "TimeoutStopSec=60" in unit + assert "WantedBy=multi-user.target" in unit + + class TestGatewayStopCleanup: def test_stop_sweeps_manual_gateway_processes_after_service_stop(self, tmp_path, monkeypatch): unit_path = tmp_path / "hermes-gateway.service" From 14b049d658344057021630b7ab99f5391323fdeb Mon Sep 17 00:00:00 2001 From: alireza78a Date: Mon, 16 Mar 2026 05:03:19 -0700 Subject: [PATCH 09/29] feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. 
--- optional-skills/creative/blender-mcp/SKILL.md | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 optional-skills/creative/blender-mcp/SKILL.md diff --git a/optional-skills/creative/blender-mcp/SKILL.md b/optional-skills/creative/blender-mcp/SKILL.md new file mode 100644 index 000000000..bdcb98a3c --- /dev/null +++ b/optional-skills/creative/blender-mcp/SKILL.md @@ -0,0 +1,116 @@ +--- +name: blender-mcp +description: Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. Use when user wants to create or modify anything in Blender. +version: 1.0.0 +requires: Blender 4.3+ (desktop instance required, headless not supported) +author: alireza78a +tags: [blender, 3d, animation, modeling, bpy, mcp] +--- + +# Blender MCP + +Control a running Blender instance from Hermes via socket on TCP port 9876. + +## Setup (one-time) + +### 1. Install the Blender addon + + curl -sL https://raw.githubusercontent.com/ahujasid/blender-mcp/main/addon.py -o ~/Desktop/blender_mcp_addon.py + +In Blender: + Edit > Preferences > Add-ons > Install > select blender_mcp_addon.py + Enable "Interface: Blender MCP" + +### 2. Start the socket server in Blender + +Press N in Blender viewport to open sidebar. +Find "BlenderMCP" tab and click "Start Server". + +### 3. Verify connection + + nc -z -w2 localhost 9876 && echo "OPEN" || echo "CLOSED" + +## Protocol + +Plain UTF-8 JSON over TCP -- no length prefix. 
+ +Send: {"type": "", "params": {}} +Receive: {"status": "success", "result": } + {"status": "error", "message": ""} + +## Available Commands + +| type | params | description | +|-------------------------|-------------------|---------------------------------| +| execute_code | code (str) | Run arbitrary bpy Python code | +| get_scene_info | (none) | List all objects in scene | +| get_object_info | object_name (str) | Details on a specific object | +| get_viewport_screenshot | (none) | Screenshot of current viewport | + +## Python Helper + +Use this inside execute_code tool calls: + + import socket, json + + def blender_exec(code: str, host="localhost", port=9876, timeout=15): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((host, port)) + s.settimeout(timeout) + payload = json.dumps({"type": "execute_code", "params": {"code": code}}) + s.sendall(payload.encode("utf-8")) + buf = b"" + while True: + try: + chunk = s.recv(4096) + if not chunk: + break + buf += chunk + try: + json.loads(buf.decode("utf-8")) + break + except json.JSONDecodeError: + continue + except socket.timeout: + break + s.close() + return json.loads(buf.decode("utf-8")) + +## Common bpy Patterns + +### Clear scene + bpy.ops.object.select_all(action='SELECT') + bpy.ops.object.delete() + +### Add mesh objects + bpy.ops.mesh.primitive_uv_sphere_add(radius=1, location=(0, 0, 0)) + bpy.ops.mesh.primitive_cube_add(size=2, location=(3, 0, 0)) + bpy.ops.mesh.primitive_cylinder_add(radius=0.5, depth=2, location=(-3, 0, 0)) + +### Create and assign material + mat = bpy.data.materials.new(name="MyMat") + mat.use_nodes = True + bsdf = mat.node_tree.nodes.get("Principled BSDF") + bsdf.inputs["Base Color"].default_value = (R, G, B, 1.0) + bsdf.inputs["Roughness"].default_value = 0.3 + bsdf.inputs["Metallic"].default_value = 0.0 + obj.data.materials.append(mat) + +### Keyframe animation + obj.location = (0, 0, 0) + obj.keyframe_insert(data_path="location", frame=1) + obj.location = (0, 0, 3) + 
obj.keyframe_insert(data_path="location", frame=60) + +### Render to file + bpy.context.scene.render.filepath = "/tmp/render.png" + bpy.context.scene.render.engine = 'CYCLES' + bpy.ops.render.render(write_still=True) + +## Pitfalls + +- Must check socket is open before running (nc -z localhost 9876) +- Addon server must be started inside Blender each session (N-panel > BlenderMCP > Connect) +- Break complex scenes into multiple smaller execute_code calls to avoid timeouts +- Render output path must be absolute (/tmp/...) not relative +- shade_smooth() requires object to be selected and in object mode From a2f0d14f2925ad52c2a5b485a14af0ba46a091a3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 05:19:36 -0700 Subject: [PATCH 10/29] feat(acp): support slash commands in ACP adapter (#1532) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. 
Fixes #1402 --- acp_adapter/server.py | 156 +++++++++++++++++++++++++++++++++++++-- tests/acp/test_server.py | 94 +++++++++++++++++++++++ 2 files changed, 245 insertions(+), 5 deletions(-) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 6e8ec3b49..1081104e9 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -42,7 +42,7 @@ from acp_adapter.events import ( make_tool_progress_cb, ) from acp_adapter.permissions import make_approval_callback -from acp_adapter.session import SessionManager +from acp_adapter.session import SessionManager, SessionState logger = logging.getLogger(__name__) @@ -226,10 +226,19 @@ class HermesACPAgent(acp.Agent): logger.error("prompt: session %s not found", session_id) return PromptResponse(stop_reason="refusal") - user_text = _extract_text(prompt) - if not user_text.strip(): + user_text = _extract_text(prompt).strip() + if not user_text: return PromptResponse(stop_reason="end_turn") + # Intercept slash commands — handle locally without calling the LLM + if user_text.startswith("/"): + response_text = self._handle_slash_command(user_text, state) + if response_text is not None: + if self._conn: + update = acp.update_agent_message_text(response_text) + await self._conn.session_update(session_id, update) + return PromptResponse(stop_reason="end_turn") + logger.info("Prompt on session %s: %s", session_id, user_text[:100]) conn = self._conn @@ -315,12 +324,149 @@ class HermesACPAgent(acp.Agent): stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn" return PromptResponse(stop_reason=stop_reason, usage=usage) - # ---- Model switching ---------------------------------------------------- + # ---- Slash commands (headless) ------------------------------------------- + + _SLASH_COMMANDS = { + "help": "Show available commands", + "model": "Show or change current model", + "tools": "List available tools", + "context": "Show conversation context info", + "reset": "Clear conversation 
history", + "compact": "Compress conversation context", + "version": "Show Hermes version", + } + + def _handle_slash_command(self, text: str, state: SessionState) -> str | None: + """Dispatch a slash command and return the response text. + + Returns ``None`` for unrecognized commands so they fall through + to the LLM (the user may have typed ``/something`` as prose). + """ + parts = text.split(maxsplit=1) + cmd = parts[0].lstrip("/").lower() + args = parts[1].strip() if len(parts) > 1 else "" + + handler = { + "help": self._cmd_help, + "model": self._cmd_model, + "tools": self._cmd_tools, + "context": self._cmd_context, + "reset": self._cmd_reset, + "compact": self._cmd_compact, + "version": self._cmd_version, + }.get(cmd) + + if handler is None: + return None # not a known command — let the LLM handle it + + try: + return handler(args, state) + except Exception as e: + logger.error("Slash command /%s error: %s", cmd, e, exc_info=True) + return f"Error executing /{cmd}: {e}" + + def _cmd_help(self, args: str, state: SessionState) -> str: + lines = ["Available commands:", ""] + for cmd, desc in self._SLASH_COMMANDS.items(): + lines.append(f" /{cmd:10s} {desc}") + lines.append("") + lines.append("Unrecognized /commands are sent to the model as normal messages.") + return "\n".join(lines) + + def _cmd_model(self, args: str, state: SessionState) -> str: + if not args: + model = state.model or getattr(state.agent, "model", "unknown") + provider = getattr(state.agent, "provider", None) or "auto" + return f"Current model: {model}\nProvider: {provider}" + + new_model = args.strip() + target_provider = None + + # Auto-detect provider for the requested model + try: + from hermes_cli.models import parse_model_input, detect_provider_for_model + current_provider = getattr(state.agent, "provider", None) or "openrouter" + target_provider, new_model = parse_model_input(new_model, current_provider) + if target_provider == current_provider: + detected = 
detect_provider_for_model(new_model, current_provider) + if detected: + target_provider, new_model = detected + except Exception: + logger.debug("Provider detection failed, using model as-is", exc_info=True) + + state.model = new_model + state.agent = self.session_manager._make_agent( + session_id=state.session_id, + cwd=state.cwd, + model=new_model, + ) + provider_label = target_provider or getattr(state.agent, "provider", "auto") + logger.info("Session %s: model switched to %s", state.session_id, new_model) + return f"Model switched to: {new_model}\nProvider: {provider_label}" + + def _cmd_tools(self, args: str, state: SessionState) -> str: + try: + from model_tools import get_tool_definitions + toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True) + if not tools: + return "No tools available." + lines = [f"Available tools ({len(tools)}):"] + for t in tools: + name = t.get("function", {}).get("name", "?") + desc = t.get("function", {}).get("description", "") + # Truncate long descriptions + if len(desc) > 80: + desc = desc[:77] + "..." + lines.append(f" {name}: {desc}") + return "\n".join(lines) + except Exception as e: + return f"Could not list tools: {e}" + + def _cmd_context(self, args: str, state: SessionState) -> str: + n_messages = len(state.history) + if n_messages == 0: + return "Conversation is empty (no messages yet)." 
+ # Count by role + roles: dict[str, int] = {} + for msg in state.history: + role = msg.get("role", "unknown") + roles[role] = roles.get(role, 0) + 1 + lines = [ + f"Conversation: {n_messages} messages", + f" user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, " + f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}", + ] + model = state.model or getattr(state.agent, "model", "") + if model: + lines.append(f"Model: {model}") + return "\n".join(lines) + + def _cmd_reset(self, args: str, state: SessionState) -> str: + state.history.clear() + return "Conversation history cleared." + + def _cmd_compact(self, args: str, state: SessionState) -> str: + if not state.history: + return "Nothing to compress — conversation is empty." + try: + agent = state.agent + if hasattr(agent, "compress_context"): + agent.compress_context(state.history) + return f"Context compressed. Messages: {len(state.history)}" + return "Context compression not available for this agent." + except Exception as e: + return f"Compression failed: {e}" + + def _cmd_version(self, args: str, state: SessionState) -> str: + return f"Hermes Agent v{HERMES_VERSION}" + + # ---- Model switching (ACP protocol method) ------------------------------- async def set_session_model( self, model_id: str, session_id: str, **kwargs: Any ): - """Switch the model for a session.""" + """Switch the model for a session (called by ACP protocol).""" state = self.session_manager.get_session(session_id) if state: state.model = model_id diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index 96475c67c..341f4b758 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -295,3 +295,97 @@ class TestOnConnect: mock_conn = MagicMock(spec=acp.Client) agent.on_connect(mock_conn) assert agent._conn is mock_conn + + +# --------------------------------------------------------------------------- +# Slash commands +# 
--------------------------------------------------------------------------- + + +class TestSlashCommands: + """Test slash command dispatch in the ACP adapter.""" + + def _make_state(self, mock_manager): + state = mock_manager.create_session(cwd="/tmp") + state.agent.model = "test-model" + state.agent.provider = "openrouter" + state.model = "test-model" + return state + + def test_help_lists_commands(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = agent._handle_slash_command("/help", state) + assert result is not None + assert "/help" in result + assert "/model" in result + assert "/tools" in result + assert "/reset" in result + + def test_model_shows_current(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = agent._handle_slash_command("/model", state) + assert "test-model" in result + + def test_context_empty(self, agent, mock_manager): + state = self._make_state(mock_manager) + state.history = [] + result = agent._handle_slash_command("/context", state) + assert "empty" in result.lower() + + def test_context_with_messages(self, agent, mock_manager): + state = self._make_state(mock_manager) + state.history = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ] + result = agent._handle_slash_command("/context", state) + assert "2 messages" in result + assert "user: 1" in result + + def test_reset_clears_history(self, agent, mock_manager): + state = self._make_state(mock_manager) + state.history = [{"role": "user", "content": "hello"}] + result = agent._handle_slash_command("/reset", state) + assert "cleared" in result.lower() + assert len(state.history) == 0 + + def test_version(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = agent._handle_slash_command("/version", state) + assert HERMES_VERSION in result + + def test_unknown_command_returns_none(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = 
agent._handle_slash_command("/nonexistent", state) + assert result is None + + @pytest.mark.asyncio + async def test_slash_command_intercepted_in_prompt(self, agent, mock_manager): + """Slash commands should be handled without calling the LLM.""" + new_resp = await agent.new_session(cwd="/tmp") + mock_conn = AsyncMock(spec=acp.Client) + agent._conn = mock_conn + + prompt = [TextContentBlock(type="text", text="/help")] + resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id) + + assert resp.stop_reason == "end_turn" + mock_conn.session_update.assert_called_once() + + @pytest.mark.asyncio + async def test_unknown_slash_falls_through_to_llm(self, agent, mock_manager): + """Unknown /commands should be sent to the LLM, not intercepted.""" + new_resp = await agent.new_session(cwd="/tmp") + mock_conn = AsyncMock(spec=acp.Client) + agent._conn = mock_conn + + # Mock run_in_executor to avoid actually running the agent + with patch("asyncio.get_running_loop") as mock_loop: + mock_loop.return_value.run_in_executor = AsyncMock(return_value={ + "final_response": "I processed /foo", + "messages": [], + }) + prompt = [TextContentBlock(type="text", text="/foo bar")] + resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id) + + assert resp.stop_reason == "end_turn" From 8cdbbcaaa25f882bde6482a76c1f753edbd96f23 Mon Sep 17 00:00:00 2001 From: Bartok9 Date: Mon, 16 Mar 2026 03:35:35 -0400 Subject: [PATCH 11/29] fix(docker): auto-mount host CWD to /workspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #1445 — When using Docker backend, the user's current working directory is now automatically bind-mounted to /workspace inside the container. This allows users to run `cd my-project && hermes` and have their project files accessible to the agent without manual volume config. 
Changes: - Add host_cwd and auto_mount_cwd parameters to DockerEnvironment - Capture original host CWD in _get_env_config() before container fallback - Pass host_cwd through _create_environment() to Docker backend - Add TERMINAL_DOCKER_NO_AUTO_MOUNT env var to disable if needed - Skip auto-mount when /workspace is already explicitly mounted - Add tests for auto-mount behavior - Add documentation for the new feature The auto-mount is skipped when: 1. TERMINAL_DOCKER_NO_AUTO_MOUNT=true is set 2. User configured docker_volumes with :/workspace 3. persistent_filesystem=true (persistent sandbox mode) This makes the Docker backend behave more intuitively — the agent operates on the user's actual project directory by default. --- tests/tools/test_docker_environment.py | 145 +++++++++++++++++++++++ tools/environments/docker.py | 29 +++++ tools/terminal_tool.py | 14 ++- website/docs/user-guide/configuration.md | 36 ++++++ 4 files changed, 222 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index ead655285..3ed297b59 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -86,3 +86,148 @@ def test_ensure_docker_available_uses_resolved_executable(monkeypatch): }) ] + +def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path): + """When host_cwd is provided, it should be auto-mounted to /workspace.""" + import os + + # Create a temp directory to simulate user's project directory + project_dir = tmp_path / "my-project" + project_dir.mkdir() + + # Mock Docker availability + def _run_docker_version(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") + + def _run_docker_create(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 1, stdout="", stderr="storage-opt not supported") + + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + 
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) + + # Mock the inner _Docker class to capture run_args + captured_run_args = [] + + class MockInnerDocker: + container_id = "mock-container-123" + config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() + + def __init__(self, **kwargs): + captured_run_args.extend(kwargs.get("run_args", [])) + + monkeypatch.setattr( + "minisweagent.environments.docker.DockerEnvironment", + MockInnerDocker, + ) + + # Create environment with host_cwd + env = docker_env.DockerEnvironment( + image="python:3.11", + cwd="/workspace", + timeout=60, + persistent_filesystem=False, # Non-persistent mode uses tmpfs, should be overridden + task_id="test-auto-mount", + volumes=[], + host_cwd=str(project_dir), + auto_mount_cwd=True, + ) + + # Check that the host_cwd was added as a volume mount + volume_mount = f"-v {project_dir}:/workspace" + run_args_str = " ".join(captured_run_args) + assert f"{project_dir}:/workspace" in run_args_str, f"Expected auto-mount in run_args: {run_args_str}" + + +def test_auto_mount_disabled_via_env(monkeypatch, tmp_path): + """Auto-mount should be disabled when TERMINAL_DOCKER_NO_AUTO_MOUNT is set.""" + import os + + project_dir = tmp_path / "my-project" + project_dir.mkdir() + + monkeypatch.setenv("TERMINAL_DOCKER_NO_AUTO_MOUNT", "true") + + def _run_docker_version(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") + + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) + + captured_run_args = [] + + class MockInnerDocker: + container_id = "mock-container-456" + config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() + + def __init__(self, **kwargs): + captured_run_args.extend(kwargs.get("run_args", [])) + + monkeypatch.setattr( + 
"minisweagent.environments.docker.DockerEnvironment", + MockInnerDocker, + ) + + env = docker_env.DockerEnvironment( + image="python:3.11", + cwd="/workspace", + timeout=60, + persistent_filesystem=False, + task_id="test-no-auto-mount", + volumes=[], + host_cwd=str(project_dir), + auto_mount_cwd=True, + ) + + # Check that the host_cwd was NOT added (because env var disabled it) + run_args_str = " ".join(captured_run_args) + assert f"{project_dir}:/workspace" not in run_args_str, f"Auto-mount should be disabled: {run_args_str}" + + +def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path): + """Auto-mount should be skipped if /workspace is already mounted via user volumes.""" + import os + + project_dir = tmp_path / "my-project" + project_dir.mkdir() + other_dir = tmp_path / "other" + other_dir.mkdir() + + def _run_docker_version(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") + + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) + + captured_run_args = [] + + class MockInnerDocker: + container_id = "mock-container-789" + config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() + + def __init__(self, **kwargs): + captured_run_args.extend(kwargs.get("run_args", [])) + + monkeypatch.setattr( + "minisweagent.environments.docker.DockerEnvironment", + MockInnerDocker, + ) + + # User already configured a volume mount for /workspace + env = docker_env.DockerEnvironment( + image="python:3.11", + cwd="/workspace", + timeout=60, + persistent_filesystem=False, + task_id="test-workspace-exists", + volumes=[f"{other_dir}:/workspace"], # User explicitly mounted something to /workspace + host_cwd=str(project_dir), + auto_mount_cwd=True, + ) + + # The user's explicit mount should be present + run_args_str = " ".join(captured_run_args) + assert f"{other_dir}:/workspace" 
in run_args_str + + # But the auto-mount should NOT add a duplicate + assert run_args_str.count(":/workspace") == 1, f"Should only have one /workspace mount: {run_args_str}" + diff --git a/tools/environments/docker.py b/tools/environments/docker.py index c04eff8d0..1c95f7b34 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -158,6 +158,10 @@ class DockerEnvironment(BaseEnvironment): Persistence: when enabled, bind mounts preserve /workspace and /root across container restarts. + + Auto-mount: when host_cwd is provided (the user's original working directory), + it is automatically bind-mounted to /workspace unless auto_mount_cwd=False + or the path is already covered by an explicit volume mount. """ def __init__( @@ -172,6 +176,8 @@ class DockerEnvironment(BaseEnvironment): task_id: str = "default", volumes: list = None, network: bool = True, + host_cwd: str = None, + auto_mount_cwd: bool = True, ): if cwd == "~": cwd = "/root" @@ -250,6 +256,29 @@ class DockerEnvironment(BaseEnvironment): else: logger.warning(f"Docker volume '{vol}' missing colon, skipping") + # Auto-mount host CWD to /workspace when enabled (fixes #1445). + # This allows users to run `cd my-project && hermes` and have Docker + # automatically mount their project directory into the container. + # Disabled when: auto_mount_cwd=False, host_cwd is not a valid directory, + # or /workspace is already covered by writable_args or a user volume. 
+ auto_mount_disabled = os.getenv("TERMINAL_DOCKER_NO_AUTO_MOUNT", "").lower() in ("1", "true", "yes") + if host_cwd and auto_mount_cwd and not auto_mount_disabled: + host_cwd_abs = os.path.abspath(os.path.expanduser(host_cwd)) + if os.path.isdir(host_cwd_abs): + # Check if /workspace is already being mounted by persistence or user config + workspace_already_mounted = any( + ":/workspace" in arg for arg in writable_args + ) or any( + ":/workspace" in arg for arg in volume_args + ) + if not workspace_already_mounted: + logger.info(f"Auto-mounting host CWD to /workspace: {host_cwd_abs}") + volume_args.extend(["-v", f"{host_cwd_abs}:/workspace"]) + else: + logger.debug(f"Skipping auto-mount: /workspace already mounted") + else: + logger.debug(f"Skipping auto-mount: host_cwd is not a valid directory: {host_cwd}") + logger.info(f"Docker volume_args: {volume_args}") all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args logger.info(f"Docker run_args: {all_run_args}") diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index fc22bf3f6..a9326f3ec 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -481,7 +481,12 @@ def _get_env_config() -> Dict[str, Any]: # container/sandbox, fall back to the backend's own default. This # catches the case where cli.py (or .env) leaked the host's CWD. # SSH is excluded since /home/ paths are valid on remote machines. - cwd = os.getenv("TERMINAL_CWD", default_cwd) + raw_cwd = os.getenv("TERMINAL_CWD", default_cwd) + cwd = raw_cwd + # Capture original host CWD for auto-mounting into containers (fixes #1445). + # Even when the container's working directory falls back to /root, we still + # want to auto-mount the user's host project directory to /workspace. 
+ host_cwd = raw_cwd if raw_cwd and os.path.isdir(raw_cwd) else os.getcwd() if env_type in ("modal", "docker", "singularity", "daytona") and cwd: # Host paths that won't exist inside containers host_prefixes = ("/Users/", "/home/", "C:\\", "C:/") @@ -498,6 +503,7 @@ def _get_env_config() -> Dict[str, Any]: "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image), "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image), "cwd": cwd, + "host_cwd": host_cwd, # Original host directory for auto-mounting into containers "timeout": _parse_env_var("TERMINAL_TIMEOUT", "180"), "lifetime_seconds": _parse_env_var("TERMINAL_LIFETIME_SECONDS", "300"), # SSH-specific config @@ -525,7 +531,8 @@ def _get_env_config() -> Dict[str, Any]: def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_config: dict = None, container_config: dict = None, local_config: dict = None, - task_id: str = "default"): + task_id: str = "default", + host_cwd: str = None): """ Create an execution environment from mini-swe-agent. 
@@ -537,6 +544,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_config: SSH connection config (for env_type="ssh") container_config: Resource config for container backends (cpu, memory, disk, persistent) task_id: Task identifier for environment reuse and snapshot keying + host_cwd: Original host working directory (for auto-mounting into containers) Returns: Environment instance with execute() method @@ -559,6 +567,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, cpu=cpu, memory=memory, disk=disk, persistent_filesystem=persistent, task_id=task_id, volumes=volumes, + host_cwd=host_cwd, ) elif env_type == "singularity": @@ -965,6 +974,7 @@ def terminal_tool( container_config=container_config, local_config=local_config, task_id=effective_task_id, + host_cwd=config.get("host_cwd"), ) except ImportError as e: return json.dumps({ diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 8adec23f1..9a673bc7f 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -520,6 +520,42 @@ This is useful for: Can also be set via environment variable: `TERMINAL_DOCKER_VOLUMES='["/host:/container"]'` (JSON array). +### Docker Auto-Mount Current Directory + +When using the Docker backend, Hermes **automatically mounts your current working directory** to `/workspace` inside the container. This means you can: + +```bash +cd ~/projects/my-app +hermes +# The agent can now see and edit files in ~/projects/my-app via /workspace +``` + +No manual volume configuration needed — just `cd` to your project and run `hermes`. 
+ +**How it works:** +- If you're in `/home/user/projects/my-app`, that directory is mounted to `/workspace` +- The container's working directory is set to `/workspace` +- Files you edit on the host are immediately visible to the agent, and vice versa + +**Disabling auto-mount:** + +If you prefer the old behavior (empty `/workspace` with tmpfs or persistent sandbox), disable auto-mount: + +```bash +export TERMINAL_DOCKER_NO_AUTO_MOUNT=true +``` + +**Precedence:** + +Auto-mount is skipped when: +1. `TERMINAL_DOCKER_NO_AUTO_MOUNT=true` is set +2. You've explicitly configured a volume mount to `/workspace` in `docker_volumes` +3. `container_persistent: true` is set (persistent sandbox mode uses its own `/workspace`) + +:::tip +Auto-mount is ideal for project-based work where you want the agent to operate on your actual files. For isolated sandboxing where the agent shouldn't access your filesystem, set `TERMINAL_DOCKER_NO_AUTO_MOUNT=true`. +::: + ### Persistent Shell By default, each terminal command runs in its own subprocess — working directory, environment variables, and shell variables reset between commands. When **persistent shell** is enabled, a single long-lived bash process is kept alive across `execute()` calls so that state survives between commands. From 780ddd102b1a8c8d1231ad44fd2035ced289d124 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 05:19:43 -0700 Subject: [PATCH 12/29] fix(docker): gate cwd workspace mount behind config Keep Docker sandboxes isolated by default. Add an explicit terminal.docker_mount_cwd_to_workspace opt-in, thread it through terminal/file environment creation, and document the security tradeoff and config.yaml workflow clearly. 
--- cli-config.yaml.example | 4 +- cli.py | 2 + hermes_cli/config.py | 4 + tests/hermes_cli/test_set_config_value.py | 10 ++ tests/tools/test_docker_environment.py | 100 +++++++++--------- tests/tools/test_modal_sandbox_fixes.py | 60 ++++++++++- tools/environments/docker.py | 94 ++++++++-------- tools/file_tools.py | 1 + tools/terminal_tool.py | 39 ++++--- .../docs/reference/environment-variables.md | 1 + website/docs/user-guide/configuration.md | 48 ++++----- 11 files changed, 218 insertions(+), 145 deletions(-) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 7bc2c4908..c493a309d 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -76,8 +76,9 @@ model: # - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home) terminal: backend: "local" - cwd: "." # For local backend: "." = current directory. Ignored for remote backends. + cwd: "." # For local backend: "." = current directory. Ignored for remote backends unless a backend documents otherwise. timeout: 180 + docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace. lifetime_seconds: 300 # sudo_password: "" # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext! 
@@ -107,6 +108,7 @@ terminal: # timeout: 180 # lifetime_seconds: 300 # docker_image: "nikolaik/python-nodejs:python3.11-nodejs20" +# docker_mount_cwd_to_workspace: true # Explicit opt-in: mount your launch cwd into /workspace # ----------------------------------------------------------------------------- # OPTION 4: Singularity/Apptainer container diff --git a/cli.py b/cli.py index 470186572..aa888fd6a 100755 --- a/cli.py +++ b/cli.py @@ -165,6 +165,7 @@ def load_cli_config() -> Dict[str, Any]: "modal_image": "python:3.11", "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20", "docker_volumes": [], # host:container volume mounts for Docker backend + "docker_mount_cwd_to_workspace": False, # explicit opt-in only; default off for sandbox isolation }, "browser": { "inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min @@ -330,6 +331,7 @@ def load_cli_config() -> Dict[str, Any]: "container_disk": "TERMINAL_CONTAINER_DISK", "container_persistent": "TERMINAL_CONTAINER_PERSISTENT", "docker_volumes": "TERMINAL_DOCKER_VOLUMES", + "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "sandbox_dir": "TERMINAL_SANDBOX_DIR", # Persistent shell (non-local backends) "persistent_shell": "TERMINAL_PERSISTENT_SHELL", diff --git a/hermes_cli/config.py b/hermes_cli/config.py index f78131308..dbb37b284 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -118,6 +118,9 @@ DEFAULT_CONFIG = { # Each entry is "host_path:container_path" (standard Docker -v syntax). # Example: ["/home/user/projects:/workspace/projects", "/data:/data"] "docker_volumes": [], + # Explicit opt-in: mount the host cwd into /workspace for Docker sessions. + # Default off because passing host directories into a sandbox weakens isolation. + "docker_mount_cwd_to_workspace": False, # Persistent shell — keep a long-lived bash shell across execute() calls # so cwd/env vars/shell variables survive between commands. 
# Enabled by default for non-local backends (SSH); local is always opt-in @@ -1407,6 +1410,7 @@ def set_config_value(key: str, value: str): "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE", "terminal.modal_image": "TERMINAL_MODAL_IMAGE", "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE", + "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "terminal.cwd": "TERMINAL_CWD", "terminal.timeout": "TERMINAL_TIMEOUT", "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR", diff --git a/tests/hermes_cli/test_set_config_value.py b/tests/hermes_cli/test_set_config_value.py index 52a9d1a6c..4eae64d6e 100644 --- a/tests/hermes_cli/test_set_config_value.py +++ b/tests/hermes_cli/test_set_config_value.py @@ -115,3 +115,13 @@ class TestConfigYamlRouting: set_config_value("terminal.docker_image", "python:3.12") config = _read_config(_isolated_hermes_home) assert "python:3.12" in config + + def test_terminal_docker_cwd_mount_flag_goes_to_config_and_env(self, _isolated_hermes_home): + set_config_value("terminal.docker_mount_cwd_to_workspace", "true") + config = _read_config(_isolated_hermes_home) + env_content = _read_env(_isolated_hermes_home) + assert "docker_mount_cwd_to_workspace: 'true'" in config or "docker_mount_cwd_to_workspace: true" in config + assert ( + "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=true" in env_content + or "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=True" in env_content + ) diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index 3ed297b59..499ebcd43 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -19,6 +19,8 @@ def _make_dummy_env(**kwargs): task_id=kwargs.get("task_id", "test-task"), volumes=kwargs.get("volumes", []), network=kwargs.get("network", True), + host_cwd=kwargs.get("host_cwd"), + auto_mount_cwd=kwargs.get("auto_mount_cwd", False), ) @@ -88,24 +90,16 @@ def test_ensure_docker_available_uses_resolved_executable(monkeypatch): 
def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path): - """When host_cwd is provided, it should be auto-mounted to /workspace.""" - import os - - # Create a temp directory to simulate user's project directory + """Opt-in docker cwd mounting should bind the host cwd to /workspace.""" project_dir = tmp_path / "my-project" project_dir.mkdir() - # Mock Docker availability def _run_docker_version(*args, **kwargs): return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") - def _run_docker_create(*args, **kwargs): - return subprocess.CompletedProcess(args[0], 1, stdout="", stderr="storage-opt not supported") - monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) - # Mock the inner _Docker class to capture run_args captured_run_args = [] class MockInnerDocker: @@ -120,33 +114,21 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path): MockInnerDocker, ) - # Create environment with host_cwd - env = docker_env.DockerEnvironment( - image="python:3.11", + _make_dummy_env( cwd="/workspace", - timeout=60, - persistent_filesystem=False, # Non-persistent mode uses tmpfs, should be overridden - task_id="test-auto-mount", - volumes=[], host_cwd=str(project_dir), auto_mount_cwd=True, ) - # Check that the host_cwd was added as a volume mount - volume_mount = f"-v {project_dir}:/workspace" run_args_str = " ".join(captured_run_args) - assert f"{project_dir}:/workspace" in run_args_str, f"Expected auto-mount in run_args: {run_args_str}" + assert f"{project_dir}:/workspace" in run_args_str -def test_auto_mount_disabled_via_env(monkeypatch, tmp_path): - """Auto-mount should be disabled when TERMINAL_DOCKER_NO_AUTO_MOUNT is set.""" - import os - +def test_auto_mount_disabled_by_default(monkeypatch, tmp_path): + """Host cwd should not be mounted unless the caller explicitly opts in.""" project_dir = tmp_path / "my-project" project_dir.mkdir() - 
monkeypatch.setenv("TERMINAL_DOCKER_NO_AUTO_MOUNT", "true") - def _run_docker_version(*args, **kwargs): return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") @@ -167,26 +149,18 @@ def test_auto_mount_disabled_via_env(monkeypatch, tmp_path): MockInnerDocker, ) - env = docker_env.DockerEnvironment( - image="python:3.11", - cwd="/workspace", - timeout=60, - persistent_filesystem=False, - task_id="test-no-auto-mount", - volumes=[], + _make_dummy_env( + cwd="/root", host_cwd=str(project_dir), - auto_mount_cwd=True, + auto_mount_cwd=False, ) - # Check that the host_cwd was NOT added (because env var disabled it) run_args_str = " ".join(captured_run_args) - assert f"{project_dir}:/workspace" not in run_args_str, f"Auto-mount should be disabled: {run_args_str}" + assert f"{project_dir}:/workspace" not in run_args_str def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path): - """Auto-mount should be skipped if /workspace is already mounted via user volumes.""" - import os - + """Explicit user volumes for /workspace should take precedence over cwd mount.""" project_dir = tmp_path / "my-project" project_dir.mkdir() other_dir = tmp_path / "other" @@ -212,22 +186,52 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path MockInnerDocker, ) - # User already configured a volume mount for /workspace - env = docker_env.DockerEnvironment( - image="python:3.11", + _make_dummy_env( cwd="/workspace", - timeout=60, - persistent_filesystem=False, - task_id="test-workspace-exists", - volumes=[f"{other_dir}:/workspace"], # User explicitly mounted something to /workspace host_cwd=str(project_dir), auto_mount_cwd=True, + volumes=[f"{other_dir}:/workspace"], ) - # The user's explicit mount should be present run_args_str = " ".join(captured_run_args) assert f"{other_dir}:/workspace" in run_args_str + assert run_args_str.count(":/workspace") == 1 - # But the auto-mount should NOT add a duplicate - assert 
run_args_str.count(":/workspace") == 1, f"Should only have one /workspace mount: {run_args_str}" + +def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path): + """Persistent mode should still prefer the configured host cwd at /workspace.""" + project_dir = tmp_path / "my-project" + project_dir.mkdir() + + def _run_docker_version(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") + + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) + + captured_run_args = [] + + class MockInnerDocker: + container_id = "mock-container-persistent" + config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() + + def __init__(self, **kwargs): + captured_run_args.extend(kwargs.get("run_args", [])) + + monkeypatch.setattr( + "minisweagent.environments.docker.DockerEnvironment", + MockInnerDocker, + ) + + _make_dummy_env( + cwd="/workspace", + persistent_filesystem=True, + host_cwd=str(project_dir), + auto_mount_cwd=True, + task_id="test-persistent-auto-mount", + ) + + run_args_str = " ".join(captured_run_args) + assert f"{project_dir}:/workspace" in run_args_str + assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py index 6da25216b..49c306231 100644 --- a/tests/tools/test_modal_sandbox_fixes.py +++ b/tests/tools/test_modal_sandbox_fixes.py @@ -91,8 +91,8 @@ class TestCwdHandling: "/home/ paths should be replaced for modal backend." ) - def test_users_path_replaced_for_docker(self): - """TERMINAL_CWD=/Users/... 
should be replaced with /root for docker.""" + def test_users_path_replaced_for_docker_by_default(self): + """Docker should keep host paths out of the sandbox unless explicitly enabled.""" with patch.dict(os.environ, { "TERMINAL_ENV": "docker", "TERMINAL_CWD": "/Users/someone/projects", @@ -100,8 +100,22 @@ class TestCwdHandling: config = _tt_mod._get_env_config() assert config["cwd"] == "/root", ( f"Expected /root, got {config['cwd']}. " - "/Users/ paths should be replaced for docker backend." + "Host paths should be discarded for docker backend by default." ) + assert config["host_cwd"] is None + assert config["docker_mount_cwd_to_workspace"] is False + + def test_users_path_maps_to_workspace_for_docker_when_enabled(self): + """Docker should map the host cwd into /workspace only when explicitly enabled.""" + with patch.dict(os.environ, { + "TERMINAL_ENV": "docker", + "TERMINAL_CWD": "/Users/someone/projects", + "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", + }): + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/Users/someone/projects" + assert config["docker_mount_cwd_to_workspace"] is True def test_windows_path_replaced_for_modal(self): """TERMINAL_CWD=C:\\Users\\... 
should be replaced for modal.""" @@ -119,12 +133,27 @@ class TestCwdHandling: # Remove TERMINAL_CWD so it uses default env = os.environ.copy() env.pop("TERMINAL_CWD", None) + env.pop("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", None) with patch.dict(os.environ, env, clear=True): config = _tt_mod._get_env_config() assert config["cwd"] == "/root", ( f"Backend {backend}: expected /root default, got {config['cwd']}" ) + def test_docker_default_cwd_maps_current_directory_when_enabled(self): + """Docker should use /workspace when cwd mounting is explicitly enabled.""" + with patch("tools.terminal_tool.os.getcwd", return_value="/home/user/project"): + with patch.dict(os.environ, { + "TERMINAL_ENV": "docker", + "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", + }, clear=False): + env = os.environ.copy() + env.pop("TERMINAL_CWD", None) + with patch.dict(os.environ, env, clear=True): + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/home/user/project" + def test_local_backend_uses_getcwd(self): """Local backend should use os.getcwd(), not /root.""" with patch.dict(os.environ, {"TERMINAL_ENV": "local"}, clear=False): @@ -134,6 +163,31 @@ class TestCwdHandling: config = _tt_mod._get_env_config() assert config["cwd"] == os.getcwd() + def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch): + """Docker host cwd and mount flag should reach DockerEnvironment.""" + captured = {} + sentinel = object() + + def _fake_docker_environment(**kwargs): + captured.update(kwargs) + return sentinel + + monkeypatch.setattr(_tt_mod, "_DockerEnvironment", _fake_docker_environment) + + env = _tt_mod._create_environment( + env_type="docker", + image="python:3.11", + cwd="/workspace", + timeout=60, + container_config={"docker_mount_cwd_to_workspace": True}, + host_cwd="/home/user/project", + ) + + assert env is sentinel + assert captured["cwd"] == "/workspace" + assert captured["host_cwd"] == "/home/user/project" + 
assert captured["auto_mount_cwd"] is True + def test_ssh_preserves_home_paths(self): """SSH backend should NOT replace /home/ paths (they're valid remotely).""" with patch.dict(os.environ, { diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 1c95f7b34..ec6d8b30c 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -158,10 +158,6 @@ class DockerEnvironment(BaseEnvironment): Persistence: when enabled, bind mounts preserve /workspace and /root across container restarts. - - Auto-mount: when host_cwd is provided (the user's original working directory), - it is automatically bind-mounted to /workspace unless auto_mount_cwd=False - or the path is already covered by an explicit volume mount. """ def __init__( @@ -177,7 +173,7 @@ class DockerEnvironment(BaseEnvironment): volumes: list = None, network: bool = True, host_cwd: str = None, - auto_mount_cwd: bool = True, + auto_mount_cwd: bool = False, ): if cwd == "~": cwd = "/root" @@ -220,30 +216,9 @@ class DockerEnvironment(BaseEnvironment): # mode uses tmpfs (ephemeral, fast, gone on cleanup). from tools.environments.base import get_sandbox_dir - self._workspace_dir: Optional[str] = None - self._home_dir: Optional[str] = None - if self._persistent: - sandbox = get_sandbox_dir() / "docker" / task_id - self._workspace_dir = str(sandbox / "workspace") - self._home_dir = str(sandbox / "home") - os.makedirs(self._workspace_dir, exist_ok=True) - os.makedirs(self._home_dir, exist_ok=True) - writable_args = [ - "-v", f"{self._workspace_dir}:/workspace", - "-v", f"{self._home_dir}:/root", - ] - else: - writable_args = [ - "--tmpfs", "/workspace:rw,exec,size=10g", - "--tmpfs", "/home:rw,exec,size=1g", - "--tmpfs", "/root:rw,exec,size=1g", - ] - - # All containers get security hardening (capabilities dropped, no privilege - # escalation, PID limits). The container filesystem is writable so agents - # can install packages as needed. 
# User-configured volume mounts (from config.yaml docker_volumes) volume_args = [] + workspace_explicitly_mounted = False for vol in (volumes or []): if not isinstance(vol, str): logger.warning(f"Docker volume entry is not a string: {vol!r}") @@ -253,31 +228,52 @@ class DockerEnvironment(BaseEnvironment): continue if ":" in vol: volume_args.extend(["-v", vol]) + if ":/workspace" in vol: + workspace_explicitly_mounted = True else: logger.warning(f"Docker volume '{vol}' missing colon, skipping") - # Auto-mount host CWD to /workspace when enabled (fixes #1445). - # This allows users to run `cd my-project && hermes` and have Docker - # automatically mount their project directory into the container. - # Disabled when: auto_mount_cwd=False, host_cwd is not a valid directory, - # or /workspace is already covered by writable_args or a user volume. - auto_mount_disabled = os.getenv("TERMINAL_DOCKER_NO_AUTO_MOUNT", "").lower() in ("1", "true", "yes") - if host_cwd and auto_mount_cwd and not auto_mount_disabled: - host_cwd_abs = os.path.abspath(os.path.expanduser(host_cwd)) - if os.path.isdir(host_cwd_abs): - # Check if /workspace is already being mounted by persistence or user config - workspace_already_mounted = any( - ":/workspace" in arg for arg in writable_args - ) or any( - ":/workspace" in arg for arg in volume_args - ) - if not workspace_already_mounted: - logger.info(f"Auto-mounting host CWD to /workspace: {host_cwd_abs}") - volume_args.extend(["-v", f"{host_cwd_abs}:/workspace"]) - else: - logger.debug(f"Skipping auto-mount: /workspace already mounted") - else: - logger.debug(f"Skipping auto-mount: host_cwd is not a valid directory: {host_cwd}") + host_cwd_abs = os.path.abspath(os.path.expanduser(host_cwd)) if host_cwd else "" + bind_host_cwd = ( + auto_mount_cwd + and bool(host_cwd_abs) + and os.path.isdir(host_cwd_abs) + and not workspace_explicitly_mounted + ) + if auto_mount_cwd and host_cwd and not os.path.isdir(host_cwd_abs): + logger.debug(f"Skipping docker 
cwd mount: host_cwd is not a valid directory: {host_cwd}") + + self._workspace_dir: Optional[str] = None + self._home_dir: Optional[str] = None + writable_args = [] + if self._persistent: + sandbox = get_sandbox_dir() / "docker" / task_id + self._home_dir = str(sandbox / "home") + os.makedirs(self._home_dir, exist_ok=True) + writable_args.extend([ + "-v", f"{self._home_dir}:/root", + ]) + if not bind_host_cwd and not workspace_explicitly_mounted: + self._workspace_dir = str(sandbox / "workspace") + os.makedirs(self._workspace_dir, exist_ok=True) + writable_args.extend([ + "-v", f"{self._workspace_dir}:/workspace", + ]) + else: + if not bind_host_cwd and not workspace_explicitly_mounted: + writable_args.extend([ + "--tmpfs", "/workspace:rw,exec,size=10g", + ]) + writable_args.extend([ + "--tmpfs", "/home:rw,exec,size=1g", + "--tmpfs", "/root:rw,exec,size=1g", + ]) + + if bind_host_cwd: + logger.info(f"Mounting configured host cwd to /workspace: {host_cwd_abs}") + volume_args = ["-v", f"{host_cwd_abs}:/workspace", *volume_args] + elif workspace_explicitly_mounted: + logger.debug("Skipping docker cwd mount: /workspace already mounted by user config") logger.info(f"Docker volume_args: {volume_args}") all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args diff --git a/tools/file_tools.py b/tools/file_tools.py index 98ea15bd4..ddcfcd567 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -140,6 +140,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations: container_config=container_config, local_config=local_config, task_id=task_id, + host_cwd=config.get("host_cwd"), ) with _env_lock: diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index a9326f3ec..49a82e249 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -466,6 +466,8 @@ def _get_env_config() -> Dict[str, Any]: default_image = "nikolaik/python-nodejs:python3.11-nodejs20" env_type = os.getenv("TERMINAL_ENV", "local") + mount_docker_cwd 
= os.getenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "false").lower() in ("true", "1", "yes") + # Default cwd: local uses the host's current directory, everything # else starts in the user's home (~ resolves to whatever account # is running inside the container/remote). @@ -475,21 +477,25 @@ def _get_env_config() -> Dict[str, Any]: default_cwd = "~" else: default_cwd = "/root" - + # Read TERMINAL_CWD but sanity-check it for container backends. - # If the CWD looks like a host-local path that can't exist inside a - # container/sandbox, fall back to the backend's own default. This - # catches the case where cli.py (or .env) leaked the host's CWD. - # SSH is excluded since /home/ paths are valid on remote machines. - raw_cwd = os.getenv("TERMINAL_CWD", default_cwd) - cwd = raw_cwd - # Capture original host CWD for auto-mounting into containers (fixes #1445). - # Even when the container's working directory falls back to /root, we still - # want to auto-mount the user's host project directory to /workspace. - host_cwd = raw_cwd if raw_cwd and os.path.isdir(raw_cwd) else os.getcwd() - if env_type in ("modal", "docker", "singularity", "daytona") and cwd: + # If Docker cwd passthrough is explicitly enabled, remap the host path to + # /workspace and track the original host path separately. Otherwise keep the + # normal sandbox behavior and discard host paths. 
+ cwd = os.getenv("TERMINAL_CWD", default_cwd) + host_cwd = None + host_prefixes = ("/Users/", "/home/", "C:\\", "C:/") + if env_type == "docker" and mount_docker_cwd: + docker_cwd_source = os.getenv("TERMINAL_CWD") or os.getcwd() + candidate = os.path.abspath(os.path.expanduser(docker_cwd_source)) + if ( + any(candidate.startswith(p) for p in host_prefixes) + or (os.path.isabs(candidate) and os.path.isdir(candidate) and not candidate.startswith(("/workspace", "/root"))) + ): + host_cwd = candidate + cwd = "/workspace" + elif env_type in ("modal", "docker", "singularity", "daytona") and cwd: # Host paths that won't exist inside containers - host_prefixes = ("/Users/", "/home/", "C:\\", "C:/") if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd: logger.info("Ignoring TERMINAL_CWD=%r for %s backend " "(host path won't exist in sandbox). Using %r instead.", @@ -503,7 +509,8 @@ def _get_env_config() -> Dict[str, Any]: "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image), "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image), "cwd": cwd, - "host_cwd": host_cwd, # Original host directory for auto-mounting into containers + "host_cwd": host_cwd, + "docker_mount_cwd_to_workspace": mount_docker_cwd, "timeout": _parse_env_var("TERMINAL_TIMEOUT", "180"), "lifetime_seconds": _parse_env_var("TERMINAL_LIFETIME_SECONDS", "300"), # SSH-specific config @@ -544,7 +551,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_config: SSH connection config (for env_type="ssh") container_config: Resource config for container backends (cpu, memory, disk, persistent) task_id: Task identifier for environment reuse and snapshot keying - host_cwd: Original host working directory (for auto-mounting into containers) + host_cwd: Optional host working directory to bind into Docker when explicitly enabled Returns: Environment instance with execute() method @@ -568,6 +575,7 @@ def _create_environment(env_type: str, image: str, 
cwd: str, timeout: int, persistent_filesystem=persistent, task_id=task_id, volumes=volumes, host_cwd=host_cwd, + auto_mount_cwd=cc.get("docker_mount_cwd_to_workspace", False), ) elif env_type == "singularity": @@ -957,6 +965,7 @@ def terminal_tool( "container_disk": config.get("container_disk", 51200), "container_persistent": config.get("container_persistent", True), "docker_volumes": config.get("docker_volumes", []), + "docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False), } local_config = None diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 2b462e186..daaad87bc 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -79,6 +79,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `TERMINAL_ENV` | Backend: `local`, `docker`, `ssh`, `singularity`, `modal`, `daytona` | | `TERMINAL_DOCKER_IMAGE` | Docker image (default: `python:3.11`) | | `TERMINAL_DOCKER_VOLUMES` | Additional Docker volume mounts (comma-separated `host:container` pairs) | +| `TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE` | Advanced opt-in: mount the launch cwd into Docker `/workspace` (`true`/`false`, default: `false`) | | `TERMINAL_SINGULARITY_IMAGE` | Singularity image or `.sif` path | | `TERMINAL_MODAL_IMAGE` | Modal container image | | `TERMINAL_DAYTONA_IMAGE` | Daytona sandbox image | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 9a673bc7f..ab5e47ef6 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -453,7 +453,8 @@ terminal: # Docker-specific settings docker_image: "nikolaik/python-nodejs:python3.11-nodejs20" - docker_volumes: # Share host directories with the container + docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into /workspace. 
+ docker_volumes: # Additional explicit host mounts - "/home/user/projects:/workspace/projects" - "/home/user/data:/data:ro" # :ro for read-only @@ -520,41 +521,30 @@ This is useful for: Can also be set via environment variable: `TERMINAL_DOCKER_VOLUMES='["/host:/container"]'` (JSON array). -### Docker Auto-Mount Current Directory +### Optional: Mount the Launch Directory into `/workspace` -When using the Docker backend, Hermes **automatically mounts your current working directory** to `/workspace` inside the container. This means you can: +Docker sandboxes stay isolated by default. Hermes does **not** pass your current host working directory into the container unless you explicitly opt in. -```bash -cd ~/projects/my-app -hermes -# The agent can now see and edit files in ~/projects/my-app via /workspace +Enable it in `config.yaml`: + +```yaml +terminal: + backend: docker + docker_mount_cwd_to_workspace: true ``` -No manual volume configuration needed — just `cd` to your project and run `hermes`. +When enabled: +- if you launch Hermes from `~/projects/my-app`, that host directory is bind-mounted to `/workspace` +- the Docker backend starts in `/workspace` +- file tools and terminal commands both see the same mounted project -**How it works:** -- If you're in `/home/user/projects/my-app`, that directory is mounted to `/workspace` -- The container's working directory is set to `/workspace` -- Files you edit on the host are immediately visible to the agent, and vice versa +When disabled, `/workspace` stays sandbox-owned unless you explicitly mount something via `docker_volumes`. -**Disabling auto-mount:** +Security tradeoff: +- `false` preserves the sandbox boundary +- `true` gives the sandbox direct access to the directory you launched Hermes from -If you prefer the old behavior (empty `/workspace` with tmpfs or persistent sandbox), disable auto-mount: - -```bash -export TERMINAL_DOCKER_NO_AUTO_MOUNT=true -``` - -**Precedence:** - -Auto-mount is skipped when: -1. 
`TERMINAL_DOCKER_NO_AUTO_MOUNT=true` is set -2. You've explicitly configured a volume mount to `/workspace` in `docker_volumes` -3. `container_persistent: true` is set (persistent sandbox mode uses its own `/workspace`) - -:::tip -Auto-mount is ideal for project-based work where you want the agent to operate on your actual files. For isolated sandboxing where the agent shouldn't access your filesystem, set `TERMINAL_DOCKER_NO_AUTO_MOUNT=true`. -::: +Use the opt-in only when you intentionally want the container to work on live host files. ### Persistent Shell From dfe72b9d97287d00810b2d56a3fef097b993d151 Mon Sep 17 00:00:00 2001 From: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Date: Mon, 16 Mar 2026 15:22:00 +0300 Subject: [PATCH 13/29] fix(logging): improve error logging in session search tool (#1533) --- tools/session_search_tool.py | 38 +++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index 8a8c13006..13356ec9f 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -47,9 +47,9 @@ def _format_timestamp(ts: Union[int, float, str, None]) -> str: return ts except (ValueError, OSError, OverflowError) as e: # Log specific errors for debugging while gracefully handling edge cases - logging.debug("Failed to format timestamp %s: %s", ts, e) + logging.debug("Failed to format timestamp %s: %s", ts, e, exc_info=True) except Exception as e: - logging.debug("Unexpected error formatting timestamp %s: %s", ts, e) + logging.debug("Unexpected error formatting timestamp %s: %s", ts, e, exc_info=True) return str(ts) @@ -170,7 +170,12 @@ async def _summarize_session( if attempt < max_retries - 1: await asyncio.sleep(1 * (attempt + 1)) else: - logging.warning(f"Session summarization failed after {max_retries} attempts: {e}") + logging.warning( + "Session summarization failed after %d attempts: %s", + max_retries, + e, + exc_info=True, + 
) return None @@ -237,7 +242,12 @@ def session_search( else: break except Exception as e: - logging.debug("Error resolving parent for session %s: %s", sid, e) + logging.debug( + "Error resolving parent for session %s: %s", + sid, + e, + exc_info=True, + ) break return sid @@ -270,7 +280,12 @@ def session_search( conversation_text = _truncate_around_matches(conversation_text, query) tasks.append((session_id, match_info, conversation_text, session_meta)) except Exception as e: - logging.warning(f"Failed to prepare session {session_id}: {e}") + logging.warning( + "Failed to prepare session %s: %s", + session_id, + e, + exc_info=True, + ) # Summarize all sessions in parallel async def _summarize_all() -> List[Union[str, Exception]]: @@ -289,7 +304,10 @@ def session_search( # No event loop running, create a new one results = asyncio.run(_summarize_all()) except concurrent.futures.TimeoutError: - logging.warning("Session summarization timed out after 60 seconds") + logging.warning( + "Session summarization timed out after 60 seconds", + exc_info=True, + ) return json.dumps({ "success": False, "error": "Session summarization timed out. 
Try a more specific query or reduce the limit.", @@ -298,7 +316,12 @@ def session_search( summaries = [] for (session_id, match_info, _, _), result in zip(tasks, results): if isinstance(result, Exception): - logging.warning(f"Failed to summarize session {session_id}: {result}") + logging.warning( + "Failed to summarize session %s: %s", + session_id, + result, + exc_info=True, + ) continue if result: summaries.append({ @@ -318,6 +341,7 @@ def session_search( }, ensure_ascii=False) except Exception as e: + logging.error("Session search failed: %s", e, exc_info=True) return json.dumps({"success": False, "error": f"Search failed: {str(e)}"}, ensure_ascii=False) From 25b0ae797918a9d724167e570deb397208c01b8c Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 05:23:32 -0700 Subject: [PATCH 14/29] fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. 
--- gateway/platforms/telegram.py | 87 ++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 08750faed..b10cb7663 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -202,8 +202,26 @@ class TelegramAdapter(BasePlatformAdapter): self._handle_media_message )) - # Start polling in background - await self._app.initialize() + # Start polling — retry initialize() for transient TLS resets + try: + from telegram.error import NetworkError, TimedOut + except ImportError: + NetworkError = TimedOut = OSError # type: ignore[misc,assignment] + _max_connect = 3 + for _attempt in range(_max_connect): + try: + await self._app.initialize() + break + except (NetworkError, TimedOut, OSError) as init_err: + if _attempt < _max_connect - 1: + wait = 2 ** _attempt + logger.warning( + "[%s] Connect attempt %d/%d failed: %s — retrying in %ds", + self.name, _attempt + 1, _max_connect, init_err, wait, + ) + await asyncio.sleep(wait) + else: + raise await self._app.start() loop = asyncio.get_running_loop() @@ -334,32 +352,47 @@ class TelegramAdapter(BasePlatformAdapter): message_ids = [] thread_id = metadata.get("thread_id") if metadata else None + try: + from telegram.error import NetworkError as _NetErr + except ImportError: + _NetErr = OSError # type: ignore[misc,assignment] + for i, chunk in enumerate(chunks): - # Try Markdown first, fall back to plain text if it fails - try: - msg = await self._bot.send_message( - chat_id=int(chat_id), - text=chunk, - parse_mode=ParseMode.MARKDOWN_V2, - reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, - message_thread_id=int(thread_id) if thread_id else None, - ) - except Exception as md_error: - # Markdown parsing failed, try plain text - if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower(): - logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", 
self.name, md_error) - # Strip MDV2 escape backslashes so the user doesn't - # see raw backslashes littered through the message. - plain_chunk = _strip_mdv2(chunk) - msg = await self._bot.send_message( - chat_id=int(chat_id), - text=plain_chunk, - parse_mode=None, # Plain text - reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, - message_thread_id=int(thread_id) if thread_id else None, - ) - else: - raise # Re-raise if not a parse error + msg = None + for _send_attempt in range(3): + try: + # Try Markdown first, fall back to plain text if it fails + try: + msg = await self._bot.send_message( + chat_id=int(chat_id), + text=chunk, + parse_mode=ParseMode.MARKDOWN_V2, + reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, + message_thread_id=int(thread_id) if thread_id else None, + ) + except Exception as md_error: + # Markdown parsing failed, try plain text + if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower(): + logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error) + plain_chunk = _strip_mdv2(chunk) + msg = await self._bot.send_message( + chat_id=int(chat_id), + text=plain_chunk, + parse_mode=None, + reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, + message_thread_id=int(thread_id) if thread_id else None, + ) + else: + raise + break # success + except _NetErr as send_err: + if _send_attempt < 2: + wait = 2 ** _send_attempt + logger.warning("[%s] Network error on send (attempt %d/3), retrying in %ds: %s", + self.name, _send_attempt + 1, wait, send_err) + await asyncio.sleep(wait) + else: + raise message_ids.append(str(msg.message_id)) return SendResult( From 17e87478d230134b1d290321f589d87e5bcb248c Mon Sep 17 00:00:00 2001 From: JP Lew Date: Mon, 16 Mar 2026 17:56:31 +0530 Subject: [PATCH 15/29] fix(gateway): restart on retryable startup failures (#1517) --- gateway/platforms/telegram.py | 2 + gateway/run.py | 28 +++++- 
tests/gateway/test_runner_startup_failures.py | 89 +++++++++++++++++++ tests/gateway/test_telegram_conflict.py | 33 +++++++ 4 files changed, 149 insertions(+), 3 deletions(-) create mode 100644 tests/gateway/test_runner_startup_failures.py diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 08750faed..7aa7b5278 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -265,6 +265,8 @@ class TelegramAdapter(BasePlatformAdapter): release_scoped_lock("telegram-bot-token", self._token_lock_identity) except Exception: pass + message = f"Telegram startup failed: {e}" + self._set_fatal_error("telegram_connect_error", message, retryable=True) logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True) return False diff --git a/gateway/run.py b/gateway/run.py index ec293693e..81d00f73b 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -831,12 +831,15 @@ class GatewayRunner: logger.warning("Process checkpoint recovery: %s", e) connected_count = 0 + enabled_platform_count = 0 startup_nonretryable_errors: list[str] = [] + startup_retryable_errors: list[str] = [] # Initialize and connect each configured platform for platform, platform_config in self.config.platforms.items(): if not platform_config.enabled: continue + enabled_platform_count += 1 adapter = self._create_adapter(platform, platform_config) if not adapter: @@ -858,12 +861,22 @@ class GatewayRunner: logger.info("✓ %s connected", platform.value) else: logger.warning("✗ %s failed to connect", platform.value) - if adapter.has_fatal_error and not adapter.fatal_error_retryable: - startup_nonretryable_errors.append( + if adapter.has_fatal_error: + target = ( + startup_retryable_errors + if adapter.fatal_error_retryable + else startup_nonretryable_errors + ) + target.append( f"{platform.value}: {adapter.fatal_error_message}" ) + else: + startup_retryable_errors.append( + f"{platform.value}: failed to connect" + ) except Exception as e: 
logger.error("✗ %s error: %s", platform.value, e) + startup_retryable_errors.append(f"{platform.value}: {e}") if connected_count == 0: if startup_nonretryable_errors: @@ -876,7 +889,16 @@ class GatewayRunner: pass self._request_clean_exit(reason) return True - logger.warning("No messaging platforms connected.") + if enabled_platform_count > 0: + reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect" + logger.error("Gateway failed to connect any configured messaging platform: %s", reason) + try: + from gateway.status import write_runtime_status + write_runtime_status(gateway_state="startup_failed", exit_reason=reason) + except Exception: + pass + return False + logger.warning("No messaging platforms enabled.") logger.info("Gateway will continue running for cron job execution.") # Update delivery router with adapters diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py new file mode 100644 index 000000000..315f26568 --- /dev/null +++ b/tests/gateway/test_runner_startup_failures.py @@ -0,0 +1,89 @@ +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import BasePlatformAdapter +from gateway.run import GatewayRunner +from gateway.status import read_runtime_status + + +class _RetryableFailureAdapter(BasePlatformAdapter): + def __init__(self): + super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM) + + async def connect(self) -> bool: + self._set_fatal_error( + "telegram_connect_error", + "Telegram startup failed: temporary DNS resolution failure.", + retryable=True, + ) + return False + + async def disconnect(self) -> None: + self._mark_disconnected() + + async def send(self, chat_id, content, reply_to=None, metadata=None): + raise NotImplementedError + + async def get_chat_info(self, chat_id): + return {"id": chat_id} + + +class _DisabledAdapter(BasePlatformAdapter): + def 
__init__(self): + super().__init__(PlatformConfig(enabled=False, token="***"), Platform.TELEGRAM) + + async def connect(self) -> bool: + raise AssertionError("connect should not be called for disabled platforms") + + async def disconnect(self) -> None: + self._mark_disconnected() + + async def send(self, chat_id, content, reply_to=None, metadata=None): + raise NotImplementedError + + async def get_chat_info(self, chat_id): + return {"id": chat_id} + + +@pytest.mark.asyncio +async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="***") + }, + sessions_dir=tmp_path / "sessions", + ) + runner = GatewayRunner(config) + + monkeypatch.setattr(runner, "_create_adapter", lambda platform, platform_config: _RetryableFailureAdapter()) + + ok = await runner.start() + + assert ok is False + assert runner.should_exit_cleanly is False + state = read_runtime_status() + assert state["gateway_state"] == "startup_failed" + assert "temporary DNS resolution failure" in state["exit_reason"] + assert state["platforms"]["telegram"]["state"] == "fatal" + assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error" + + +@pytest.mark.asyncio +async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=False, token="***") + }, + sessions_dir=tmp_path / "sessions", + ) + runner = GatewayRunner(config) + + ok = await runner.start() + + assert ok is True + assert runner.should_exit_cleanly is False + assert runner.adapters == {} + state = read_runtime_status() + assert state["gateway_state"] == "running" diff --git a/tests/gateway/test_telegram_conflict.py b/tests/gateway/test_telegram_conflict.py index 86dc509d5..440aa99d8 
100644 --- a/tests/gateway/test_telegram_conflict.py +++ b/tests/gateway/test_telegram_conflict.py @@ -100,6 +100,39 @@ async def test_polling_conflict_stops_polling_and_notifies_handler(monkeypatch): fatal_handler.assert_awaited_once() +@pytest.mark.asyncio +async def test_connect_marks_retryable_fatal_error_for_startup_network_failure(monkeypatch): + adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***")) + + monkeypatch.setattr( + "gateway.status.acquire_scoped_lock", + lambda scope, identity, metadata=None: (True, None), + ) + monkeypatch.setattr( + "gateway.status.release_scoped_lock", + lambda scope, identity: None, + ) + + builder = MagicMock() + builder.token.return_value = builder + app = SimpleNamespace( + bot=SimpleNamespace(), + updater=SimpleNamespace(), + add_handler=MagicMock(), + initialize=AsyncMock(side_effect=RuntimeError("Temporary failure in name resolution")), + start=AsyncMock(), + ) + builder.build.return_value = app + monkeypatch.setattr("gateway.platforms.telegram.Application", SimpleNamespace(builder=MagicMock(return_value=builder))) + + ok = await adapter.connect() + + assert ok is False + assert adapter.fatal_error_code == "telegram_connect_error" + assert adapter.fatal_error_retryable is True + assert "Temporary failure in name resolution" in adapter.fatal_error_message + + @pytest.mark.asyncio async def test_disconnect_skips_inactive_updater_and_app(monkeypatch): adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***")) From 8758e2e8d704424a7c7ce2a4de56da34a7ab8424 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 05:27:54 -0700 Subject: [PATCH 16/29] feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. 
Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter --- gateway/platforms/email.py | 22 +++++++++++++++++++--- website/docs/user-guide/messaging/email.md | 12 ++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/email.py b/gateway/platforms/email.py index 36d34f98e..d37348c99 100644 --- a/gateway/platforms/email.py +++ b/gateway/platforms/email.py @@ -135,14 +135,23 @@ def _extract_email_address(raw: str) -> str: return raw.strip().lower() -def _extract_attachments(msg: email_lib.message.Message) -> List[Dict[str, Any]]: - """Extract attachment metadata and cache files locally.""" +def _extract_attachments( + msg: email_lib.message.Message, + skip_attachments: bool = False, +) -> List[Dict[str, Any]]: + """Extract attachment metadata and cache files locally. + + When *skip_attachments* is True, all attachment/inline parts are ignored + (useful for malware protection or bandwidth savings). 
+ """ attachments = [] if not msg.is_multipart(): return attachments for part in msg.walk(): disposition = str(part.get("Content-Disposition", "")) + if skip_attachments and ("attachment" in disposition or "inline" in disposition): + continue if "attachment" not in disposition and "inline" not in disposition: continue # Skip text/plain and text/html body parts @@ -196,6 +205,13 @@ class EmailAdapter(BasePlatformAdapter): self._smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587")) self._poll_interval = int(os.getenv("EMAIL_POLL_INTERVAL", "15")) + # Skip attachments — configured via config.yaml: + # platforms: + # email: + # skip_attachments: true + extra = config.extra or {} + self._skip_attachments = extra.get("skip_attachments", False) + # Track message IDs we've already processed to avoid duplicates self._seen_uids: set = set() self._poll_task: Optional[asyncio.Task] = None @@ -306,7 +322,7 @@ class EmailAdapter(BasePlatformAdapter): message_id = msg.get("Message-ID", "") in_reply_to = msg.get("In-Reply-To", "") body = _extract_text_body(msg) - attachments = _extract_attachments(msg) + attachments = _extract_attachments(msg, skip_attachments=self._skip_attachments) results.append({ "uid": uid, diff --git a/website/docs/user-guide/messaging/email.md b/website/docs/user-guide/messaging/email.md index 8f515e851..c302532b1 100644 --- a/website/docs/user-guide/messaging/email.md +++ b/website/docs/user-guide/messaging/email.md @@ -118,6 +118,18 @@ Replies are sent via SMTP with proper email threading: The agent can send file attachments in replies. Include `MEDIA:/path/to/file` in the response and the file is attached to the outgoing email. +### Skipping Attachments + +To ignore all incoming attachments (for malware protection or bandwidth savings), add to your `config.yaml`: + +```yaml +platforms: + email: + skip_attachments: true +``` + +When enabled, attachment and inline parts are skipped before payload decoding. The email body text is still processed normally. 
+ --- ## Access Control From 352980311b3ac224cacaec89ecdfd0b5cf43d722 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 05:29:25 -0700 Subject: [PATCH 17/29] feat: permissive block_anchor thresholds and unicode normalization (#1539) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Salvaged from PR #1528 by an420eth. Closes #517. Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. 
Co-authored-by: an420eth --- tools/fuzzy_match.py | 75 +++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py index bc8e34403..f53451c63 100644 --- a/tools/fuzzy_match.py +++ b/tools/fuzzy_match.py @@ -32,6 +32,19 @@ import re from typing import Tuple, Optional, List, Callable from difflib import SequenceMatcher +UNICODE_MAP = { + "\u201c": '"', "\u201d": '"', # smart double quotes + "\u2018": "'", "\u2019": "'", # smart single quotes + "\u2014": "--", "\u2013": "-", # em/en dashes + "\u2026": "...", "\u00a0": " ", # ellipsis and non-breaking space +} + +def _unicode_normalize(text: str) -> str: + """Normalizes Unicode characters to their standard ASCII equivalents.""" + for char, repl in UNICODE_MAP.items(): + text = text.replace(char, repl) + return text + def fuzzy_find_and_replace(content: str, old_string: str, new_string: str, replace_all: bool = False) -> Tuple[str, int, Optional[str]]: @@ -253,42 +266,52 @@ def _strategy_trimmed_boundary(content: str, pattern: str) -> List[Tuple[int, in def _strategy_block_anchor(content: str, pattern: str) -> List[Tuple[int, int]]: """ Strategy 7: Match by anchoring on first and last lines. - - If first and last lines match exactly, accept middle with 70% similarity. + Adjusted with permissive thresholds and unicode normalization. 
""" - pattern_lines = pattern.split('\n') + # Normalize both strings for comparison while keeping original content for offset calculation + norm_pattern = _unicode_normalize(pattern) + norm_content = _unicode_normalize(content) + + pattern_lines = norm_pattern.split('\n') if len(pattern_lines) < 2: - return [] # Need at least 2 lines for anchoring + return [] first_line = pattern_lines[0].strip() last_line = pattern_lines[-1].strip() - content_lines = content.split('\n') - matches = [] + # Use normalized lines for matching logic + norm_content_lines = norm_content.split('\n') + # BUT use original lines for calculating start/end positions to prevent index shift + orig_content_lines = content.split('\n') pattern_line_count = len(pattern_lines) - for i in range(len(content_lines) - pattern_line_count + 1): - # Check if first and last lines match - if (content_lines[i].strip() == first_line and - content_lines[i + pattern_line_count - 1].strip() == last_line): + potential_matches = [] + for i in range(len(norm_content_lines) - pattern_line_count + 1): + if (norm_content_lines[i].strip() == first_line and + norm_content_lines[i + pattern_line_count - 1].strip() == last_line): + potential_matches.append(i) - # Check middle similarity - if pattern_line_count <= 2: - # Only first and last, they match - similarity = 1.0 - else: - content_middle = '\n'.join(content_lines[i+1:i+pattern_line_count-1]) - pattern_middle = '\n'.join(pattern_lines[1:-1]) - similarity = SequenceMatcher(None, content_middle, pattern_middle).ratio() - - if similarity >= 0.70: - # Calculate positions - start_pos = sum(len(line) + 1 for line in content_lines[:i]) - end_pos = sum(len(line) + 1 for line in content_lines[:i + pattern_line_count]) - 1 - if end_pos >= len(content): - end_pos = len(content) - matches.append((start_pos, end_pos)) + matches = [] + candidate_count = len(potential_matches) + + # Thresholding logic: 0.10 for unique matches (max flexibility), 0.30 for multiple candidates + 
threshold = 0.10 if candidate_count == 1 else 0.30 + + for i in potential_matches: + if pattern_line_count <= 2: + similarity = 1.0 + else: + # Compare normalized middle sections + content_middle = '\n'.join(norm_content_lines[i+1:i+pattern_line_count-1]) + pattern_middle = '\n'.join(pattern_lines[1:-1]) + similarity = SequenceMatcher(None, content_middle, pattern_middle).ratio() + + if similarity >= threshold: + # Calculate positions using ORIGINAL lines to ensure correct character offsets in the file + start_pos = sum(len(line) + 1 for line in orig_content_lines[:i]) + end_pos = sum(len(line) + 1 for line in orig_content_lines[:i + pattern_line_count]) - 1 + matches.append((start_pos, min(end_pos, len(content)))) return matches From b72f522e30fbdc75e6bb50714e9063d00388672c Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 05:40:05 -0700 Subject: [PATCH 18/29] test: fake minisweagent for docker cwd mount regressions Make the new Docker cwd-mount tests pass in CI environments that do not have the minisweagent package installed by injecting a fake module instead of monkeypatching an import path that may not exist. 
--- tests/tools/test_docker_environment.py | 72 +++++++++----------------- 1 file changed, 24 insertions(+), 48 deletions(-) diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index 499ebcd43..03b32d207 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -1,11 +1,31 @@ import logging import subprocess +import sys +import types import pytest from tools.environments import docker as docker_env +def _install_fake_minisweagent(monkeypatch, captured_run_args): + class MockInnerDocker: + container_id = "fake-container" + config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() + + def __init__(self, **kwargs): + captured_run_args.extend(kwargs.get("run_args", [])) + + minisweagent_mod = types.ModuleType("minisweagent") + environments_mod = types.ModuleType("minisweagent.environments") + docker_mod = types.ModuleType("minisweagent.environments.docker") + docker_mod.DockerEnvironment = MockInnerDocker + + monkeypatch.setitem(sys.modules, "minisweagent", minisweagent_mod) + monkeypatch.setitem(sys.modules, "minisweagent.environments", environments_mod) + monkeypatch.setitem(sys.modules, "minisweagent.environments.docker", docker_mod) + + def _make_dummy_env(**kwargs): """Helper to construct DockerEnvironment with minimal required args.""" return docker_env.DockerEnvironment( @@ -101,18 +121,7 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path): monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) captured_run_args = [] - - class MockInnerDocker: - container_id = "mock-container-123" - config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() - - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) - - monkeypatch.setattr( - "minisweagent.environments.docker.DockerEnvironment", - MockInnerDocker, - ) + _install_fake_minisweagent(monkeypatch, 
captured_run_args) _make_dummy_env( cwd="/workspace", @@ -136,18 +145,7 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path): monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) captured_run_args = [] - - class MockInnerDocker: - container_id = "mock-container-456" - config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() - - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) - - monkeypatch.setattr( - "minisweagent.environments.docker.DockerEnvironment", - MockInnerDocker, - ) + _install_fake_minisweagent(monkeypatch, captured_run_args) _make_dummy_env( cwd="/root", @@ -173,18 +171,7 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) captured_run_args = [] - - class MockInnerDocker: - container_id = "mock-container-789" - config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() - - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) - - monkeypatch.setattr( - "minisweagent.environments.docker.DockerEnvironment", - MockInnerDocker, - ) + _install_fake_minisweagent(monkeypatch, captured_run_args) _make_dummy_env( cwd="/workspace", @@ -210,18 +197,7 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path): monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) captured_run_args = [] - - class MockInnerDocker: - container_id = "mock-container-persistent" - config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() - - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) - - monkeypatch.setattr( - "minisweagent.environments.docker.DockerEnvironment", - MockInnerDocker, - ) + _install_fake_minisweagent(monkeypatch, captured_run_args) _make_dummy_env( cwd="/workspace", From 
c51e7b4af7844f09ebe6cb866332a579cc781562 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 05:48:45 -0700 Subject: [PATCH 19/29] feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<hash> - User IDs → hashed to user_<hash> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. --- cli-config.yaml.example | 11 ++ gateway/run.py | 11 +- gateway/session.py | 80 +++++++++++++- hermes_cli/config.py | 5 + tests/gateway/test_pii_redaction.py | 132 +++++++++++++++++++++++ website/docs/user-guide/configuration.md | 19 ++++ 6 files changed, 252 insertions(+), 6 deletions(-) create mode 100644 tests/gateway/test_pii_redaction.py diff --git a/cli-config.yaml.example b/cli-config.yaml.example index c493a309d..0c947e479 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -742,3 +742,14 @@ display: # tool_prefix: "╎" # Tool output line prefix (default: ┊) # skin: default + +# ============================================================================= +# Privacy +# ============================================================================= +# privacy: +# # Redact PII from the LLM context prompt. +# # When true, phone numbers are stripped and user/chat IDs are replaced +# # with deterministic hashes before being sent to the model.
+# # Names and usernames are NOT affected (user-chosen, publicly visible). +# # Routing/delivery still uses the original values internally. +# redact_pii: false diff --git a/gateway/run.py b/gateway/run.py index 81d00f73b..f77821c5a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1452,8 +1452,17 @@ class GatewayRunner: # Set environment variables for tools self._set_session_env(context) + # Read privacy.redact_pii from config (re-read per message) + _redact_pii = False + try: + with open(_config_path, encoding="utf-8") as _pf: + _pcfg = yaml.safe_load(_pf) or {} + _redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False)) + except Exception: + pass + # Build the context prompt to inject - context_prompt = build_session_context_prompt(context) + context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii) # If the previous session expired and was auto-reset, prepend a notice # so the agent knows this is a fresh conversation (not an intentional /reset). 
diff --git a/gateway/session.py b/gateway/session.py index 23971a912..f7dc5263d 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -8,9 +8,11 @@ Handles: - Dynamic system prompt injection (agent knows its context) """ +import hashlib import logging import os import json +import re import uuid from pathlib import Path from datetime import datetime, timedelta @@ -19,6 +21,41 @@ from typing import Dict, List, Optional, Any logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# PII redaction helpers +# --------------------------------------------------------------------------- + +_PHONE_RE = re.compile(r"^\+?\d[\d\-\s]{6,}$") + + +def _hash_id(value: str) -> str: + """Deterministic 12-char hex hash of an identifier.""" + return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12] + + +def _hash_sender_id(value: str) -> str: + """Hash a sender ID to ``user_<12hex>``.""" + return f"user_{_hash_id(value)}" + + +def _hash_chat_id(value: str) -> str: + """Hash the numeric portion of a chat ID, preserving platform prefix. + + ``telegram:12345`` → ``telegram:<hash>`` + ``12345`` → ``<hash>`` + """ + colon = value.find(":") + if colon > 0: + prefix = value[:colon] + return f"{prefix}:{_hash_id(value[colon + 1:])}" + return _hash_id(value) + + +def _looks_like_phone(value: str) -> bool: + """Return True if *value* looks like a phone number (E.164 or similar).""" + return bool(_PHONE_RE.match(value.strip())) + from .config import ( Platform, GatewayConfig, @@ -146,7 +183,11 @@ class SessionContext: } -def build_session_context_prompt(context: SessionContext) -> str: +def build_session_context_prompt( + context: SessionContext, + *, + redact_pii: bool = False, +) -> str: """ Build the dynamic system prompt section that tells the agent about its context.
@@ -154,6 +195,10 @@ def build_session_context_prompt(context: SessionContext) -> str: - Where messages are coming from - What platforms are connected - Where it can deliver scheduled task outputs + + When *redact_pii* is True, phone numbers are stripped and user/chat IDs + are replaced with deterministic hashes before being sent to the LLM. + Routing still uses the original values (they stay in SessionSource). """ lines = [ "## Current Session Context", @@ -165,7 +210,25 @@ def build_session_context_prompt(context: SessionContext) -> str: if context.source.platform == Platform.LOCAL: lines.append(f"**Source:** {platform_name} (the machine running this agent)") else: - lines.append(f"**Source:** {platform_name} ({context.source.description})") + # Build a description that respects PII redaction + src = context.source + if redact_pii: + # Build a safe description without raw IDs + _uname = src.user_name or ( + _hash_sender_id(src.user_id) if src.user_id else "user" + ) + _cname = src.chat_name or _hash_chat_id(src.chat_id) + if src.chat_type == "dm": + desc = f"DM with {_uname}" + elif src.chat_type == "group": + desc = f"group: {_cname}" + elif src.chat_type == "channel": + desc = f"channel: {_cname}" + else: + desc = _cname + else: + desc = src.description + lines.append(f"**Source:** {platform_name} ({desc})") # Channel topic (if available - provides context about the channel's purpose) if context.source.chat_topic: @@ -175,7 +238,10 @@ def build_session_context_prompt(context: SessionContext) -> str: if context.source.user_name: lines.append(f"**User:** {context.source.user_name}") elif context.source.user_id: - lines.append(f"**User ID:** {context.source.user_id}") + uid = context.source.user_id + if redact_pii: + uid = _hash_sender_id(uid) + lines.append(f"**User ID:** {uid}") # Platform-specific behavioral notes if context.source.platform == Platform.SLACK: @@ -210,7 +276,8 @@ def build_session_context_prompt(context: SessionContext) -> str: lines.append("") 
lines.append("**Home Channels (default destinations):**") for platform, home in context.home_channels.items(): - lines.append(f" - {platform.value}: {home.name} (ID: {home.chat_id})") + hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id + lines.append(f" - {platform.value}: {home.name} (ID: {hc_id})") # Delivery options for scheduled tasks lines.append("") @@ -220,7 +287,10 @@ def build_session_context_prompt(context: SessionContext) -> str: if context.source.platform == Platform.LOCAL: lines.append("- `\"origin\"` → Local output (saved to files)") else: - lines.append(f"- `\"origin\"` → Back to this chat ({context.source.chat_name or context.source.chat_id})") + _origin_label = context.source.chat_name or ( + _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id + ) + lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})") # Local always available lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)") diff --git a/hermes_cli/config.py b/hermes_cli/config.py index dbb37b284..5c19ad676 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -207,6 +207,11 @@ DEFAULT_CONFIG = { "show_reasoning": False, "skin": "default", }, + + # Privacy settings + "privacy": { + "redact_pii": False, # When True, hash user IDs and strip phone numbers from LLM context + }, # Text-to-speech configuration "tts": { diff --git a/tests/gateway/test_pii_redaction.py b/tests/gateway/test_pii_redaction.py new file mode 100644 index 000000000..ca2570723 --- /dev/null +++ b/tests/gateway/test_pii_redaction.py @@ -0,0 +1,132 @@ +"""Tests for PII redaction in gateway session context prompts.""" + +from gateway.session import ( + SessionContext, + SessionSource, + build_session_context_prompt, + _hash_id, + _hash_sender_id, + _hash_chat_id, + _looks_like_phone, +) +from gateway.config import Platform, HomeChannel + + +# --------------------------------------------------------------------------- +# Low-level 
helpers +# --------------------------------------------------------------------------- + +class TestHashHelpers: + def test_hash_id_deterministic(self): + assert _hash_id("12345") == _hash_id("12345") + + def test_hash_id_12_hex_chars(self): + h = _hash_id("user-abc") + assert len(h) == 12 + assert all(c in "0123456789abcdef" for c in h) + + def test_hash_sender_id_prefix(self): + assert _hash_sender_id("12345").startswith("user_") + assert len(_hash_sender_id("12345")) == 17 # "user_" + 12 + + def test_hash_chat_id_preserves_prefix(self): + result = _hash_chat_id("telegram:12345") + assert result.startswith("telegram:") + assert "12345" not in result + + def test_hash_chat_id_no_prefix(self): + result = _hash_chat_id("12345") + assert len(result) == 12 + assert "12345" not in result + + def test_looks_like_phone(self): + assert _looks_like_phone("+15551234567") + assert _looks_like_phone("15551234567") + assert _looks_like_phone("+1-555-123-4567") + assert not _looks_like_phone("alice") + assert not _looks_like_phone("user-123") + assert not _looks_like_phone("") + + +# --------------------------------------------------------------------------- +# Integration: build_session_context_prompt +# --------------------------------------------------------------------------- + +def _make_context( + user_id="user-123", + user_name=None, + chat_id="telegram:99999", + platform=Platform.TELEGRAM, + home_channels=None, +): + source = SessionSource( + platform=platform, + chat_id=chat_id, + chat_type="dm", + user_id=user_id, + user_name=user_name, + ) + return SessionContext( + source=source, + connected_platforms=[platform], + home_channels=home_channels or {}, + ) + + +class TestBuildSessionContextPromptRedaction: + def test_no_redaction_by_default(self): + ctx = _make_context(user_id="user-123") + prompt = build_session_context_prompt(ctx) + assert "user-123" in prompt + + def test_user_id_hashed_when_redact_pii(self): + ctx = _make_context(user_id="user-123") + prompt = 
build_session_context_prompt(ctx, redact_pii=True) + assert "user-123" not in prompt + assert "user_" in prompt # hashed ID present + + def test_user_name_not_redacted(self): + ctx = _make_context(user_id="user-123", user_name="Alice") + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "Alice" in prompt + # user_id should not appear when user_name is present (name takes priority) + assert "user-123" not in prompt + + def test_home_channel_id_hashed(self): + hc = { + Platform.TELEGRAM: HomeChannel( + platform=Platform.TELEGRAM, + chat_id="telegram:99999", + name="Home Chat", + ) + } + ctx = _make_context(home_channels=hc) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "99999" not in prompt + assert "telegram:" in prompt # prefix preserved + assert "Home Chat" in prompt # name not redacted + + def test_home_channel_id_preserved_without_redaction(self): + hc = { + Platform.TELEGRAM: HomeChannel( + platform=Platform.TELEGRAM, + chat_id="telegram:99999", + name="Home Chat", + ) + } + ctx = _make_context(home_channels=hc) + prompt = build_session_context_prompt(ctx, redact_pii=False) + assert "99999" in prompt + + def test_redaction_is_deterministic(self): + ctx = _make_context(user_id="+15551234567") + prompt1 = build_session_context_prompt(ctx, redact_pii=True) + prompt2 = build_session_context_prompt(ctx, redact_pii=True) + assert prompt1 == prompt2 + + def test_different_ids_produce_different_hashes(self): + ctx1 = _make_context(user_id="user-A") + ctx2 = _make_context(user_id="user-B") + p1 = build_session_context_prompt(ctx1, redact_pii=True) + p2 = build_session_context_prompt(ctx2, redact_pii=True) + assert p1 != p2 diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index ab5e47ef6..8eb28e850 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -832,6 +832,25 @@ display: | `all` | Every tool call with a short preview 
(default) | | `verbose` | Full args, results, and debug logs | +## Privacy + +```yaml +privacy: + redact_pii: false # Strip PII from LLM context (gateway only) +``` + +When `redact_pii` is `true`, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM: + +| Field | Treatment | +|-------|-----------| +| Phone numbers (user ID on WhatsApp/Signal) | Hashed to `user_<12-char-sha256>` | +| User IDs | Hashed to `user_<12-char-sha256>` | +| Chat IDs | Numeric portion hashed, platform prefix preserved (`telegram:<hash>`) | +| Home channel IDs | Numeric portion hashed | +| User names / usernames | **Not affected** (user-chosen, publicly visible) | + +Hashes are deterministic — the same user always maps to the same hash, so the model can still distinguish between users in group chats. Routing and delivery use the original values internally. + ## Speech-to-Text (STT) ```yaml From 9a423c348737d2240665060c0f9ad371ca13835f Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 05:58:34 -0700 Subject: [PATCH 20/29] fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack.
--- gateway/session.py | 16 +++++++++++++++- tests/gateway/test_pii_redaction.py | 24 ++++++++++++++++++++++++ website/docs/user-guide/configuration.md | 4 +++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/gateway/session.py b/gateway/session.py index f7dc5263d..d0bf0cfe4 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -183,6 +183,16 @@ class SessionContext: } +_PII_SAFE_PLATFORMS = frozenset({ + Platform.WHATSAPP, + Platform.SIGNAL, + Platform.TELEGRAM, +}) +"""Platforms where user IDs can be safely redacted (no in-message mention system +that requires raw IDs). Discord is excluded because mentions use ``<@user_id>`` +and the LLM needs the real ID to tag users.""" + + def build_session_context_prompt( context: SessionContext, *, @@ -196,10 +206,14 @@ def build_session_context_prompt( - What platforms are connected - Where it can deliver scheduled task outputs - When *redact_pii* is True, phone numbers are stripped and user/chat IDs + When *redact_pii* is True **and** the source platform is in + ``_PII_SAFE_PLATFORMS``, phone numbers are stripped and user/chat IDs are replaced with deterministic hashes before being sent to the LLM. + Platforms like Discord are excluded because mentions need real IDs. Routing still uses the original values (they stay in SessionSource). 
""" + # Only apply redaction on platforms where IDs aren't needed for mentions + redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS lines = [ "## Current Session Context", "", diff --git a/tests/gateway/test_pii_redaction.py b/tests/gateway/test_pii_redaction.py index ca2570723..1982f5e88 100644 --- a/tests/gateway/test_pii_redaction.py +++ b/tests/gateway/test_pii_redaction.py @@ -130,3 +130,27 @@ class TestBuildSessionContextPromptRedaction: p1 = build_session_context_prompt(ctx1, redact_pii=True) p2 = build_session_context_prompt(ctx2, redact_pii=True) assert p1 != p2 + + def test_discord_ids_not_redacted_even_with_flag(self): + """Discord needs real IDs for <@user_id> mentions.""" + ctx = _make_context(user_id="123456789", platform=Platform.DISCORD) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "123456789" in prompt + + def test_whatsapp_ids_redacted(self): + ctx = _make_context(user_id="+15551234567", platform=Platform.WHATSAPP) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "+15551234567" not in prompt + assert "user_" in prompt + + def test_signal_ids_redacted(self): + ctx = _make_context(user_id="+15551234567", platform=Platform.SIGNAL) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "+15551234567" not in prompt + assert "user_" in prompt + + def test_slack_ids_not_redacted(self): + """Slack may need IDs for mentions too.""" + ctx = _make_context(user_id="U12345ABC", platform=Platform.SLACK) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "U12345ABC" in prompt diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 8eb28e850..1df9c491f 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -839,7 +839,7 @@ privacy: redact_pii: false # Strip PII from LLM context (gateway only) ``` -When `redact_pii` is `true`, the gateway redacts personally 
identifiable information from the system prompt before sending it to the LLM: +When `redact_pii` is `true`, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM on supported platforms: | Field | Treatment | |-------|-----------| @@ -849,6 +849,8 @@ When `redact_pii` is `true`, the gateway redacts personally identifiable informa | Home channel IDs | Numeric portion hashed | | User names / usernames | **Not affected** (user-chosen, publicly visible) | +**Platform support:** Redaction applies to WhatsApp, Signal, and Telegram. Discord and Slack are excluded because their mention systems (`<@user_id>`) require the real ID in the LLM context. + Hashes are deterministic — the same user always maps to the same hash, so the model can still distinguish between users in group chats. Routing and delivery use the original values internally. ## Speech-to-Text (STT) From 2ba219fa4b96fa649807d881e643ea3f00c735d0 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:07:45 -0700 Subject: [PATCH 21/29] feat(cli): add file path autocomplete in the input prompt (#1545) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). 
Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands --- hermes_cli/commands.py | 96 ++++++++++++ tests/hermes_cli/test_path_completion.py | 184 +++++++++++++++++++++++ 2 files changed, 280 insertions(+) create mode 100644 tests/hermes_cli/test_path_completion.py diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 7e964bd4e..3a9b5b712 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -7,7 +7,9 @@ interactive CLI. from __future__ import annotations +import os from collections.abc import Callable, Mapping +from pathlib import Path from typing import Any from prompt_toolkit.completion import Completer, Completion @@ -92,9 +94,88 @@ class SlashCommandCompleter(Completer): """ return f"{cmd_name} " if cmd_name == word else cmd_name + @staticmethod + def _extract_path_word(text: str) -> str | None: + """Extract the current word if it looks like a file path. + + Returns the path-like token under the cursor, or None if the + current word doesn't look like a path. A word is path-like when + it starts with ``./``, ``../``, ``~/``, ``/``, or contains a + ``/`` separator (e.g. ``src/main.py``). + """ + if not text: + return None + # Walk backwards to find the start of the current "word". + # Words are delimited by spaces, but paths can contain almost anything. 
+ i = len(text) - 1 + while i >= 0 and text[i] != " ": + i -= 1 + word = text[i + 1:] + if not word: + return None + # Only trigger path completion for path-like tokens + if word.startswith(("./", "../", "~/", "/")) or "/" in word: + return word + return None + + @staticmethod + def _path_completions(word: str, limit: int = 30): + """Yield Completion objects for file paths matching *word*.""" + expanded = os.path.expanduser(word) + # Split into directory part and prefix to match inside it + if expanded.endswith("/"): + search_dir = expanded + prefix = "" + else: + search_dir = os.path.dirname(expanded) or "." + prefix = os.path.basename(expanded) + + try: + entries = os.listdir(search_dir) + except OSError: + return + + count = 0 + prefix_lower = prefix.lower() + for entry in sorted(entries): + if prefix and not entry.lower().startswith(prefix_lower): + continue + if count >= limit: + break + + full_path = os.path.join(search_dir, entry) + is_dir = os.path.isdir(full_path) + + # Build the completion text (what replaces the typed word) + if word.startswith("~"): + display_path = "~/" + os.path.relpath(full_path, os.path.expanduser("~")) + elif os.path.isabs(word): + display_path = full_path + else: + # Keep relative + display_path = os.path.relpath(full_path) + + if is_dir: + display_path += "/" + + suffix = "/" if is_dir else "" + meta = "dir" if is_dir else _file_size_label(full_path) + + yield Completion( + display_path, + start_position=-len(word), + display=entry + suffix, + display_meta=meta, + ) + count += 1 + def get_completions(self, document, complete_event): text = document.text_before_cursor if not text.startswith("/"): + # Try file path completion for non-slash input + path_word = self._extract_path_word(text) + if path_word is not None: + yield from self._path_completions(path_word) return word = text[1:] @@ -120,3 +201,18 @@ class SlashCommandCompleter(Completer): display=cmd, display_meta=f"⚡ {short_desc}", ) + + +def _file_size_label(path: str) -> 
str: + """Return a compact human-readable file size, or '' on error.""" + try: + size = os.path.getsize(path) + except OSError: + return "" + if size < 1024: + return f"{size}B" + if size < 1024 * 1024: + return f"{size / 1024:.0f}K" + if size < 1024 * 1024 * 1024: + return f"{size / (1024 * 1024):.1f}M" + return f"{size / (1024 * 1024 * 1024):.1f}G" diff --git a/tests/hermes_cli/test_path_completion.py b/tests/hermes_cli/test_path_completion.py new file mode 100644 index 000000000..b41a36e2e --- /dev/null +++ b/tests/hermes_cli/test_path_completion.py @@ -0,0 +1,184 @@ +"""Tests for file path autocomplete in the CLI completer.""" + +import os +from unittest.mock import MagicMock + +import pytest +from prompt_toolkit.document import Document +from prompt_toolkit.formatted_text import to_plain_text + +from hermes_cli.commands import SlashCommandCompleter, _file_size_label + + +def _display_names(completions): + """Extract plain-text display names from a list of Completion objects.""" + return [to_plain_text(c.display) for c in completions] + + +def _display_metas(completions): + """Extract plain-text display_meta from a list of Completion objects.""" + return [to_plain_text(c.display_meta) if c.display_meta else "" for c in completions] + + +@pytest.fixture +def completer(): + return SlashCommandCompleter() + + +class TestExtractPathWord: + def test_relative_path(self): + assert SlashCommandCompleter._extract_path_word("look at ./src/main.py") == "./src/main.py" + + def test_home_path(self): + assert SlashCommandCompleter._extract_path_word("edit ~/docs/") == "~/docs/" + + def test_absolute_path(self): + assert SlashCommandCompleter._extract_path_word("read /etc/hosts") == "/etc/hosts" + + def test_parent_path(self): + assert SlashCommandCompleter._extract_path_word("check ../config.yaml") == "../config.yaml" + + def test_path_with_slash_in_middle(self): + assert SlashCommandCompleter._extract_path_word("open src/utils/helpers.py") == "src/utils/helpers.py" + + def 
test_plain_word_not_path(self): + assert SlashCommandCompleter._extract_path_word("hello world") is None + + def test_empty_string(self): + assert SlashCommandCompleter._extract_path_word("") is None + + def test_single_word_no_slash(self): + assert SlashCommandCompleter._extract_path_word("README.md") is None + + def test_word_after_space(self): + assert SlashCommandCompleter._extract_path_word("fix the bug in ./tools/") == "./tools/" + + def test_just_dot_slash(self): + assert SlashCommandCompleter._extract_path_word("./") == "./" + + def test_just_tilde_slash(self): + assert SlashCommandCompleter._extract_path_word("~/") == "~/" + + +class TestPathCompletions: + def test_lists_current_directory(self, tmp_path): + (tmp_path / "file_a.py").touch() + (tmp_path / "file_b.txt").touch() + (tmp_path / "subdir").mkdir() + + old_cwd = os.getcwd() + os.chdir(tmp_path) + try: + completions = list(SlashCommandCompleter._path_completions("./")) + names = _display_names(completions) + assert "file_a.py" in names + assert "file_b.txt" in names + assert "subdir/" in names + finally: + os.chdir(old_cwd) + + def test_filters_by_prefix(self, tmp_path): + (tmp_path / "alpha.py").touch() + (tmp_path / "beta.py").touch() + (tmp_path / "alpha_test.py").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/alpha")) + names = _display_names(completions) + assert "alpha.py" in names + assert "alpha_test.py" in names + assert "beta.py" not in names + + def test_directories_have_trailing_slash(self, tmp_path): + (tmp_path / "mydir").mkdir() + (tmp_path / "myfile.txt").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/")) + names = _display_names(completions) + metas = _display_metas(completions) + assert "mydir/" in names + idx = names.index("mydir/") + assert metas[idx] == "dir" + + def test_home_expansion(self, tmp_path, monkeypatch): + monkeypatch.setenv("HOME", str(tmp_path)) + (tmp_path / "testfile.md").touch() + + 
completions = list(SlashCommandCompleter._path_completions("~/test")) + names = _display_names(completions) + assert "testfile.md" in names + + def test_nonexistent_dir_returns_empty(self): + completions = list(SlashCommandCompleter._path_completions("/nonexistent_dir_xyz/")) + assert completions == [] + + def test_respects_limit(self, tmp_path): + for i in range(50): + (tmp_path / f"file_{i:03d}.txt").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/", limit=10)) + assert len(completions) == 10 + + def test_case_insensitive_prefix(self, tmp_path): + (tmp_path / "README.md").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/read")) + names = _display_names(completions) + assert "README.md" in names + + +class TestIntegration: + """Test the completer produces path completions via the prompt_toolkit API.""" + + def test_slash_commands_still_work(self, completer): + doc = Document("/hel", cursor_position=4) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + names = _display_names(completions) + assert "/help" in names + + def test_path_completion_triggers_on_dot_slash(self, completer, tmp_path): + (tmp_path / "test.py").touch() + old_cwd = os.getcwd() + os.chdir(tmp_path) + try: + doc = Document("edit ./te", cursor_position=9) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + names = _display_names(completions) + assert "test.py" in names + finally: + os.chdir(old_cwd) + + def test_no_completion_for_plain_words(self, completer): + doc = Document("hello world", cursor_position=11) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + assert completions == [] + + def test_absolute_path_triggers_completion(self, completer): + doc = Document("check /etc/hos", cursor_position=14) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + names = _display_names(completions) + # 
/etc/hosts should exist on Linux + assert any("host" in n.lower() for n in names) + + +class TestFileSizeLabel: + def test_bytes(self, tmp_path): + f = tmp_path / "small.txt" + f.write_text("hi") + assert _file_size_label(str(f)) == "2B" + + def test_kilobytes(self, tmp_path): + f = tmp_path / "medium.txt" + f.write_bytes(b"x" * 2048) + assert _file_size_label(str(f)) == "2K" + + def test_megabytes(self, tmp_path): + f = tmp_path / "large.bin" + f.write_bytes(b"x" * (2 * 1024 * 1024)) + assert _file_size_label(str(f)) == "2.0M" + + def test_nonexistent(self): + assert _file_size_label("/nonexistent_xyz") == "" From 57be18c0268941a51c9ad08681ddfdbace228869 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:20:11 -0700 Subject: [PATCH 22/29] feat: smart approvals + /stop command (inspired by OpenAI Codex) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. 
The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. * feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. 
--- cli.py | 30 +++++++++- gateway/run.py | 6 ++ hermes_cli/commands.py | 1 + hermes_cli/config.py | 14 +++++ tests/hermes_cli/test_commands.py | 2 +- tools/approval.py | 93 ++++++++++++++++++++++++++++++- 6 files changed, 142 insertions(+), 4 deletions(-) diff --git a/cli.py b/cli.py index aa888fd6a..1088480f3 100755 --- a/cli.py +++ b/cli.py @@ -395,7 +395,13 @@ def load_cli_config() -> Dict[str, Any]: "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER", "model": "AUXILIARY_WEB_EXTRACT_MODEL", "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", - "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + "api_key": "AUXILI..._KEY", + }, + "approval": { + "provider": "AUXILIARY_APPROVAL_PROVIDER", + "model": "AUXILIARY_APPROVAL_MODEL", + "base_url": "AUXILIARY_APPROVAL_BASE_URL", + "api_key": "AUXILIARY_APPROVAL_API_KEY", }, } @@ -1987,6 +1993,26 @@ class HermesCLI: # Treat as a git hash return ref + def _handle_stop_command(self): + """Handle /stop — kill all running background processes. + + Inspired by OpenAI Codex's separation of interrupt (stop current turn) + from /stop (clean up background processes). See openai/codex#14602. + """ + from tools.process_registry import get_registry + + registry = get_registry() + processes = registry.list_processes() + running = [p for p in processes if p.get("status") == "running"] + + if not running: + print(" No running background processes.") + return + + print(f" Stopping {len(running)} background process(es)...") + killed = registry.kill_all() + print(f" ✅ Stopped {killed} process(es).") + def _handle_paste_command(self): """Handle /paste — explicitly check clipboard for an image. 
@@ -3237,6 +3263,8 @@ class HermesCLI: self._reload_mcp() elif cmd_lower.startswith("/rollback"): self._handle_rollback_command(cmd_original) + elif cmd_lower == "/stop": + self._handle_stop_command() elif cmd_lower.startswith("/background"): self._handle_background_command(cmd_original) elif cmd_lower.startswith("/skin"): diff --git a/gateway/run.py b/gateway/run.py index f77821c5a..50e68eaad 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -157,6 +157,12 @@ if _config_path.exists(): "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", }, + "approval": { + "provider": "AUXILIARY_APPROVAL_PROVIDER", + "model": "AUXILIARY_APPROVAL_MODEL", + "base_url": "AUXILIARY_APPROVAL_BASE_URL", + "api_key": "AUXILIARY_APPROVAL_API_KEY", + }, } for _task_key, _env_map in _aux_task_env.items(): _task_cfg = _auxiliary_cfg.get(_task_key, {}) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 3a9b5b712..ecfdaba05 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -28,6 +28,7 @@ COMMANDS_BY_CATEGORY = { "/title": "Set a title for the current session (usage: /title My Session Name)", "/compress": "Manually compress conversation context (flush memories + summarize)", "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])", + "/stop": "Kill all running background processes", "/background": "Run a prompt in the background (usage: /background )", }, "Configuration": { diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 5c19ad676..d7f47c49a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -185,6 +185,12 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", }, + "approval": { + "provider": "auto", + "model": "", # fast/cheap model recommended (e.g. 
gemini-flash, haiku) + "base_url": "", + "api_key": "", + }, "mcp": { "provider": "auto", "model": "", @@ -296,6 +302,14 @@ DEFAULT_CONFIG = { "auto_thread": True, # Auto-create threads on @mention in channels (like Slack) }, + # Approval mode for dangerous commands: + # manual — always prompt the user (default) + # smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk + # off — skip all approval prompts (equivalent to --yolo) + "approvals": { + "mode": "manual", + }, + # Permanently allowed dangerous command patterns (added via "always" approval) "command_allowlist": [], # User-defined quick commands that bypass the agent loop (type: exec only) diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 218059434..cb5a863a3 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -12,7 +12,7 @@ EXPECTED_COMMANDS = { "/personality", "/clear", "/history", "/new", "/reset", "/retry", "/undo", "/save", "/config", "/cron", "/skills", "/platforms", "/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste", - "/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/quit", + "/reload-mcp", "/rollback", "/stop", "/background", "/skin", "/voice", "/quit", } diff --git a/tools/approval.py b/tools/approval.py index 92da71ca5..9f1b541ff 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -4,6 +4,7 @@ This module is the single source of truth for the dangerous command system: - Pattern detection (DANGEROUS_PATTERNS, detect_dangerous_command) - Per-session approval state (thread-safe, keyed by session_key) - Approval prompting (CLI interactive + gateway async) +- Smart approval via auxiliary LLM (auto-approve low-risk commands) - Permanent allowlist persistence (config.yaml) """ @@ -283,6 +284,68 @@ def prompt_dangerous_approval(command: str, description: str, sys.stdout.flush() +def _get_approval_mode() -> str: + """Read the approval mode from config. 
Returns 'manual', 'smart', or 'off'.""" + try: + from hermes_cli.config import load_config + config = load_config() + return config.get("approvals", {}).get("mode", "manual") + except Exception: + return "manual" + + +def _smart_approve(command: str, description: str) -> str: + """Use the auxiliary LLM to assess risk and decide approval. + + Returns 'approve' if the LLM determines the command is safe, + 'deny' if genuinely dangerous, or 'escalate' if uncertain. + + Inspired by OpenAI Codex's Smart Approvals guardian subagent + (openai/codex#13860). + """ + try: + from agent.auxiliary_client import get_text_auxiliary_client, auxiliary_max_tokens_param + + client, model = get_text_auxiliary_client(task="approval") + if not client or not model: + logger.debug("Smart approvals: no aux client available, escalating") + return "escalate" + + prompt = f"""You are a security reviewer for an AI coding agent. A terminal command was flagged by pattern matching as potentially dangerous. + +Command: {command} +Flagged reason: {description} + +Assess the ACTUAL risk of this command. Many flagged commands are false positives — for example, `python -c "print('hello')"` is flagged as "script execution via -c flag" but is completely harmless. + +Rules: +- APPROVE if the command is clearly safe (benign script execution, safe file operations, development tools, package installs, git operations, etc.) +- DENY if the command could genuinely damage the system (recursive delete of important paths, overwriting system files, fork bombs, wiping disks, dropping databases, etc.) 
+- ESCALATE if you're uncertain + +Respond with exactly one word: APPROVE, DENY, or ESCALATE""" + + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + **auxiliary_max_tokens_param(16), + temperature=0, + ) + + answer = (response.choices[0].message.content or "").strip().upper() + + if "APPROVE" in answer: + return "approve" + elif "DENY" in answer: + return "deny" + else: + return "escalate" + + except Exception as e: + logger.debug("Smart approvals: LLM call failed (%s), escalating", e) + return "escalate" + + def check_dangerous_command(command: str, env_type: str, approval_callback=None) -> dict: """Check if a command is dangerous and handle approval. @@ -372,8 +435,9 @@ def check_all_command_guards(command: str, env_type: str, if env_type in ("docker", "singularity", "modal", "daytona"): return {"approved": True, "message": None} - # --yolo: bypass all approval prompts and pre-exec guard checks - if os.getenv("HERMES_YOLO_MODE"): + # --yolo or approvals.mode=off: bypass all approval prompts + approval_mode = _get_approval_mode() + if os.getenv("HERMES_YOLO_MODE") or approval_mode == "off": return {"approved": True, "message": None} is_cli = os.getenv("HERMES_INTERACTIVE") @@ -430,6 +494,31 @@ def check_all_command_guards(command: str, env_type: str, if not warnings: return {"approved": True, "message": None} + # --- Phase 2.5: Smart approval (auxiliary LLM risk assessment) --- + # When approvals.mode=smart, ask the aux LLM before prompting the user. + # Inspired by OpenAI Codex's Smart Approvals guardian subagent + # (openai/codex#13860). 
+ if approval_mode == "smart": + combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings) + verdict = _smart_approve(command, combined_desc_for_llm) + if verdict == "approve": + # Auto-approve and grant session-level approval for these patterns + for key, _, _ in warnings: + approve_session(session_key, key) + logger.debug("Smart approval: auto-approved '%s' (%s)", + command[:60], combined_desc_for_llm) + return {"approved": True, "message": None, + "smart_approved": True} + elif verdict == "deny": + combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings) + return { + "approved": False, + "message": f"BLOCKED by smart approval: {combined_desc_for_llm}. " + "The command was assessed as genuinely dangerous. Do NOT retry.", + "smart_denied": True, + } + # verdict == "escalate" → fall through to manual prompt + # --- Phase 3: Approval --- # Combine descriptions for a single approval prompt From 9d1483c7e64765e2f1be511c83e415e2baee0529 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 06:38:20 -0700 Subject: [PATCH 23/29] =?UTF-8?q?feat(browser):=20/browser=20connect=20?= =?UTF-8?q?=E2=80=94=20attach=20browser=20tools=20to=20live=20Chrome=20via?= =?UTF-8?q?=20CDP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add /browser slash command for connecting browser tools to the user's live Chrome instance via Chrome DevTools Protocol: /browser connect — connect to Chrome on localhost:9222 /browser connect ws://host:port — custom CDP endpoint /browser disconnect — revert to default (headless/Browserbase) /browser status — show current browser mode + connectivity When connected: - All browser tools (navigate, snapshot, click, etc.) 
control the user's real Chrome — logged-in sessions, cookies, open tabs - Platform-specific Chrome launch instructions are shown - Port connectivity is tested immediately - A context message is injected so the model knows it's controlling a live browser and should be mindful of user's open tabs Implementation: - BROWSER_CDP_URL env var drives the backend selection in browser_tool.py - New _create_cdp_session() creates sessions using the CDP override - _get_cdp_override() checked before local/Browserbase selection - Existing agent-browser --cdp flag handles the actual CDP connection Inspired by OpenClaw's browser profile system. --- cli.py | 136 ++++++++++++++++++++++++++++++ hermes_cli/commands.py | 1 + tests/hermes_cli/test_commands.py | 2 +- tools/browser_tool.py | 31 ++++++- 4 files changed, 168 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index aa888fd6a..8116f5fd0 100755 --- a/cli.py +++ b/cli.py @@ -1420,6 +1420,8 @@ class HermesCLI: return "Processing skills command..." if cmd_lower == "/reload-mcp": return "Reloading MCP servers..." + if cmd_lower.startswith("/browser"): + return "Configuring browser..." return "Processing command..." 
def _command_spinner_frame(self) -> str: @@ -3235,6 +3237,8 @@ class HermesCLI: elif cmd_lower == "/reload-mcp": with self._busy_command(self._slow_command_status(cmd_original)): self._reload_mcp() + elif cmd_lower.startswith("/browser"): + self._handle_browser_command(cmd_original) elif cmd_lower.startswith("/rollback"): self._handle_rollback_command(cmd_original) elif cmd_lower.startswith("/background"): @@ -3451,6 +3455,138 @@ class HermesCLI: self._background_tasks[task_id] = thread thread.start() + def _handle_browser_command(self, cmd: str): + """Handle /browser connect|disconnect|status — manage live Chrome CDP connection.""" + import platform as _plat + import subprocess as _sp + + parts = cmd.strip().split(None, 1) + sub = parts[1].lower().strip() if len(parts) > 1 else "status" + + _DEFAULT_CDP = "ws://localhost:9222" + current = os.environ.get("BROWSER_CDP_URL", "").strip() + + if sub.startswith("connect"): + # Optionally accept a custom CDP URL: /browser connect ws://host:port + connect_parts = cmd.strip().split(None, 2) # ["/browser", "connect", "ws://..."] + cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP + + os.environ["BROWSER_CDP_URL"] = cdp_url + + # Clear any existing browser sessions so the next tool call uses the new backend + try: + from tools.browser_tool import cleanup_all_browsers + cleanup_all_browsers() + except Exception: + pass + + print() + print("🌐 Browser connected to live Chrome via CDP") + print(f" Endpoint: {cdp_url}") + print() + + # Platform-specific launch instructions + sys_name = _plat.system() + if sys_name == "Darwin": + chrome_cmd = '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222' + elif sys_name == "Windows": + chrome_cmd = 'chrome.exe --remote-debugging-port=9222' + else: + chrome_cmd = "google-chrome --remote-debugging-port=9222" + + print(" If Chrome isn't running with remote debugging yet:") + print(f" $ {chrome_cmd}") + print() + + # Quick 
connectivity test + _port = 9222 + try: + _port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0]) + except (ValueError, IndexError): + pass + try: + import socket + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect(("127.0.0.1", _port)) + s.close() + print(f" ✓ Port {_port} is open — Chrome is reachable") + except (OSError, socket.timeout): + print(f" ⚠ Port {_port} is not open — launch Chrome with the command above first") + print() + + # Inject context message so the model knows + if hasattr(self, '_pending_input'): + self._pending_input.put( + "[System note: The user has connected the browser tools to their live Chrome browser " + "session via Chrome DevTools Protocol. You now have access to their real browser — " + "any pages they have open, their logged-in sessions, bookmarks, etc. " + "Use the browser tools (browser_navigate, browser_snapshot, browser_click, etc.) " + "to interact with their live browser. Be mindful that actions affect their real browser. " + "Ask before closing tabs or navigating away from pages they might be using.]" + ) + + elif sub == "disconnect": + if current: + os.environ.pop("BROWSER_CDP_URL", None) + try: + from tools.browser_tool import cleanup_all_browsers + cleanup_all_browsers() + except Exception: + pass + print() + print("🌐 Browser disconnected from live Chrome") + print(" Browser tools reverted to default mode (local headless or Browserbase)") + print() + + if hasattr(self, '_pending_input'): + self._pending_input.put( + "[System note: The user has disconnected the browser tools from their live Chrome. 
" + "Browser tools are back to default mode (headless local browser or Browserbase cloud).]" + ) + else: + print() + print("Browser is not connected to live Chrome (already using default mode)") + print() + + elif sub == "status": + print() + if current: + print(f"🌐 Browser: connected to live Chrome via CDP") + print(f" Endpoint: {current}") + + _port = 9222 + try: + _port = int(current.rsplit(":", 1)[-1].split("/")[0]) + except (ValueError, IndexError): + pass + try: + import socket + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect(("127.0.0.1", _port)) + s.close() + print(f" Status: ✓ reachable") + except (OSError, Exception): + print(f" Status: ⚠ not reachable (Chrome may not be running)") + elif os.environ.get("BROWSERBASE_API_KEY"): + print("🌐 Browser: Browserbase (cloud)") + else: + print("🌐 Browser: local headless Chromium (agent-browser)") + print() + print(" /browser connect — connect to your live Chrome") + print(" /browser disconnect — revert to default") + print() + + else: + print() + print("Usage: /browser connect|disconnect|status") + print() + print(" connect Connect browser tools to your live Chrome session") + print(" disconnect Revert to default browser backend") + print(" status Show current browser mode") + print() + def _handle_skin_command(self, cmd: str): """Handle /skin [name] — show or change the display skin.""" try: diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 7e964bd4e..d81aea142 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -45,6 +45,7 @@ COMMANDS_BY_CATEGORY = { "/skills": "Search, install, inspect, or manage skills from online registries", "/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)", "/reload-mcp": "Reload MCP servers from config.yaml", + "/browser": "Connect browser tools to your live Chrome (usage: /browser connect|disconnect|status)", }, "Info": { "/help": "Show this help message", diff --git 
a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 218059434..84e1694cb 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -12,7 +12,7 @@ EXPECTED_COMMANDS = { "/personality", "/clear", "/history", "/new", "/reset", "/retry", "/undo", "/save", "/config", "/cron", "/skills", "/platforms", "/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste", - "/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/quit", + "/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/browser", "/quit", } diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 88eba3884..e595e8105 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -98,6 +98,16 @@ def _get_extraction_model() -> Optional[str]: return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None +def _get_cdp_override() -> str: + """Return a user-supplied CDP URL override, or empty string. + + When ``BROWSER_CDP_URL`` is set (e.g. via ``/browser connect``), we skip + both Browserbase and the local headless launcher and connect directly to + the supplied Chrome DevTools Protocol endpoint. + """ + return os.environ.get("BROWSER_CDP_URL", "").strip() + + def _is_local_mode() -> bool: """Return True when no Browserbase credentials are configured. @@ -105,6 +115,8 @@ def _is_local_mode() -> bool: ``agent-browser --session`` instead of connecting to a remote Browserbase session via ``--cdp``. 
""" + if _get_cdp_override(): + return False # CDP override takes priority return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID")) @@ -608,6 +620,20 @@ def _create_local_session(task_id: str) -> Dict[str, str]: } +def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]: + """Create a session that connects to a user-supplied CDP endpoint.""" + import uuid + session_name = f"cdp_{uuid.uuid4().hex[:10]}" + logger.info("Created CDP browser session %s → %s for task %s", + session_name, cdp_url, task_id) + return { + "session_name": session_name, + "bb_session_id": None, + "cdp_url": cdp_url, + "features": {"cdp_override": True}, + } + + def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: """ Get or create session info for the given task. @@ -638,7 +664,10 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: return _active_sessions[task_id] # Create session outside the lock (network call in cloud mode) - if _is_local_mode(): + cdp_override = _get_cdp_override() + if cdp_override: + session_info = _create_cdp_session(task_id, cdp_override) + elif _is_local_mode(): session_info = _create_local_session(task_id) else: session_info = _create_browserbase_session(task_id) From 447594be286ed0ab858b4d8878d9bc4202d8bf98 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:43:57 -0700 Subject: [PATCH 24/29] feat: first-class plugin architecture + hide status bar cost by default (#1544) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. 
Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m --- cli.py | 57 ++++++++++++++++++++++++++++-------- hermes_cli/config.py | 1 + tests/test_cli_status_bar.py | 27 +++++++++++++---- 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/cli.py b/cli.py index 1088480f3..1b975ac29 100755 --- a/cli.py +++ b/cli.py @@ -204,6 +204,7 @@ def load_cli_config() -> Dict[str, Any]: "compact": False, "resume_display": "full", "show_reasoning": False, + "show_cost": False, "skin": "default", }, "clarify": { @@ -1023,6 +1024,8 @@ class HermesCLI: self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False) # show_reasoning: display model thinking/reasoning before the response self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False) + # show_cost: display $ cost in the status bar (off by default) + self.show_cost = CLI_CONFIG["display"].get("show_cost", False) self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose") # Configuration - priority: CLI args > env vars > config file @@ -1276,13 +1279,22 @@ class HermesCLI: width = width or shutil.get_terminal_size((80, 24)).columns percent = snapshot["context_percent"] percent_label = f"{percent}%" if percent is not None else "--" - cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" duration_label = snapshot["duration"] + show_cost = getattr(self, "show_cost", False) + + if show_cost: + cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" + else: + cost_label = None if width < 52: return f"⚕ {snapshot['model_short']} · {duration_label}" if width < 76: - return f"⚕ {snapshot['model_short']} · {percent_label} · {cost_label} · {duration_label}" + parts = [f"⚕ {snapshot['model_short']}", percent_label] + if cost_label: + parts.append(cost_label) + parts.append(duration_label) + return " · 
".join(parts) if snapshot["context_length"]: ctx_total = _format_context_length(snapshot["context_length"]) @@ -1291,7 +1303,11 @@ class HermesCLI: else: context_label = "ctx --" - return f"⚕ {snapshot['model_short']} │ {context_label} │ {percent_label} │ {cost_label} │ {duration_label}" + parts = [f"⚕ {snapshot['model_short']}", context_label, percent_label] + if cost_label: + parts.append(cost_label) + parts.append(duration_label) + return " │ ".join(parts) except Exception: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -1299,8 +1315,13 @@ class HermesCLI: try: snapshot = self._get_status_bar_snapshot() width = shutil.get_terminal_size((80, 24)).columns - cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" duration_label = snapshot["duration"] + show_cost = getattr(self, "show_cost", False) + + if show_cost: + cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" + else: + cost_label = None if width < 52: return [ @@ -1314,17 +1335,23 @@ class HermesCLI: percent = snapshot["context_percent"] percent_label = f"{percent}%" if percent is not None else "--" if width < 76: - return [ + frags = [ ("class:status-bar", " ⚕ "), ("class:status-bar-strong", snapshot["model_short"]), ("class:status-bar-dim", " · "), (self._status_bar_context_style(percent), percent_label), - ("class:status-bar-dim", " · "), - ("class:status-bar-dim", cost_label), + ] + if cost_label: + frags.extend([ + ("class:status-bar-dim", " · "), + ("class:status-bar-dim", cost_label), + ]) + frags.extend([ ("class:status-bar-dim", " · "), ("class:status-bar-dim", duration_label), ("class:status-bar", " "), - ] + ]) + return frags if snapshot["context_length"]: ctx_total = _format_context_length(snapshot["context_length"]) @@ -1334,7 +1361,7 @@ class HermesCLI: context_label = "ctx --" bar_style = self._status_bar_context_style(percent) - return [ + frags = [ ("class:status-bar", " ⚕ "), 
("class:status-bar-strong", snapshot["model_short"]), ("class:status-bar-dim", " │ "), @@ -1343,12 +1370,18 @@ class HermesCLI: (bar_style, self._build_context_bar(percent)), ("class:status-bar-dim", " "), (bar_style, percent_label), - ("class:status-bar-dim", " │ "), - ("class:status-bar-dim", cost_label), + ] + if cost_label: + frags.extend([ + ("class:status-bar-dim", " │ "), + ("class:status-bar-dim", cost_label), + ]) + frags.extend([ ("class:status-bar-dim", " │ "), ("class:status-bar-dim", duration_label), ("class:status-bar", " "), - ] + ]) + return frags except Exception: return [("class:status-bar", f" {self._build_status_bar_text()} ")] diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d7f47c49a..ee5e54470 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -211,6 +211,7 @@ DEFAULT_CONFIG = { "resume_display": "full", "bell_on_complete": False, "show_reasoning": False, + "show_cost": False, # Show $ cost in the status bar (off by default) "skin": "default", }, diff --git a/tests/test_cli_status_bar.py b/tests/test_cli_status_bar.py index c5225ce91..4e281ffa8 100644 --- a/tests/test_cli_status_bar.py +++ b/tests/test_cli_status_bar.py @@ -65,24 +65,39 @@ class TestCLIStatusBar: assert "claude-sonnet-4-20250514" in text assert "12.4K/200K" in text assert "6%" in text - assert "$0.06" in text + assert "$0.06" not in text # cost hidden by default assert "15m" in text + def test_build_status_bar_text_shows_cost_when_enabled(self): + cli_obj = _attach_agent( + _make_cli(), + prompt_tokens=10000, + completion_tokens=2400, + total_tokens=12400, + api_calls=7, + context_tokens=12400, + context_length=200_000, + ) + cli_obj.show_cost = True + + text = cli_obj._build_status_bar_text(width=120) + assert "$" in text # cost is shown when enabled + def test_build_status_bar_text_collapses_for_narrow_terminal(self): cli_obj = _attach_agent( _make_cli(), - prompt_tokens=10_230, - completion_tokens=2_220, - total_tokens=12_450, + 
prompt_tokens=10000, + completion_tokens=2400, + total_tokens=12400, api_calls=7, - context_tokens=12_450, + context_tokens=12400, context_length=200_000, ) text = cli_obj._build_status_bar_text(width=60) assert "⚕" in text - assert "$0.06" in text + assert "$0.06" not in text # cost hidden by default assert "15m" in text assert "200K" not in text From 1ecfe68675aa81f3e728c8099ef2b2b3e5b18e81 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:52:32 -0700 Subject: [PATCH 25/29] feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. 
Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers --- agent/prompt_builder.py | 22 +++++++++++++++++----- run_agent.py | 15 +++++++++------ tools/memory_tool.py | 4 +++- tools/skill_manager_tool.py | 3 ++- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 06d636320..b71a96293 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -73,9 +73,15 @@ DEFAULT_AGENT_IDENTITY = ( MEMORY_GUIDANCE = ( "You have persistent memory across sessions. Save durable facts using the memory " "tool: user preferences, environment details, tool quirks, and stable conventions. " - "Memory is injected into every turn, so keep it compact. Do NOT save task progress, " - "session outcomes, or completed-work logs to memory; use session_search to recall " - "those from past transcripts." + "Memory is injected into every turn, so keep it compact and focused on facts that " + "will still matter later.\n" + "Prioritize what reduces future user steering — the most valuable memory is one " + "that prevents the user from having to correct or remind you again. " + "User preferences and recurring corrections matter more than procedural task details.\n" + "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " + "state to memory; use session_search to recall those from past transcripts. 
" + "If you've discovered a new way to do something, solved a problem that could be " + "necessary later, save it as a skill with the skill tool." ) SESSION_SEARCH_GUIDANCE = ( @@ -86,8 +92,11 @@ SESSION_SEARCH_GUIDANCE = ( SKILLS_GUIDANCE = ( "After completing a complex task (5+ tool calls), fixing a tricky error, " - "or discovering a non-trivial workflow, consider saving the approach as a " - "skill with skill_manage so you can reuse it next time." + "or discovering a non-trivial workflow, save the approach as a " + "skill with skill_manage so you can reuse it next time.\n" + "When using a skill and finding it outdated, incomplete, or wrong, " + "patch it immediately with skill_manage(action='patch') — don't wait to be asked. " + "Skills that aren't maintained become liabilities." ) PLATFORM_HINTS = { @@ -326,6 +335,9 @@ def build_skills_system_prompt( "Before replying, scan the skills below. If one clearly matches your task, " "load it with skill_view(name) and follow its instructions. " "If a skill has issues, fix it with skill_manage(action='patch').\n" + "After difficult/iterative tasks, offer to save as a skill. " + "If a skill you loaded was missing steps, had wrong commands, or needed " + "pitfalls you discovered, update it before finishing.\n" "\n" "\n" + "\n".join(index_lines) + "\n" diff --git a/run_agent.py b/run_agent.py index 8a4147a8b..681efcb65 100644 --- a/run_agent.py +++ b/run_agent.py @@ -812,7 +812,7 @@ class AIAgent: logger.debug("peer %s memory_mode=honcho: local USER.md writes disabled", _hcfg.peer_name or "user") # Skills config: nudge interval for skill creation reminders - self._skill_nudge_interval = 15 + self._skill_nudge_interval = 10 try: from hermes_cli.config import load_config as _load_skills_config skills_config = _load_skills_config().get("skills", {}) @@ -3542,7 +3542,8 @@ class AIAgent: flush_content = ( "[System: The session is being compressed. 
" - "Please save anything worth remembering to your memories.]" + "Save anything worth remembering — prioritize user preferences, " + "corrections, and recurring patterns over task-specific details.]" ) _sentinel = f"__flush_{id(self)}_{time.monotonic()}" flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel} @@ -4541,8 +4542,9 @@ class AIAgent: self._turns_since_memory += 1 if self._turns_since_memory >= self._memory_nudge_interval: user_message += ( - "\n\n[System: You've had several exchanges in this session. " - "Consider whether there's anything worth saving to your memories.]" + "\n\n[System: You've had several exchanges. Consider: " + "has the user shared preferences, corrected you, or revealed " + "something about their workflow worth remembering for future sessions?]" ) self._turns_since_memory = 0 @@ -4552,8 +4554,9 @@ class AIAgent: and self._iters_since_skill >= self._skill_nudge_interval and "skill_manage" in self.valid_tool_names): user_message += ( - "\n\n[System: The previous task involved many steps. " - "If you discovered a reusable workflow, consider saving it as a skill.]" + "\n\n[System: The previous task involved many tool calls. 
" + "Save the approach as a skill if it's reusable, or update " + "any existing skill you used if it was wrong or incomplete.]" ) self._iters_since_skill = 0 diff --git a/tools/memory_tool.py b/tools/memory_tool.py index f77e8116b..d7950d38c 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -439,11 +439,13 @@ MEMORY_SCHEMA = { "Memory is injected into future turns, so keep it compact and focused on facts " "that will still matter later.\n\n" "WHEN TO SAVE (do this proactively, don't wait to be asked):\n" + "- User corrects you or says 'remember this' / 'don't do that again'\n" "- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n" "- You discover something about the environment (OS, installed tools, project structure)\n" - "- User corrects you or says 'remember this' / 'don't do that again'\n" "- You learn a convention, API quirk, or workflow specific to this user's setup\n" "- You identify a stable fact that will be useful again in future sessions\n\n" + "PRIORITY: User preferences and corrections > environment facts > procedural knowledge. " + "The most valuable memory prevents the user from having to repeat themselves.\n\n" "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " "state to memory; use session_search to recall those from past transcripts.\n" "If you've discovered a new way to do something, solved a problem that could be " diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 86d04e635..203afe499 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -561,7 +561,8 @@ SKILL_MANAGE_SCHEMA = { "user-corrected approach worked, non-trivial workflow discovered, " "or user asks you to remember a procedure.\n" "Update when: instructions stale/wrong, OS-specific failures, " - "missing steps or pitfalls found during use.\n\n" + "missing steps or pitfalls found during use. 
" + "If you used a skill and hit issues not covered by it, patch it immediately.\n\n" "After difficult/iterative tasks, offer to save as a skill. " "Skip for simple one-offs. Confirm with user before creating/deleting.\n\n" "Good skills: trigger conditions, numbered steps with exact commands, " From 73f39a77614e1f72782f152803c547d168ee4420 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 07:05:48 -0700 Subject: [PATCH 26/29] feat(browser): auto-launch Chrome when /browser connect finds no debugger When /browser connect detects that port 9222 isn't open, it now: 1. Finds Chrome/Chromium/Brave/Edge on the system (macOS app bundles or Linux PATH lookup) 2. Launches it with --remote-debugging-port=9222 (detached) 3. Waits up to 5 seconds for the port to come up 4. Falls back to manual instructions if auto-launch fails This means GUI-only users can just type /browser connect without needing to know about terminal flags or Chrome launch commands. --- cli.py | 115 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 93 insertions(+), 22 deletions(-) diff --git a/cli.py b/cli.py index 8116f5fd0..85566756a 100755 --- a/cli.py +++ b/cli.py @@ -3455,6 +3455,49 @@ class HermesCLI: self._background_tasks[task_id] = thread thread.start() + @staticmethod + def _try_launch_chrome_debug(port: int, system: str) -> bool: + """Try to launch Chrome/Chromium with remote debugging enabled. + + Returns True if a launch command was executed (doesn't guarantee success). 
+ """ + import shutil + import subprocess as _sp + + candidates = [] + if system == "Darwin": + # macOS: try common app bundle locations + for app in ( + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", + "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", + "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", + ): + if os.path.isfile(app): + candidates.append(app) + else: + # Linux: try common binary names + for name in ("google-chrome", "google-chrome-stable", "chromium-browser", + "chromium", "brave-browser", "microsoft-edge"): + path = shutil.which(name) + if path: + candidates.append(path) + + if not candidates: + return False + + chrome = candidates[0] + try: + _sp.Popen( + [chrome, f"--remote-debugging-port={port}"], + stdout=_sp.DEVNULL, + stderr=_sp.DEVNULL, + start_new_session=True, # detach from terminal + ) + return True + except Exception: + return False + def _handle_browser_command(self, cmd: str): """Handle /browser connect|disconnect|status — manage live Chrome CDP connection.""" import platform as _plat @@ -3471,8 +3514,6 @@ class HermesCLI: connect_parts = cmd.strip().split(None, 2) # ["/browser", "connect", "ws://..."] cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP - os.environ["BROWSER_CDP_URL"] = cdp_url - # Clear any existing browser sessions so the next tool call uses the new backend try: from tools.browser_tool import cleanup_all_browsers @@ -3481,38 +3522,68 @@ class HermesCLI: pass print() - print("🌐 Browser connected to live Chrome via CDP") - print(f" Endpoint: {cdp_url}") - print() - # Platform-specific launch instructions - sys_name = _plat.system() - if sys_name == "Darwin": - chrome_cmd = '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222' - elif sys_name == "Windows": - chrome_cmd = 'chrome.exe --remote-debugging-port=9222' - else: - chrome_cmd = "google-chrome 
--remote-debugging-port=9222" - - print(" If Chrome isn't running with remote debugging yet:") - print(f" $ {chrome_cmd}") - print() - - # Quick connectivity test + # Extract port for connectivity checks _port = 9222 try: _port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0]) except (ValueError, IndexError): pass + + # Check if Chrome is already listening on the debug port + import socket + _already_open = False try: - import socket s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(1) s.connect(("127.0.0.1", _port)) s.close() - print(f" ✓ Port {_port} is open — Chrome is reachable") + _already_open = True except (OSError, socket.timeout): - print(f" ⚠ Port {_port} is not open — launch Chrome with the command above first") + pass + + if _already_open: + print(f" ✓ Chrome is already listening on port {_port}") + elif cdp_url == _DEFAULT_CDP: + # Try to auto-launch Chrome with remote debugging + print(" Chrome isn't running with remote debugging — attempting to launch...") + _launched = self._try_launch_chrome_debug(_port, _plat.system()) + if _launched: + # Wait for the port to come up + import time as _time + for _wait in range(10): + try: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect(("127.0.0.1", _port)) + s.close() + _already_open = True + break + except (OSError, socket.timeout): + _time.sleep(0.5) + if _already_open: + print(f" ✓ Chrome launched and listening on port {_port}") + else: + print(f" ⚠ Chrome launched but port {_port} isn't responding yet") + print(" You may need to close existing Chrome windows first and retry") + else: + print(f" ⚠ Could not auto-launch Chrome") + # Show manual instructions as fallback + sys_name = _plat.system() + if sys_name == "Darwin": + chrome_cmd = 'open -a "Google Chrome" --args --remote-debugging-port=9222' + elif sys_name == "Windows": + chrome_cmd = 'chrome.exe --remote-debugging-port=9222' + else: + chrome_cmd = "google-chrome --remote-debugging-port=9222" + 
print(f" Launch Chrome manually: {chrome_cmd}") + else: + print(f" ⚠ Port {_port} is not reachable at {cdp_url}") + + os.environ["BROWSER_CDP_URL"] = cdp_url + print() + print("🌐 Browser connected to live Chrome via CDP") + print(f" Endpoint: {cdp_url}") print() # Inject context message so the model knows From 97990e7ad55dab24260408c0dda666aaa3cbbf56 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 07:17:36 -0700 Subject: [PATCH 27/29] feat: first-class plugin architecture (#1555) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. 
--- cli.py | 21 + hermes_cli/commands.py | 1 + hermes_cli/plugins.py | 449 +++++++++++++++++++ model_tools.py | 47 +- tests/conftest.py | 6 + tests/hermes_cli/test_commands.py | 1 + tests/test_plugins.py | 340 ++++++++++++++ website/docs/guides/build-a-hermes-plugin.md | 438 ++++++++++++++++++ website/docs/user-guide/features/plugins.md | 62 +++ 9 files changed, 1357 insertions(+), 8 deletions(-) create mode 100644 hermes_cli/plugins.py create mode 100644 tests/test_plugins.py create mode 100644 website/docs/guides/build-a-hermes-plugin.md create mode 100644 website/docs/user-guide/features/plugins.md diff --git a/cli.py b/cli.py index 1b975ac29..2e26708f6 100755 --- a/cli.py +++ b/cli.py @@ -3294,6 +3294,27 @@ class HermesCLI: elif cmd_lower == "/reload-mcp": with self._busy_command(self._slow_command_status(cmd_original)): self._reload_mcp() + elif cmd_lower == "/plugins": + try: + from hermes_cli.plugins import get_plugin_manager + mgr = get_plugin_manager() + plugins = mgr.list_plugins() + if not plugins: + print("No plugins installed.") + print(f"Drop plugin directories into ~/.hermes/plugins/ to get started.") + else: + print(f"Plugins ({len(plugins)}):") + for p in plugins: + status = "✓" if p["enabled"] else "✗" + version = f" v{p['version']}" if p["version"] else "" + tools = f"{p['tools']} tools" if p["tools"] else "" + hooks = f"{p['hooks']} hooks" if p["hooks"] else "" + parts = [x for x in [tools, hooks] if x] + detail = f" ({', '.join(parts)})" if parts else "" + error = f" — {p['error']}" if p["error"] else "" + print(f" {status} {p['name']}{version}{detail}{error}") + except Exception as e: + print(f"Plugin system error: {e}") elif cmd_lower.startswith("/rollback"): self._handle_rollback_command(cmd_original) elif cmd_lower == "/stop": diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index ecfdaba05..6d6a7d18b 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -48,6 +48,7 @@ COMMANDS_BY_CATEGORY = { "/skills": "Search, 
install, inspect, or manage skills from online registries", "/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)", "/reload-mcp": "Reload MCP servers from config.yaml", + "/plugins": "List installed plugins and their status", }, "Info": { "/help": "Show this help message", diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py new file mode 100644 index 000000000..30fb28d1c --- /dev/null +++ b/hermes_cli/plugins.py @@ -0,0 +1,449 @@ +""" +Hermes Plugin System +==================== + +Discovers, loads, and manages plugins from three sources: + +1. **User plugins** – ``~/.hermes/plugins//`` +2. **Project plugins** – ``./.hermes/plugins//`` +3. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` + entry-point group. + +Each directory plugin must contain a ``plugin.yaml`` manifest **and** an +``__init__.py`` with a ``register(ctx)`` function. + +Lifecycle hooks +--------------- +Plugins may register callbacks for any of the hooks in ``VALID_HOOKS``. +The agent core calls ``invoke_hook(name, **kwargs)`` at the appropriate +points. + +Tool registration +----------------- +``PluginContext.register_tool()`` delegates to ``tools.registry.register()`` +so plugin-defined tools appear alongside the built-in tools. 
+""" + +from __future__ import annotations + +import importlib +import importlib.metadata +import importlib.util +import logging +import os +import sys +import types +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Set + +try: + import yaml +except ImportError: # pragma: no cover – yaml is optional at import time + yaml = None # type: ignore[assignment] + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +VALID_HOOKS: Set[str] = { + "pre_tool_call", + "post_tool_call", + "pre_llm_call", + "post_llm_call", + "on_session_start", + "on_session_end", +} + +ENTRY_POINTS_GROUP = "hermes_agent.plugins" + +_NS_PARENT = "hermes_plugins" + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + +@dataclass +class PluginManifest: + """Parsed representation of a plugin.yaml manifest.""" + + name: str + version: str = "" + description: str = "" + author: str = "" + requires_env: List[str] = field(default_factory=list) + provides_tools: List[str] = field(default_factory=list) + provides_hooks: List[str] = field(default_factory=list) + source: str = "" # "user", "project", or "entrypoint" + path: Optional[str] = None + + +@dataclass +class LoadedPlugin: + """Runtime state for a single loaded plugin.""" + + manifest: PluginManifest + module: Optional[types.ModuleType] = None + tools_registered: List[str] = field(default_factory=list) + hooks_registered: List[str] = field(default_factory=list) + enabled: bool = False + error: Optional[str] = None + + +# --------------------------------------------------------------------------- +# PluginContext – handed to each plugin's ``register()`` function +# 
--------------------------------------------------------------------------- + +class PluginContext: + """Facade given to plugins so they can register tools and hooks.""" + + def __init__(self, manifest: PluginManifest, manager: "PluginManager"): + self.manifest = manifest + self._manager = manager + + # -- tool registration -------------------------------------------------- + + def register_tool( + self, + name: str, + toolset: str, + schema: dict, + handler: Callable, + check_fn: Callable | None = None, + requires_env: list | None = None, + is_async: bool = False, + description: str = "", + emoji: str = "", + ) -> None: + """Register a tool in the global registry **and** track it as plugin-provided.""" + from tools.registry import registry + + registry.register( + name=name, + toolset=toolset, + schema=schema, + handler=handler, + check_fn=check_fn, + requires_env=requires_env, + is_async=is_async, + description=description, + emoji=emoji, + ) + self._manager._plugin_tool_names.add(name) + logger.debug("Plugin %s registered tool: %s", self.manifest.name, name) + + # -- hook registration -------------------------------------------------- + + def register_hook(self, hook_name: str, callback: Callable) -> None: + """Register a lifecycle hook callback. + + Unknown hook names produce a warning but are still stored so + forward-compatible plugins don't break. 
+ """ + if hook_name not in VALID_HOOKS: + logger.warning( + "Plugin '%s' registered unknown hook '%s' " + "(valid: %s)", + self.manifest.name, + hook_name, + ", ".join(sorted(VALID_HOOKS)), + ) + self._manager._hooks.setdefault(hook_name, []).append(callback) + logger.debug("Plugin %s registered hook: %s", self.manifest.name, hook_name) + + +# --------------------------------------------------------------------------- +# PluginManager +# --------------------------------------------------------------------------- + +class PluginManager: + """Central manager that discovers, loads, and invokes plugins.""" + + def __init__(self) -> None: + self._plugins: Dict[str, LoadedPlugin] = {} + self._hooks: Dict[str, List[Callable]] = {} + self._plugin_tool_names: Set[str] = set() + self._discovered: bool = False + + # ----------------------------------------------------------------------- + # Public + # ----------------------------------------------------------------------- + + def discover_and_load(self) -> None: + """Scan all plugin sources and load each plugin found.""" + if self._discovered: + return + self._discovered = True + + manifests: List[PluginManifest] = [] + + # 1. User plugins (~/.hermes/plugins/) + hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")) + user_dir = Path(hermes_home) / "plugins" + manifests.extend(self._scan_directory(user_dir, source="user")) + + # 2. Project plugins (./.hermes/plugins/) + project_dir = Path.cwd() / ".hermes" / "plugins" + manifests.extend(self._scan_directory(project_dir, source="project")) + + # 3. 
Pip / entry-point plugins + manifests.extend(self._scan_entry_points()) + + # Load each manifest + for manifest in manifests: + self._load_plugin(manifest) + + if manifests: + logger.info( + "Plugin discovery complete: %d found, %d enabled", + len(self._plugins), + sum(1 for p in self._plugins.values() if p.enabled), + ) + + # ----------------------------------------------------------------------- + # Directory scanning + # ----------------------------------------------------------------------- + + def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]: + """Read ``plugin.yaml`` manifests from subdirectories of *path*.""" + manifests: List[PluginManifest] = [] + if not path.is_dir(): + return manifests + + for child in sorted(path.iterdir()): + if not child.is_dir(): + continue + manifest_file = child / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = child / "plugin.yml" + if not manifest_file.exists(): + logger.debug("Skipping %s (no plugin.yaml)", child) + continue + + try: + if yaml is None: + logger.warning("PyYAML not installed – cannot load %s", manifest_file) + continue + data = yaml.safe_load(manifest_file.read_text()) or {} + manifest = PluginManifest( + name=data.get("name", child.name), + version=str(data.get("version", "")), + description=data.get("description", ""), + author=data.get("author", ""), + requires_env=data.get("requires_env", []), + provides_tools=data.get("provides_tools", []), + provides_hooks=data.get("provides_hooks", []), + source=source, + path=str(child), + ) + manifests.append(manifest) + except Exception as exc: + logger.warning("Failed to parse %s: %s", manifest_file, exc) + + return manifests + + # ----------------------------------------------------------------------- + # Entry-point scanning + # ----------------------------------------------------------------------- + + def _scan_entry_points(self) -> List[PluginManifest]: + """Check ``importlib.metadata`` for pip-installed plugins.""" + 
manifests: List[PluginManifest] = [] + try: + eps = importlib.metadata.entry_points() + # Python 3.12+ returns a SelectableGroups; earlier returns dict + if hasattr(eps, "select"): + group_eps = eps.select(group=ENTRY_POINTS_GROUP) + elif isinstance(eps, dict): + group_eps = eps.get(ENTRY_POINTS_GROUP, []) + else: + group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP] + + for ep in group_eps: + manifest = PluginManifest( + name=ep.name, + source="entrypoint", + path=ep.value, + ) + manifests.append(manifest) + except Exception as exc: + logger.debug("Entry-point scan failed: %s", exc) + + return manifests + + # ----------------------------------------------------------------------- + # Loading + # ----------------------------------------------------------------------- + + def _load_plugin(self, manifest: PluginManifest) -> None: + """Import a plugin module and call its ``register(ctx)`` function.""" + loaded = LoadedPlugin(manifest=manifest) + + try: + if manifest.source in ("user", "project"): + module = self._load_directory_module(manifest) + else: + module = self._load_entrypoint_module(manifest) + + loaded.module = module + + # Call register() + register_fn = getattr(module, "register", None) + if register_fn is None: + loaded.error = "no register() function" + logger.warning("Plugin '%s' has no register() function", manifest.name) + else: + ctx = PluginContext(manifest, self) + register_fn(ctx) + loaded.tools_registered = [ + t for t in self._plugin_tool_names + if t not in { + n + for name, p in self._plugins.items() + for n in p.tools_registered + } + ] + loaded.hooks_registered = list( + { + h + for h, cbs in self._hooks.items() + if cbs # non-empty + } + - { + h + for name, p in self._plugins.items() + for h in p.hooks_registered + } + ) + loaded.enabled = True + + except Exception as exc: + loaded.error = str(exc) + logger.warning("Failed to load plugin '%s': %s", manifest.name, exc) + + self._plugins[manifest.name] = loaded + + def 
_load_directory_module(self, manifest: PluginManifest) -> types.ModuleType: + """Import a directory-based plugin as ``hermes_plugins.``.""" + plugin_dir = Path(manifest.path) # type: ignore[arg-type] + init_file = plugin_dir / "__init__.py" + if not init_file.exists(): + raise FileNotFoundError(f"No __init__.py in {plugin_dir}") + + # Ensure the namespace parent package exists + if _NS_PARENT not in sys.modules: + ns_pkg = types.ModuleType(_NS_PARENT) + ns_pkg.__path__ = [] # type: ignore[attr-defined] + ns_pkg.__package__ = _NS_PARENT + sys.modules[_NS_PARENT] = ns_pkg + + module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}" + spec = importlib.util.spec_from_file_location( + module_name, + init_file, + submodule_search_locations=[str(plugin_dir)], + ) + if spec is None or spec.loader is None: + raise ImportError(f"Cannot create module spec for {init_file}") + + module = importlib.util.module_from_spec(spec) + module.__package__ = module_name + module.__path__ = [str(plugin_dir)] # type: ignore[attr-defined] + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + def _load_entrypoint_module(self, manifest: PluginManifest) -> types.ModuleType: + """Load a pip-installed plugin via its entry-point reference.""" + eps = importlib.metadata.entry_points() + if hasattr(eps, "select"): + group_eps = eps.select(group=ENTRY_POINTS_GROUP) + elif isinstance(eps, dict): + group_eps = eps.get(ENTRY_POINTS_GROUP, []) + else: + group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP] + + for ep in group_eps: + if ep.name == manifest.name: + return ep.load() + + raise ImportError( + f"Entry point '{manifest.name}' not found in group '{ENTRY_POINTS_GROUP}'" + ) + + # ----------------------------------------------------------------------- + # Hook invocation + # ----------------------------------------------------------------------- + + def invoke_hook(self, hook_name: str, **kwargs: Any) -> None: + """Call all registered callbacks 
for *hook_name*. + + Each callback is wrapped in its own try/except so a misbehaving + plugin cannot break the core agent loop. + """ + callbacks = self._hooks.get(hook_name, []) + for cb in callbacks: + try: + cb(**kwargs) + except Exception as exc: + logger.warning( + "Hook '%s' callback %s raised: %s", + hook_name, + getattr(cb, "__name__", repr(cb)), + exc, + ) + + # ----------------------------------------------------------------------- + # Introspection + # ----------------------------------------------------------------------- + + def list_plugins(self) -> List[Dict[str, Any]]: + """Return a list of info dicts for all discovered plugins.""" + result: List[Dict[str, Any]] = [] + for name, loaded in sorted(self._plugins.items()): + result.append( + { + "name": name, + "version": loaded.manifest.version, + "description": loaded.manifest.description, + "source": loaded.manifest.source, + "enabled": loaded.enabled, + "tools": len(loaded.tools_registered), + "hooks": len(loaded.hooks_registered), + "error": loaded.error, + } + ) + return result + + +# --------------------------------------------------------------------------- +# Module-level singleton & convenience functions +# --------------------------------------------------------------------------- + +_plugin_manager: Optional[PluginManager] = None + + +def get_plugin_manager() -> PluginManager: + """Return (and lazily create) the global PluginManager singleton.""" + global _plugin_manager + if _plugin_manager is None: + _plugin_manager = PluginManager() + return _plugin_manager + + +def discover_plugins() -> None: + """Discover and load all plugins (idempotent).""" + get_plugin_manager().discover_and_load() + + +def invoke_hook(hook_name: str, **kwargs: Any) -> None: + """Invoke a lifecycle hook on all loaded plugins.""" + get_plugin_manager().invoke_hook(hook_name, **kwargs) + + +def get_plugin_tool_names() -> Set[str]: + """Return the set of tool names registered by plugins.""" + return 
get_plugin_manager()._plugin_tool_names diff --git a/model_tools.py b/model_tools.py index be1f5d02f..f95ecddef 100644 --- a/model_tools.py +++ b/model_tools.py @@ -113,6 +113,13 @@ try: except Exception as e: logger.debug("MCP tool discovery failed: %s", e) +# Plugin tool discovery (user/project/pip plugins) +try: + from hermes_cli.plugins import discover_plugins + discover_plugins() +except Exception as e: + logger.debug("Plugin discovery failed: %s", e) + # ============================================================================= # Backward-compat constants (built once after discovery) @@ -222,6 +229,16 @@ def get_tool_definitions( for ts_name in get_all_toolsets(): tools_to_include.update(resolve_toolset(ts_name)) + # Always include plugin-registered tools — they bypass the toolset filter + # because their toolsets are dynamic (created at plugin load time). + try: + from hermes_cli.plugins import get_plugin_tool_names + plugin_tools = get_plugin_tool_names() + if plugin_tools: + tools_to_include.update(plugin_tools) + except Exception: + pass + # Ask the registry for schemas (only returns tools whose check_fn passes) filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode) @@ -300,25 +317,39 @@ def handle_function_call( if function_name in _AGENT_LOOP_TOOLS: return json.dumps({"error": f"{function_name} must be handled by the agent loop"}) + try: + from hermes_cli.plugins import invoke_hook + invoke_hook("pre_tool_call", tool_name=function_name, args=function_args, task_id=task_id or "") + except Exception: + pass + if function_name == "execute_code": # Prefer the caller-provided list so subagents can't overwrite # the parent's tool set via the process-global. 
sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names - return registry.dispatch( + result = registry.dispatch( function_name, function_args, task_id=task_id, enabled_tools=sandbox_enabled, honcho_manager=honcho_manager, honcho_session_key=honcho_session_key, ) + else: + result = registry.dispatch( + function_name, function_args, + task_id=task_id, + user_task=user_task, + honcho_manager=honcho_manager, + honcho_session_key=honcho_session_key, + ) - return registry.dispatch( - function_name, function_args, - task_id=task_id, - user_task=user_task, - honcho_manager=honcho_manager, - honcho_session_key=honcho_session_key, - ) + try: + from hermes_cli.plugins import invoke_hook + invoke_hook("post_tool_call", tool_name=function_name, args=function_args, result=result, task_id=task_id or "") + except Exception: + pass + + return result except Exception as e: error_msg = f"Error executing {function_name}: {str(e)}" diff --git a/tests/conftest.py b/tests/conftest.py index 67fad819b..af73fb5cb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch): (fake_home / "memories").mkdir() (fake_home / "skills").mkdir() monkeypatch.setenv("HERMES_HOME", str(fake_home)) + # Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/ + try: + import hermes_cli.plugins as _plugins_mod + monkeypatch.setattr(_plugins_mod, "_plugin_manager", None) + except Exception: + pass # Tests should not inherit the agent's current gateway/messaging surface. # Individual tests that need gateway behavior set these explicitly. 
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False) diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index cb5a863a3..b3bc98cca 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -13,6 +13,7 @@ EXPECTED_COMMANDS = { "/undo", "/save", "/config", "/cron", "/skills", "/platforms", "/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste", "/reload-mcp", "/rollback", "/stop", "/background", "/skin", "/voice", "/quit", + "/plugins", } diff --git a/tests/test_plugins.py b/tests/test_plugins.py new file mode 100644 index 000000000..88e194ef3 --- /dev/null +++ b/tests/test_plugins.py @@ -0,0 +1,340 @@ +"""Tests for the Hermes plugin system (hermes_cli.plugins).""" + +import logging +import os +import sys +import types +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +import yaml + +from hermes_cli.plugins import ( + ENTRY_POINTS_GROUP, + VALID_HOOKS, + LoadedPlugin, + PluginContext, + PluginManager, + PluginManifest, + get_plugin_manager, + get_plugin_tool_names, + discover_plugins, + invoke_hook, +) + + +# ── Helpers ──────────────────────────────────────────────────────────────── + + +def _make_plugin_dir(base: Path, name: str, *, register_body: str = "pass", + manifest_extra: dict | None = None) -> Path: + """Create a minimal plugin directory with plugin.yaml + __init__.py.""" + plugin_dir = base / name + plugin_dir.mkdir(parents=True, exist_ok=True) + + manifest = {"name": name, "version": "0.1.0", "description": f"Test plugin {name}"} + if manifest_extra: + manifest.update(manifest_extra) + + (plugin_dir / "plugin.yaml").write_text(yaml.dump(manifest)) + (plugin_dir / "__init__.py").write_text( + f"def register(ctx):\n {register_body}\n" + ) + return plugin_dir + + +# ── TestPluginDiscovery ──────────────────────────────────────────────────── + + +class TestPluginDiscovery: + """Tests for plugin discovery from directories and 
entry points.""" + + def test_discover_user_plugins(self, tmp_path, monkeypatch): + """Plugins in ~/.hermes/plugins/ are discovered.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "hello_plugin") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "hello_plugin" in mgr._plugins + assert mgr._plugins["hello_plugin"].enabled + + def test_discover_project_plugins(self, tmp_path, monkeypatch): + """Plugins in ./.hermes/plugins/ are discovered.""" + project_dir = tmp_path / "project" + project_dir.mkdir() + monkeypatch.chdir(project_dir) + plugins_dir = project_dir / ".hermes" / "plugins" + _make_plugin_dir(plugins_dir, "proj_plugin") + + mgr = PluginManager() + mgr.discover_and_load() + + assert "proj_plugin" in mgr._plugins + assert mgr._plugins["proj_plugin"].enabled + + def test_discover_is_idempotent(self, tmp_path, monkeypatch): + """Calling discover_and_load() twice does not duplicate plugins.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "once_plugin") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + mgr.discover_and_load() # second call should no-op + + assert len(mgr._plugins) == 1 + + def test_discover_skips_dir_without_manifest(self, tmp_path, monkeypatch): + """Directories without plugin.yaml are silently skipped.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + (plugins_dir / "no_manifest").mkdir(parents=True) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert len(mgr._plugins) == 0 + + def test_entry_points_scanned(self, tmp_path, monkeypatch): + """Entry-point based plugins are discovered (mocked).""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + fake_module = types.ModuleType("fake_ep_plugin") + 
fake_module.register = lambda ctx: None # type: ignore[attr-defined] + + fake_ep = MagicMock() + fake_ep.name = "ep_plugin" + fake_ep.value = "fake_ep_plugin:register" + fake_ep.group = ENTRY_POINTS_GROUP + fake_ep.load.return_value = fake_module + + def fake_entry_points(): + result = MagicMock() + result.select = MagicMock(return_value=[fake_ep]) + return result + + with patch("importlib.metadata.entry_points", fake_entry_points): + mgr = PluginManager() + mgr.discover_and_load() + + assert "ep_plugin" in mgr._plugins + + +# ── TestPluginLoading ────────────────────────────────────────────────────── + + +class TestPluginLoading: + """Tests for plugin module loading.""" + + def test_load_missing_init(self, tmp_path, monkeypatch): + """Plugin dir without __init__.py records an error.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "bad_plugin" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "bad_plugin"})) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "bad_plugin" in mgr._plugins + assert not mgr._plugins["bad_plugin"].enabled + assert mgr._plugins["bad_plugin"].error is not None + + def test_load_missing_register_fn(self, tmp_path, monkeypatch): + """Plugin without register() function records an error.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "no_reg" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "no_reg"})) + (plugin_dir / "__init__.py").write_text("# no register function\n") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "no_reg" in mgr._plugins + assert not mgr._plugins["no_reg"].enabled + assert "no register()" in mgr._plugins["no_reg"].error + + def test_load_registers_namespace_module(self, tmp_path, monkeypatch): + 
"""Directory plugins are importable under hermes_plugins..""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "ns_plugin") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + # Clean up any prior namespace module + sys.modules.pop("hermes_plugins.ns_plugin", None) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "hermes_plugins.ns_plugin" in sys.modules + + +# ── TestPluginHooks ──────────────────────────────────────────────────────── + + +class TestPluginHooks: + """Tests for lifecycle hook registration and invocation.""" + + def test_register_and_invoke_hook(self, tmp_path, monkeypatch): + """Registered hooks are called on invoke_hook().""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "hook_plugin", + register_body='ctx.register_hook("pre_tool_call", lambda **kw: None)', + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + # Should not raise + mgr.invoke_hook("pre_tool_call", tool_name="test", args={}, task_id="t1") + + def test_hook_exception_does_not_propagate(self, tmp_path, monkeypatch): + """A hook callback that raises does NOT crash the caller.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "bad_hook", + register_body='ctx.register_hook("post_tool_call", lambda **kw: 1/0)', + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + # Should not raise despite 1/0 + mgr.invoke_hook("post_tool_call", tool_name="x", args={}, result="r", task_id="") + + def test_invalid_hook_name_warns(self, tmp_path, monkeypatch, caplog): + """Registering an unknown hook name logs a warning.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "warn_plugin", + register_body='ctx.register_hook("on_banana", lambda **kw: None)', + ) + 
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + with caplog.at_level(logging.WARNING, logger="hermes_cli.plugins"): + mgr = PluginManager() + mgr.discover_and_load() + + assert any("on_banana" in record.message for record in caplog.records) + + +# ── TestPluginContext ────────────────────────────────────────────────────── + + +class TestPluginContext: + """Tests for the PluginContext facade.""" + + def test_register_tool_adds_to_registry(self, tmp_path, monkeypatch): + """PluginContext.register_tool() puts the tool in the global registry.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "tool_plugin" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "tool_plugin"})) + (plugin_dir / "__init__.py").write_text( + 'def register(ctx):\n' + ' ctx.register_tool(\n' + ' name="plugin_echo",\n' + ' toolset="plugin_tool_plugin",\n' + ' schema={"name": "plugin_echo", "description": "Echo", "parameters": {"type": "object", "properties": {}}},\n' + ' handler=lambda args, **kw: "echo",\n' + ' )\n' + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "plugin_echo" in mgr._plugin_tool_names + + from tools.registry import registry + assert "plugin_echo" in registry._tools + + +# ── TestPluginToolVisibility ─────────────────────────────────────────────── + + +class TestPluginToolVisibility: + """Plugin-registered tools appear in get_tool_definitions().""" + + def test_plugin_tools_in_definitions(self, tmp_path, monkeypatch): + """Tools from plugins bypass the toolset filter.""" + import hermes_cli.plugins as plugins_mod + + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "vis_plugin" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "vis_plugin"})) + (plugin_dir / "__init__.py").write_text( + 'def register(ctx):\n' + ' 
ctx.register_tool(\n' + ' name="vis_tool",\n' + ' toolset="plugin_vis_plugin",\n' + ' schema={"name": "vis_tool", "description": "Visible", "parameters": {"type": "object", "properties": {}}},\n' + ' handler=lambda args, **kw: "ok",\n' + ' )\n' + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + monkeypatch.setattr(plugins_mod, "_plugin_manager", mgr) + + from model_tools import get_tool_definitions + tools = get_tool_definitions(enabled_toolsets=["terminal"], quiet_mode=True) + tool_names = [t["function"]["name"] for t in tools] + assert "vis_tool" in tool_names + + +# ── TestPluginManagerList ────────────────────────────────────────────────── + + +class TestPluginManagerList: + """Tests for PluginManager.list_plugins().""" + + def test_list_empty(self): + """Empty manager returns empty list.""" + mgr = PluginManager() + assert mgr.list_plugins() == [] + + def test_list_returns_sorted(self, tmp_path, monkeypatch): + """list_plugins() returns results sorted by name.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "zulu") + _make_plugin_dir(plugins_dir, "alpha") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + listing = mgr.list_plugins() + names = [p["name"] for p in listing] + assert names == sorted(names) + + def test_list_with_plugins(self, tmp_path, monkeypatch): + """list_plugins() returns info dicts for each discovered plugin.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "alpha") + _make_plugin_dir(plugins_dir, "beta") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + listing = mgr.list_plugins() + names = [p["name"] for p in listing] + assert "alpha" in names + assert "beta" in names + for p in listing: + assert "enabled" in p + assert "tools" in p + assert 
"hooks" in p diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md new file mode 100644 index 000000000..c141f895a --- /dev/null +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -0,0 +1,438 @@ +--- +sidebar_position: 10 +--- + +# Build a Hermes Plugin + +This guide walks through building a complete Hermes plugin from scratch. By the end you'll have a working plugin with multiple tools, lifecycle hooks, shipped data files, and a bundled skill — everything the plugin system supports. + +## What you're building + +A **calculator** plugin with two tools: +- `calculate` — evaluate math expressions (`2**16`, `sqrt(144)`, `pi * 5**2`) +- `unit_convert` — convert between units (`100 F → 37.78 C`, `5 km → 3.11 mi`) + +Plus a hook that logs every tool call, and a bundled skill file. + +## Step 1: Create the plugin directory + +```bash +mkdir -p ~/.hermes/plugins/calculator +cd ~/.hermes/plugins/calculator +``` + +## Step 2: Write the manifest + +Create `plugin.yaml`: + +```yaml +name: calculator +version: 1.0.0 +description: Math calculator — evaluate expressions and convert units +provides: + tools: true + hooks: true +``` + +This tells Hermes: "I'm a plugin called calculator, I provide tools and hooks." That's all the manifest needs. + +Optional fields you could add: +```yaml +author: Your Name +requires_env: # gate loading on env vars + - SOME_API_KEY # plugin disabled if missing +``` + +## Step 3: Write the tool schemas + +Create `schemas.py` — this is what the LLM reads to decide when to call your tools: + +```python +"""Tool schemas — what the LLM sees.""" + +CALCULATE = { + "name": "calculate", + "description": ( + "Evaluate a mathematical expression and return the result. " + "Supports arithmetic (+, -, *, /, **), functions (sqrt, sin, cos, " + "log, abs, round, floor, ceil), and constants (pi, e). " + "Use this for any math the user asks about." 
+ ), + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate (e.g., '2**10', 'sqrt(144)')", + }, + }, + "required": ["expression"], + }, +} + +UNIT_CONVERT = { + "name": "unit_convert", + "description": ( + "Convert a value between units. Supports length (m, km, mi, ft, in), " + "weight (kg, lb, oz, g), temperature (C, F, K), data (B, KB, MB, GB, TB), " + "and time (s, min, hr, day)." + ), + "parameters": { + "type": "object", + "properties": { + "value": { + "type": "number", + "description": "The numeric value to convert", + }, + "from_unit": { + "type": "string", + "description": "Source unit (e.g., 'km', 'lb', 'F', 'GB')", + }, + "to_unit": { + "type": "string", + "description": "Target unit (e.g., 'mi', 'kg', 'C', 'MB')", + }, + }, + "required": ["value", "from_unit", "to_unit"], + }, +} +``` + +**Why schemas matter:** The `description` field is how the LLM decides when to use your tool. Be specific about what it does and when to use it. The `parameters` define what arguments the LLM passes. + +## Step 4: Write the tool handlers + +Create `tools.py` — this is the code that actually executes when the LLM calls your tools: + +```python +"""Tool handlers — the code that runs when the LLM calls each tool.""" + +import json +import math + +# Safe globals for expression evaluation — no file/network access +_SAFE_MATH = { + "abs": abs, "round": round, "min": min, "max": max, + "pow": pow, "sqrt": math.sqrt, "sin": math.sin, "cos": math.cos, + "tan": math.tan, "log": math.log, "log2": math.log2, "log10": math.log10, + "floor": math.floor, "ceil": math.ceil, + "pi": math.pi, "e": math.e, + "factorial": math.factorial, +} + + +def calculate(args: dict, **kwargs) -> str: + """Evaluate a math expression safely. + + Rules for handlers: + 1. Receive args (dict) — the parameters the LLM passed + 2. Do the work + 3. Return a JSON string — ALWAYS, even on error + 4. 
Accept **kwargs for forward compatibility + """ + expression = args.get("expression", "").strip() + if not expression: + return json.dumps({"error": "No expression provided"}) + + try: + result = eval(expression, {"__builtins__": {}}, _SAFE_MATH) + return json.dumps({"expression": expression, "result": result}) + except ZeroDivisionError: + return json.dumps({"expression": expression, "error": "Division by zero"}) + except Exception as e: + return json.dumps({"expression": expression, "error": f"Invalid: {e}"}) + + +# Conversion tables — values are in base units +_LENGTH = {"m": 1, "km": 1000, "mi": 1609.34, "ft": 0.3048, "in": 0.0254, "cm": 0.01} +_WEIGHT = {"kg": 1, "g": 0.001, "lb": 0.453592, "oz": 0.0283495} +_DATA = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4} +_TIME = {"s": 1, "ms": 0.001, "min": 60, "hr": 3600, "day": 86400} + + +def _convert_temp(value, from_u, to_u): + # Normalize to Celsius + c = {"F": (value - 32) * 5/9, "K": value - 273.15}.get(from_u, value) + # Convert to target + return {"F": c * 9/5 + 32, "K": c + 273.15}.get(to_u, c) + + +def unit_convert(args: dict, **kwargs) -> str: + """Convert between units.""" + value = args.get("value") + from_unit = args.get("from_unit", "").strip() + to_unit = args.get("to_unit", "").strip() + + if value is None or not from_unit or not to_unit: + return json.dumps({"error": "Need value, from_unit, and to_unit"}) + + try: + # Temperature + if from_unit.upper() in {"C","F","K"} and to_unit.upper() in {"C","F","K"}: + result = _convert_temp(float(value), from_unit.upper(), to_unit.upper()) + return json.dumps({"input": f"{value} {from_unit}", "result": round(result, 4), + "output": f"{round(result, 4)} {to_unit}"}) + + # Ratio-based conversions + for table in (_LENGTH, _WEIGHT, _DATA, _TIME): + lc = {k.lower(): v for k, v in table.items()} + if from_unit.lower() in lc and to_unit.lower() in lc: + result = float(value) * lc[from_unit.lower()] / lc[to_unit.lower()] + return 
json.dumps({"input": f"{value} {from_unit}", + "result": round(result, 6), + "output": f"{round(result, 6)} {to_unit}"}) + + return json.dumps({"error": f"Cannot convert {from_unit} → {to_unit}"}) + except Exception as e: + return json.dumps({"error": f"Conversion failed: {e}"}) +``` + +**Key rules for handlers:** +1. **Signature:** `def my_handler(args: dict, **kwargs) -> str` +2. **Return:** Always a JSON string. Success and errors alike. +3. **Never raise:** Catch all exceptions, return error JSON instead. +4. **Accept `**kwargs`:** Hermes may pass additional context in the future. + +## Step 5: Write the registration + +Create `__init__.py` — this wires schemas to handlers: + +```python +"""Calculator plugin — registration.""" + +import logging + +from . import schemas, tools + +logger = logging.getLogger(__name__) + +# Track tool usage via hooks +_call_log = [] + +def _on_post_tool_call(tool_name, args, result, task_id, **kwargs): + """Hook: runs after every tool call (not just ours).""" + _call_log.append({"tool": tool_name, "session": task_id}) + if len(_call_log) > 100: + _call_log.pop(0) + logger.debug("Tool called: %s (session %s)", tool_name, task_id) + + +def register(ctx): + """Wire schemas to handlers and register hooks.""" + ctx.register_tool(name="calculate", toolset="calculator", + schema=schemas.CALCULATE, handler=tools.calculate) + ctx.register_tool(name="unit_convert", toolset="calculator", + schema=schemas.UNIT_CONVERT, handler=tools.unit_convert) + + # This hook fires for ALL tool calls, not just ours + ctx.register_hook("post_tool_call", _on_post_tool_call) +``` + +**What `register()` does:** +- Called exactly once at startup +- `ctx.register_tool()` puts your tool in the registry — the model sees it immediately +- `ctx.register_hook()` subscribes to lifecycle events +- If this function crashes, the plugin is disabled but Hermes continues fine + +## Step 6: Test it + +Start Hermes: + +```bash +hermes +``` + +You should see `calculator: 
calculate, unit_convert` in the banner's tool list. + +Try these prompts: +``` +What's 2 to the power of 16? +Convert 100 fahrenheit to celsius +What's the square root of 2 times pi? +How many gigabytes is 1.5 terabytes? +``` + +Check plugin status: +``` +/plugins +``` + +Output: +``` +Plugins (1): + ✓ calculator v1.0.0 (2 tools, 1 hooks) +``` + +## Your plugin's final structure + +``` +~/.hermes/plugins/calculator/ +├── plugin.yaml # "I'm calculator, I provide tools and hooks" +├── __init__.py # Wiring: schemas → handlers, register hooks +├── schemas.py # What the LLM reads (descriptions + parameter specs) +└── tools.py # What runs (calculate, unit_convert functions) +``` + +Four files, clear separation: +- **Manifest** declares what the plugin is +- **Schemas** describe tools for the LLM +- **Handlers** implement the actual logic +- **Registration** connects everything + +## What else can plugins do? + +### Ship data files + +Put any files in your plugin directory and read them at import time: + +```python +# In tools.py or __init__.py +from pathlib import Path + +_PLUGIN_DIR = Path(__file__).parent +_DATA_FILE = _PLUGIN_DIR / "data" / "languages.yaml" + +with open(_DATA_FILE) as f: + _DATA = yaml.safe_load(f) +``` + +### Bundle a skill + +Include a `skill.md` file and install it during registration: + +```python +import shutil +from pathlib import Path + +def _install_skill(): + """Copy our skill to ~/.hermes/skills/ on first load.""" + try: + from hermes_cli.config import get_hermes_home + dest = get_hermes_home() / "skills" / "my-plugin" / "SKILL.md" + except Exception: + dest = Path.home() / ".hermes" / "skills" / "my-plugin" / "SKILL.md" + + if dest.exists(): + return # don't overwrite user edits + + source = Path(__file__).parent / "skill.md" + if source.exists(): + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, dest) + +def register(ctx): + ctx.register_tool(...) 
+ _install_skill() +``` + +### Gate on environment variables + +If your plugin needs an API key: + +```yaml +# plugin.yaml +requires_env: + - WEATHER_API_KEY +``` + +If `WEATHER_API_KEY` isn't set, the plugin is disabled with a clear message. No crash, no error in the agent — just "Plugin weather disabled (missing: WEATHER_API_KEY)". + +### Conditional tool availability + +For tools that depend on optional libraries: + +```python +ctx.register_tool( + name="my_tool", + schema={...}, + handler=my_handler, + check_fn=lambda: _has_optional_lib(), # False = tool hidden from model +) +``` + +### Register multiple hooks + +```python +def register(ctx): + ctx.register_hook("pre_tool_call", before_any_tool) + ctx.register_hook("post_tool_call", after_any_tool) + ctx.register_hook("on_session_start", on_new_session) + ctx.register_hook("on_session_end", on_session_end) +``` + +Available hooks: + +| Hook | When | Arguments | +|------|------|-----------| +| `pre_tool_call` | Before any tool runs | `tool_name`, `args`, `task_id` | +| `post_tool_call` | After any tool returns | `tool_name`, `args`, `result`, `task_id` | +| `pre_llm_call` | Before LLM API call | `messages`, `model` | +| `post_llm_call` | After LLM response | `messages`, `response`, `model` | +| `on_session_start` | Session begins | `session_id`, `platform` | +| `on_session_end` | Session ends | `session_id`, `platform` | + +Hooks are observers — they can't modify arguments or return values. If a hook crashes, it's logged and skipped; other hooks and the tool continue normally. 
+ +### Distribute via pip + +For sharing plugins publicly, add an entry point to your Python package: + +```toml +# pyproject.toml +[project.entry-points."hermes_agent.plugins"] +my-plugin = "my_plugin_package" +``` + +```bash +pip install hermes-plugin-calculator +# Plugin auto-discovered on next hermes startup +``` + +## Common mistakes + +**Handler doesn't return JSON string:** +```python +# Wrong — returns a dict +def handler(args, **kwargs): + return {"result": 42} + +# Right — returns a JSON string +def handler(args, **kwargs): + return json.dumps({"result": 42}) +``` + +**Missing `**kwargs` in handler signature:** +```python +# Wrong — will break if Hermes passes extra context +def handler(args): + ... + +# Right +def handler(args, **kwargs): + ... +``` + +**Handler raises exceptions:** +```python +# Wrong — exception propagates, tool call fails +def handler(args, **kwargs): + result = 1 / int(args["value"]) # ZeroDivisionError! + return json.dumps({"result": result}) + +# Right — catch and return error JSON +def handler(args, **kwargs): + try: + result = 1 / int(args.get("value", 0)) + return json.dumps({"result": result}) + except Exception as e: + return json.dumps({"error": str(e)}) +``` + +**Schema description too vague:** +```python +# Bad — model doesn't know when to use it +"description": "Does stuff" + +# Good — model knows exactly when and how +"description": "Evaluate a mathematical expression. Use for arithmetic, trig, logarithms. Supports: +, -, *, /, **, sqrt, sin, cos, log, pi, e." +``` diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md new file mode 100644 index 000000000..9b86d5d16 --- /dev/null +++ b/website/docs/user-guide/features/plugins.md @@ -0,0 +1,62 @@ +--- +sidebar_position: 20 +--- + +# Plugins + +Hermes has a plugin system for adding custom tools, hooks, and integrations without modifying core code. 
+
+**→ [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin)** — step-by-step guide with a complete working example.
+
+## Quick overview
+
+Drop a directory into `~/.hermes/plugins/` with a `plugin.yaml` and Python code:
+
+```
+~/.hermes/plugins/my-plugin/
+├── plugin.yaml   # manifest
+├── __init__.py   # register() — wires schemas to handlers
+├── schemas.py    # tool schemas (what the LLM sees)
+└── tools.py      # tool handlers (what runs when called)
+```
+
+Start Hermes — your tools appear alongside built-in tools. The model can call them immediately.
+
+## What plugins can do
+
+| Capability | How |
+|-----------|-----|
+| Add tools | `ctx.register_tool(name, schema, handler)` |
+| Add hooks | `ctx.register_hook("post_tool_call", callback)` |
+| Ship data files | `Path(__file__).parent / "data" / "file.yaml"` |
+| Bundle skills | Copy `skill.md` to `~/.hermes/skills/` at load time |
+| Gate on env vars | `requires_env: [API_KEY]` in plugin.yaml |
+| Distribute via pip | `[project.entry-points."hermes_agent.plugins"]` |
+
+## Plugin discovery
+
+| Source | Path | Use case |
+|--------|------|----------|
+| User | `~/.hermes/plugins/` | Personal plugins |
+| Project | `.hermes/plugins/` | Project-specific plugins |
+| pip | `hermes_agent.plugins` entry_points | Distributed packages |
+
+## Available hooks
+
+| Hook | Fires when |
+|------|-----------|
+| `pre_tool_call` | Before any tool executes |
+| `post_tool_call` | After any tool returns |
+| `pre_llm_call` | Before LLM API request |
+| `post_llm_call` | After LLM API response |
+| `on_session_start` | Session begins |
+| `on_session_end` | Session ends |
+
+## Managing plugins
+
+```
+/plugins   # list loaded plugins in a session
+```
+
+See the **[full guide](/docs/guides/build-a-hermes-plugin)** for handler contracts, schema format, hook behavior, error handling, and common mistakes. 
From 71e35311f59f84b548534831a354b8307a346bf7 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 07:20:43 -0700 Subject: [PATCH 28/29] fix(browser): model waits for user instruction after /browser connect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated the injected context message to tell the model to await the user's instruction before operating the browser. Typical flow is: user opens Chrome → logs into sites → /browser connect → tells the agent what to do. --- cli.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cli.py b/cli.py index 85566756a..4036eff1d 100755 --- a/cli.py +++ b/cli.py @@ -3589,12 +3589,13 @@ class HermesCLI: # Inject context message so the model knows if hasattr(self, '_pending_input'): self._pending_input.put( - "[System note: The user has connected the browser tools to their live Chrome browser " - "session via Chrome DevTools Protocol. You now have access to their real browser — " - "any pages they have open, their logged-in sessions, bookmarks, etc. " - "Use the browser tools (browser_navigate, browser_snapshot, browser_click, etc.) " - "to interact with their live browser. Be mindful that actions affect their real browser. " - "Ask before closing tabs or navigating away from pages they might be using.]" + "[System note: The user has connected your browser tools to their live Chrome browser " + "via Chrome DevTools Protocol. Your browser_navigate, browser_snapshot, browser_click, " + "and other browser tools now control their real browser — including any pages they have " + "open, logged-in sessions, and cookies. They likely opened specific sites or logged into " + "services before connecting. Please await their instruction before attempting to operate " + "the browser. 
When you do act, be mindful that your actions affect their real browser — " + "don't close tabs or navigate away from pages without asking.]" ) elif sub == "disconnect": From 5e5c92663dbf8c02e24797106d3a39ea46e1cab6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 12:36:29 -0700 Subject: [PATCH 29/29] fix: hermes update causes dual gateways on macOS (launchd) (#1567) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. 
Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. 
Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. Co-authored-by: an420eth * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). 
When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<hash> - User IDs → hashed to user_<hash> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. * fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. 
The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. * feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. 
Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. 
Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. 
cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. 
--------- Co-authored-by: Ninja Co-authored-by: alireza78a Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew Co-authored-by: an420eth --- agent/smart_model_routing.py | 184 +++++++++++ cli-config.yaml.example | 14 + cli.py | 98 ++++-- cron/scheduler.py | 24 +- gateway/run.py | 37 ++- hermes_cli/config.py | 32 ++ hermes_cli/gateway.py | 33 ++ hermes_cli/main.py | 74 ++++- tests/agent/test_smart_model_routing.py | 61 ++++ .../hermes_cli/test_update_gateway_restart.py | 305 ++++++++++++++++++ tests/test_cli_provider_resolution.py | 51 +++ website/docs/user-guide/configuration.md | 33 ++ 12 files changed, 903 insertions(+), 43 deletions(-) create mode 100644 agent/smart_model_routing.py create mode 100644 tests/agent/test_smart_model_routing.py create mode 100644 tests/hermes_cli/test_update_gateway_restart.py diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py new file mode 100644 index 000000000..249548701 --- /dev/null +++ b/agent/smart_model_routing.py @@ -0,0 +1,184 @@ +"""Helpers for optional cheap-vs-strong model routing.""" + +from __future__ import annotations + +import os +import re +from typing import Any, Dict, Optional + +_COMPLEX_KEYWORDS = { + "debug", + "debugging", + "implement", + "implementation", + "refactor", + "patch", + "traceback", + "stacktrace", + "exception", + "error", + "analyze", + "analysis", + "investigate", + "architecture", + "design", + "compare", + "benchmark", + "optimize", + "optimise", + "review", + "terminal", + "shell", + "tool", + "tools", + "pytest", + "test", + "tests", + "plan", + "planning", + "delegate", + "subagent", + "cron", + "docker", + "kubernetes", +} + +_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE) + + +def _coerce_bool(value: Any, default: bool = False) -> bool: + if value is None: + return default + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.strip().lower() in {"1", "true", "yes", "on"} + 
return bool(value) + + +def _coerce_int(value: Any, default: int) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default + + +def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Return the configured cheap-model route when a message looks simple. + + Conservative by design: if the message has signs of code/tool/debugging/ + long-form work, keep the primary model. + """ + cfg = routing_config or {} + if not _coerce_bool(cfg.get("enabled"), False): + return None + + cheap_model = cfg.get("cheap_model") or {} + if not isinstance(cheap_model, dict): + return None + provider = str(cheap_model.get("provider") or "").strip().lower() + model = str(cheap_model.get("model") or "").strip() + if not provider or not model: + return None + + text = (user_message or "").strip() + if not text: + return None + + max_chars = _coerce_int(cfg.get("max_simple_chars"), 160) + max_words = _coerce_int(cfg.get("max_simple_words"), 28) + + if len(text) > max_chars: + return None + if len(text.split()) > max_words: + return None + if text.count("\n") > 1: + return None + if "```" in text or "`" in text: + return None + if _URL_RE.search(text): + return None + + lowered = text.lower() + words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()} + if words & _COMPLEX_KEYWORDS: + return None + + route = dict(cheap_model) + route["provider"] = provider + route["model"] = model + route["routing_reason"] = "simple_turn" + return route + + +def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]: + """Resolve the effective model/runtime for one turn. + + Returns a dict with model/runtime/signature/label fields. 
+ """ + route = choose_cheap_model_route(user_message, routing_config) + if not route: + return { + "model": primary.get("model"), + "runtime": { + "api_key": primary.get("api_key"), + "base_url": primary.get("base_url"), + "provider": primary.get("provider"), + "api_mode": primary.get("api_mode"), + }, + "label": None, + "signature": ( + primary.get("model"), + primary.get("provider"), + primary.get("base_url"), + primary.get("api_mode"), + ), + } + + from hermes_cli.runtime_provider import resolve_runtime_provider + + explicit_api_key = None + api_key_env = str(route.get("api_key_env") or "").strip() + if api_key_env: + explicit_api_key = os.getenv(api_key_env) or None + + try: + runtime = resolve_runtime_provider( + requested=route.get("provider"), + explicit_api_key=explicit_api_key, + explicit_base_url=route.get("base_url"), + ) + except Exception: + return { + "model": primary.get("model"), + "runtime": { + "api_key": primary.get("api_key"), + "base_url": primary.get("base_url"), + "provider": primary.get("provider"), + "api_mode": primary.get("api_mode"), + }, + "label": None, + "signature": ( + primary.get("model"), + primary.get("provider"), + primary.get("base_url"), + primary.get("api_mode"), + ), + } + + return { + "model": route.get("model"), + "runtime": { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + }, + "label": f"smart route → {route.get('model')} ({runtime.get('provider')})", + "signature": ( + route.get("model"), + runtime.get("provider"), + runtime.get("base_url"), + runtime.get("api_mode"), + ), + } diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 0c947e479..681405c44 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -51,6 +51,20 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" +# 
============================================================================= +# Smart Model Routing (optional) +# ============================================================================= +# Use a cheaper model for short/simple turns while keeping your main model for +# more complex requests. Disabled by default. +# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash + # ============================================================================= # Git Worktree Isolation # ============================================================================= diff --git a/cli.py b/cli.py index 7716d6f57..eb0d7b101 100755 --- a/cli.py +++ b/cli.py @@ -176,6 +176,12 @@ def load_cli_config() -> Dict[str, Any]: "threshold": 0.50, # Compress at 50% of model's context limit "summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries }, + "smart_model_routing": { + "enabled": False, + "max_simple_chars": 160, + "max_simple_words": 28, + "cheap_model": {}, + }, "agent": { "max_turns": 90, # Default max tool-calling iterations (shared with subagents) "verbose": False, @@ -1126,6 +1132,10 @@ class HermesCLI: fb = CLI_CONFIG.get("fallback_model") or {} self._fallback_model = fb if fb.get("provider") and fb.get("model") else None + # Optional cheap-vs-strong routing for simple turns + self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {} + self._active_agent_route_signature = None + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -1537,10 +1547,27 @@ class HermesCLI: # routing, or the effective model changed. 
if (credentials_changed or routing_changed or model_changed) and self.agent is not None: self.agent = None + self._active_agent_route_signature = None return True - def _init_agent(self) -> bool: + def _resolve_turn_agent_config(self, user_message: str) -> dict: + """Resolve model/runtime overrides for a single user turn.""" + from agent.smart_model_routing import resolve_turn_route + + return resolve_turn_route( + user_message, + self._smart_model_routing, + { + "model": self.model, + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + }, + ) + + def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool: """ Initialize the agent on first use. When resuming a session, restores conversation history from SQLite. @@ -1600,12 +1627,19 @@ class HermesCLI: pass try: + runtime = runtime_override or { + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + } + effective_model = model_override or self.model self.agent = AIAgent( - model=self.model, - api_key=self.api_key, - base_url=self.base_url, - provider=self.provider, - api_mode=self.api_mode, + model=effective_model, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + api_mode=runtime.get("api_mode"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, @@ -1632,7 +1666,13 @@ class HermesCLI: pass_session_id=self.pass_session_id, tool_progress_callback=self._on_tool_progress, ) - # Apply any pending title now that the session exists in the DB + self._active_agent_route_signature = ( + effective_model, + runtime.get("provider"), + runtime.get("base_url"), + runtime.get("api_mode"), + ) + if self._pending_title and self._session_db: try: self._session_db.set_session_title(self.session_id, self._pending_title) @@ -3455,14 +3495,16 @@ class 
HermesCLI: _cprint(f" Task ID: {task_id}") _cprint(f" You can continue chatting — results will appear when done.\n") + turn_route = self._resolve_turn_agent_config(prompt) + def run_background(): try: bg_agent = AIAgent( - model=self.model, - api_key=self.api_key, - base_url=self.base_url, - provider=self.provider, - api_mode=self.api_mode, + model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, quiet_mode=True, @@ -4886,8 +4928,16 @@ class HermesCLI: if not self._ensure_runtime_credentials(): return None + turn_route = self._resolve_turn_agent_config(message) + if turn_route["signature"] != self._active_agent_route_signature: + self.agent = None + # Initialize agent if needed - if not self._init_agent(): + if not self._init_agent( + model_override=turn_route["model"], + runtime_override=turn_route["runtime"], + route_label=turn_route["label"], + ): return None # Pre-process images through the vision tool (Gemini Flash) so the @@ -6616,13 +6666,21 @@ def main( # Quiet mode: suppress banner, spinner, tool previews. # Only print the final response and parseable session info. 
cli.tool_progress_mode = "off" - if cli._init_agent(): - cli.agent.quiet_mode = True - result = cli.agent.run_conversation(query) - response = result.get("final_response", "") if isinstance(result, dict) else str(result) - if response: - print(response) - print(f"\nsession_id: {cli.session_id}") + if cli._ensure_runtime_credentials(): + turn_route = cli._resolve_turn_agent_config(query) + if turn_route["signature"] != cli._active_agent_route_signature: + cli.agent = None + if cli._init_agent( + model_override=turn_route["model"], + runtime_override=turn_route["runtime"], + route_label=turn_route["label"], + ): + cli.agent.quiet_mode = True + result = cli.agent.run_conversation(query) + response = result.get("final_response", "") if isinstance(result, dict) else str(result) + if response: + print(response) + print(f"\nsession_id: {cli.session_id}") else: cli.show_banner() cli.console.print(f"[bold blue]Query:[/] {query}") diff --git a/cron/scheduler.py b/cron/scheduler.py index 8d75e1a95..ded88ef53 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -315,6 +315,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # Provider routing pr = _cfg.get("provider_routing", {}) + smart_routing = _cfg.get("smart_model_routing", {}) or {} from hermes_cli.runtime_provider import ( resolve_runtime_provider, @@ -331,12 +332,25 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc + from agent.smart_model_routing import resolve_turn_route + turn_route = resolve_turn_route( + prompt, + smart_routing, + { + "model": model, + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + }, + ) + agent = AIAgent( - model=model, - api_key=runtime.get("api_key"), - base_url=runtime.get("base_url"), - provider=runtime.get("provider"), - api_mode=runtime.get("api_mode"), + 
model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), max_iterations=max_iterations, reasoning_config=reasoning_config, prefill_messages=prefill_messages, diff --git a/gateway/run.py b/gateway/run.py index 50e68eaad..5c0b8b48c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -324,6 +324,7 @@ class GatewayRunner: self._show_reasoning = self._load_show_reasoning() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() + self._smart_model_routing = self._load_smart_model_routing() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -593,6 +594,18 @@ class GatewayRunner: group_sessions_per_user=getattr(config, "group_sessions_per_user", True), ) + def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: + from agent.smart_model_routing import resolve_turn_route + + primary = { + "model": model, + "api_key": runtime_kwargs.get("api_key"), + "base_url": runtime_kwargs.get("base_url"), + "provider": runtime_kwargs.get("provider"), + "api_mode": runtime_kwargs.get("api_mode"), + } + return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary) + async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None: """React to a non-retryable adapter failure after startup.""" logger.error( @@ -795,6 +808,20 @@ class GatewayRunner: pass return None + @staticmethod + def _load_smart_model_routing() -> dict: + """Load optional smart cheap-vs-strong model routing config.""" + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + return cfg.get("smart_model_routing", {}) or {} + except 
Exception: + pass + return {} + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. @@ -2931,11 +2958,12 @@ class GatewayRunner: max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) reasoning_config = self._load_reasoning_config() self._reasoning_config = reasoning_config + turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs) def run_sync(): agent = AIAgent( - model=model, - **runtime_kwargs, + model=turn_route["model"], + **turn_route["runtime"], max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, @@ -4169,9 +4197,10 @@ class GatewayRunner: honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key) reasoning_config = self._load_reasoning_config() self._reasoning_config = reasoning_config + turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs) agent = AIAgent( - model=model, - **runtime_kwargs, + model=turn_route["model"], + **turn_route["runtime"], max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ee5e54470..1a46a80bb 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -147,6 +147,12 @@ DEFAULT_CONFIG = { "summary_model": "google/gemini-3-flash-preview", "summary_provider": "auto", }, + "smart_model_routing": { + "enabled": False, + "max_simple_chars": 160, + "max_simple_words": 28, + "cheap_model": {}, + }, # Auxiliary model config — provider:model for each side task. # Format: provider is the provider name, model is the model slug. @@ -1010,6 +1016,19 @@ _FALLBACK_COMMENT = """ # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 +# +# ── Smart Model Routing ──────────────────────────────────────────────── +# Optional cheap-vs-strong routing for simple turns. +# Keeps the primary model for complex work, but can route short/simple +# messages to a cheaper model across providers. 
+# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash """ @@ -1040,6 +1059,19 @@ _COMMENTED_SECTIONS = """ # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 +# +# ── Smart Model Routing ──────────────────────────────────────────────── +# Optional cheap-vs-strong routing for simple turns. +# Keeps the primary model for complex work, but can route short/simple +# messages to a cheaper model across providers. +# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash """ diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 2399436c4..4bc068c1e 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -705,6 +705,7 @@ def generate_launchd_plist() -> str: hermes_cli.main gateway run + --replace WorkingDirectory @@ -728,6 +729,36 @@ def generate_launchd_plist() -> str: """ +def launchd_plist_is_current() -> bool: + """Check if the installed launchd plist matches the currently generated one.""" + plist_path = get_launchd_plist_path() + if not plist_path.exists(): + return False + + installed = plist_path.read_text(encoding="utf-8") + expected = generate_launchd_plist() + return _normalize_service_definition(installed) == _normalize_service_definition(expected) + + +def refresh_launchd_plist_if_needed() -> bool: + """Rewrite the installed launchd plist when the generated definition has changed. + + Unlike systemd, launchd picks up plist changes on the next ``launchctl stop``/ + ``launchctl start`` cycle — no daemon-reload is needed. We still unload/reload + to make launchd re-read the updated plist immediately. 
+ """ + plist_path = get_launchd_plist_path() + if not plist_path.exists() or launchd_plist_is_current(): + return False + + plist_path.write_text(generate_launchd_plist(), encoding="utf-8") + # Unload/reload so launchd picks up the new definition + subprocess.run(["launchctl", "unload", str(plist_path)], check=False) + subprocess.run(["launchctl", "load", str(plist_path)], check=False) + print("↻ Updated gateway launchd service definition to match the current Hermes install") + return True + + def launchd_install(force: bool = False): plist_path = get_launchd_plist_path() @@ -760,6 +791,7 @@ def launchd_uninstall(): print("✓ Service uninstalled") def launchd_start(): + refresh_launchd_plist_if_needed() subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True) print("✓ Service started") @@ -768,6 +800,7 @@ def launchd_stop(): print("✓ Service stopped") def launchd_restart(): + refresh_launchd_plist_if_needed() launchd_stop() launchd_start() diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 2cb220df6..845ae207e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2306,12 +2306,16 @@ def cmd_update(args): # installation's gateway — safe with multiple installations. 
try: from gateway.status import get_running_pid, remove_pid_file - from hermes_cli.gateway import get_service_name + from hermes_cli.gateway import ( + get_service_name, get_launchd_plist_path, is_macos, + refresh_launchd_plist_if_needed, + ) import signal as _signal _gw_service_name = get_service_name() existing_pid = get_running_pid() has_systemd_service = False + has_launchd_service = False try: check = subprocess.run( @@ -2322,23 +2326,36 @@ def cmd_update(args): except (FileNotFoundError, subprocess.TimeoutExpired): pass - if existing_pid or has_systemd_service: + # Check for macOS launchd service + if is_macos(): + try: + plist_path = get_launchd_plist_path() + if plist_path.exists(): + check = subprocess.run( + ["launchctl", "list", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=5, + ) + has_launchd_service = check.returncode == 0 + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + if existing_pid or has_systemd_service or has_launchd_service: print() - # Kill the PID-file-tracked process (may be manual or systemd) - if existing_pid: - try: - os.kill(existing_pid, _signal.SIGTERM) - print(f"→ Stopped gateway process (PID {existing_pid})") - except ProcessLookupError: - pass # Already gone - except PermissionError: - print(f"⚠ Permission denied killing gateway PID {existing_pid}") - remove_pid_file() - - # Restart the systemd service (starts a fresh process) + # When a service manager is handling the gateway, let it + # manage the lifecycle — don't manually SIGTERM the PID + # (launchd KeepAlive would respawn immediately, causing races). 
if has_systemd_service: import time as _time + if existing_pid: + try: + os.kill(existing_pid, _signal.SIGTERM) + print(f"→ Stopped gateway process (PID {existing_pid})") + except ProcessLookupError: + pass + except PermissionError: + print(f"⚠ Permission denied killing gateway PID {existing_pid}") + remove_pid_file() _time.sleep(1) # Brief pause for port/socket release print("→ Restarting gateway service...") restart = subprocess.run( @@ -2350,7 +2367,36 @@ def cmd_update(args): else: print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") print(" Try manually: hermes gateway restart") + elif has_launchd_service: + # Refresh the plist first (picks up --replace and other + # changes from the update we just pulled). + refresh_launchd_plist_if_needed() + # Explicit stop+start — don't rely on KeepAlive respawn + # after a manual SIGTERM, which would race with the + # PID file cleanup. + print("→ Restarting gateway service...") + stop = subprocess.run( + ["launchctl", "stop", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=10, + ) + start = subprocess.run( + ["launchctl", "start", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=10, + ) + if start.returncode == 0: + print("✓ Gateway restarted via launchd.") + else: + print(f"⚠ Gateway restart failed: {start.stderr.strip()}") + print(" Try manually: hermes gateway restart") elif existing_pid: + try: + os.kill(existing_pid, _signal.SIGTERM) + print(f"→ Stopped gateway process (PID {existing_pid})") + except ProcessLookupError: + pass # Already gone + except PermissionError: + print(f"⚠ Permission denied killing gateway PID {existing_pid}") + remove_pid_file() print(" ℹ️ Gateway was running manually (not as a service).") print(" Restart it with: hermes gateway run") except Exception as e: diff --git a/tests/agent/test_smart_model_routing.py b/tests/agent/test_smart_model_routing.py new file mode 100644 index 000000000..7e9025609 --- /dev/null +++ 
b/tests/agent/test_smart_model_routing.py @@ -0,0 +1,61 @@ +from agent.smart_model_routing import choose_cheap_model_route + + +_BASE_CONFIG = { + "enabled": True, + "cheap_model": { + "provider": "openrouter", + "model": "google/gemini-2.5-flash", + }, +} + + +def test_returns_none_when_disabled(): + cfg = {**_BASE_CONFIG, "enabled": False} + assert choose_cheap_model_route("what time is it in tokyo?", cfg) is None + + +def test_routes_short_simple_prompt(): + result = choose_cheap_model_route("what time is it in tokyo?", _BASE_CONFIG) + assert result is not None + assert result["provider"] == "openrouter" + assert result["model"] == "google/gemini-2.5-flash" + assert result["routing_reason"] == "simple_turn" + + +def test_skips_long_prompt(): + prompt = "please summarize this carefully " * 20 + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_skips_code_like_prompt(): + prompt = "debug this traceback: ```python\nraise ValueError('bad')\n```" + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_skips_tool_heavy_prompt_keywords(): + prompt = "implement a patch for this docker error" + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_resolve_turn_route_falls_back_to_primary_when_route_runtime_cannot_be_resolved(monkeypatch): + from agent.smart_model_routing import resolve_turn_route + + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + lambda **kwargs: (_ for _ in ()).throw(RuntimeError("bad route")), + ) + result = resolve_turn_route( + "what time is it in tokyo?", + _BASE_CONFIG, + { + "model": "anthropic/claude-sonnet-4", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + "api_key": "sk-primary", + }, + ) + assert result["model"] == "anthropic/claude-sonnet-4" + assert result["runtime"]["provider"] == "openrouter" + assert result["label"] is None diff --git 
a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py new file mode 100644 index 000000000..b9cdecaa0 --- /dev/null +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -0,0 +1,305 @@ +"""Tests for cmd_update gateway auto-restart — systemd + launchd coverage. + +Ensures ``hermes update`` correctly detects running gateways managed by +systemd (Linux) or launchd (macOS) and restarts/informs the user properly, +rather than leaving zombie processes or telling users to manually restart +when launchd will auto-respawn. +""" + +import subprocess +from types import SimpleNamespace +from unittest.mock import patch, MagicMock + +import pytest + +import hermes_cli.gateway as gateway_cli +from hermes_cli.main import cmd_update + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_run_side_effect( + branch="main", + verify_ok=True, + commit_count="3", + systemd_active=False, + launchctl_loaded=False, +): + """Build a subprocess.run side_effect that simulates git + service commands.""" + + def side_effect(cmd, **kwargs): + joined = " ".join(str(c) for c in cmd) + + # git rev-parse --abbrev-ref HEAD + if "rev-parse" in joined and "--abbrev-ref" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="") + + # git rev-parse --verify origin/{branch} + if "rev-parse" in joined and "--verify" in joined: + rc = 0 if verify_ok else 128 + return subprocess.CompletedProcess(cmd, rc, stdout="", stderr="") + + # git rev-list HEAD..origin/{branch} --count + if "rev-list" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="") + + # systemctl --user is-active + if "systemctl" in joined and "is-active" in joined: + if systemd_active: + return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + return 
subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
+
+        # systemctl --user restart
+        if "systemctl" in joined and "restart" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+        # launchctl list ai.hermes.gateway
+        if "launchctl" in joined and "list" in joined:
+            if launchctl_loaded:
+                return subprocess.CompletedProcess(cmd, 0, stdout="PID\tStatus\tLabel\n123\t0\tai.hermes.gateway\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 113, stdout="", stderr="Could not find service")
+
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    return side_effect
+
+
+@pytest.fixture
+def mock_args():
+    return SimpleNamespace()
+
+
+# ---------------------------------------------------------------------------
+# Launchd plist includes --replace
+# ---------------------------------------------------------------------------
+
+
+class TestLaunchdPlistReplace:
+    """The generated launchd plist must include --replace so respawned
+    gateways kill stale instances."""
+
+    def test_plist_contains_replace_flag(self):
+        plist = gateway_cli.generate_launchd_plist()
+        assert "--replace" in plist
+
+    def test_plist_program_arguments_order(self):
+        """--replace comes after 'run' in the ProgramArguments."""
+        plist = gateway_cli.generate_launchd_plist()
+        lines = [line.strip() for line in plist.splitlines()]
+        # Find 'run' and '--replace' in the <string> entries
+        string_values = [
+            line.replace("<string>", "").replace("</string>", "")
+            for line in lines
+            if "<string>" in line and "</string>" in line
+        ]
+        assert "run" in string_values
+        assert "--replace" in string_values
+        run_idx = string_values.index("run")
+        replace_idx = string_values.index("--replace")
+        assert replace_idx == run_idx + 1
+
+
+# ---------------------------------------------------------------------------
+# cmd_update — macOS launchd detection
+# ---------------------------------------------------------------------------
+
+
+class TestLaunchdPlistRefresh:
+    
"""refresh_launchd_plist_if_needed rewrites stale plists (like systemd's + refresh_systemd_unit_if_needed).""" + + def test_refresh_rewrites_stale_plist(self, tmp_path, monkeypatch): + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("old content") + + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + calls = [] + def fake_run(cmd, check=False, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + result = gateway_cli.refresh_launchd_plist_if_needed() + + assert result is True + # Plist should now contain the generated content (which includes --replace) + assert "--replace" in plist_path.read_text() + # Should have unloaded then reloaded + assert any("unload" in str(c) for c in calls) + assert any("load" in str(c) for c in calls) + + def test_refresh_skips_when_current(self, tmp_path, monkeypatch): + plist_path = tmp_path / "ai.hermes.gateway.plist" + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + # Write the current expected content + plist_path.write_text(gateway_cli.generate_launchd_plist()) + + calls = [] + monkeypatch.setattr( + gateway_cli.subprocess, "run", + lambda cmd, **kw: calls.append(cmd) or SimpleNamespace(returncode=0), + ) + + result = gateway_cli.refresh_launchd_plist_if_needed() + + assert result is False + assert len(calls) == 0 # No launchctl calls needed + + def test_refresh_skips_when_no_plist(self, tmp_path, monkeypatch): + plist_path = tmp_path / "nonexistent.plist" + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + result = gateway_cli.refresh_launchd_plist_if_needed() + assert result is False + + def test_launchd_start_calls_refresh(self, tmp_path, monkeypatch): + """launchd_start refreshes the plist before starting.""" + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("old") + 
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + calls = [] + def fake_run(cmd, check=False, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + gateway_cli.launchd_start() + + # First calls should be refresh (unload/load), then start + cmd_strs = [" ".join(c) for c in calls] + assert any("unload" in s for s in cmd_strs) + assert any("start" in s for s in cmd_strs) + + +class TestCmdUpdateLaunchdRestart: + """cmd_update correctly detects and handles launchd on macOS.""" + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_detects_launchd_and_skips_manual_restart_message( + self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch, + ): + """When launchd is running the gateway, update should print + 'auto-restart via launchd' instead of 'Restart it with: hermes gateway run'.""" + # Create a fake launchd plist so is_macos + plist.exists() passes + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("") + + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: True, + ) + monkeypatch.setattr( + gateway_cli, "get_launchd_plist_path", lambda: plist_path, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + launchctl_loaded=True, + ) + + # Mock get_running_pid to return a PID + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Gateway restarted via launchd" in captured + assert "Restart it with: hermes gateway run" not in captured + # Verify launchctl stop + start were called (not manual SIGTERM) + launchctl_calls = [ + c for c in mock_run.call_args_list + if len(c.args[0]) > 0 and c.args[0][0] == "launchctl" + ] + stop_calls = [c for c in launchctl_calls if "stop" in c.args[0]] + start_calls = [c for c 
in launchctl_calls if "start" in c.args[0]] + assert len(stop_calls) >= 1 + assert len(start_calls) >= 1 + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_without_launchd_shows_manual_restart( + self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch, + ): + """When no service manager is running, update should show the manual restart hint.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: True, + ) + plist_path = tmp_path / "ai.hermes.gateway.plist" + # plist does NOT exist — no launchd service + monkeypatch.setattr( + gateway_cli, "get_launchd_plist_path", lambda: plist_path, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + launchctl_loaded=False, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Restart it with: hermes gateway run" in captured + assert "Gateway restarted via launchd" not in captured + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_with_systemd_still_restarts_via_systemd( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """On Linux with systemd active, update should restart via systemctl.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: False, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=True, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Gateway restarted" in captured + # Verify systemctl restart was called + restart_calls = [ + c for c in mock_run.call_args_list + if "restart" in " ".join(str(a) for a in c.args[0]) + and "systemctl" in " ".join(str(a) for a in c.args[0]) + ] + assert len(restart_calls) == 1 + 
+ @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_no_gateway_running_skips_restart( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When no gateway is running, update should skip the restart section entirely.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: False, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=False, + ) + + with patch("gateway.status.get_running_pid", return_value=None): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Stopped gateway" not in captured + assert "Gateway restarted" not in captured + assert "Gateway restarted via launchd" not in captured diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 3144bed80..99d8830fa 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -162,6 +162,57 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): assert shell.api_mode == "codex_responses" +def test_cli_turn_routing_uses_primary_when_disabled(monkeypatch): + cli = _import_cli() + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://openrouter.ai/api/v1" + shell.api_key = "sk-primary" + shell._smart_model_routing = {"enabled": False} + + result = shell._resolve_turn_agent_config("what time is it in tokyo?") + + assert result["model"] == "gpt-5" + assert result["runtime"]["provider"] == "openrouter" + assert result["label"] is None + + +def test_cli_turn_routing_uses_cheap_model_when_simple(monkeypatch): + cli = _import_cli() + + def _runtime_resolve(**kwargs): + assert kwargs["requested"] == "zai" + return { + "provider": "zai", + "api_mode": "chat_completions", + "base_url": "https://open.z.ai/api/v1", + "api_key": "cheap-key", + "source": "env/config", + } + + 
monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + + shell = cli.HermesCLI(model="anthropic/claude-sonnet-4", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://openrouter.ai/api/v1" + shell.api_key = "primary-key" + shell._smart_model_routing = { + "enabled": True, + "cheap_model": {"provider": "zai", "model": "glm-5-air"}, + "max_simple_chars": 160, + "max_simple_words": 28, + } + + result = shell._resolve_turn_agent_config("what time is it in tokyo?") + + assert result["model"] == "glm-5-air" + assert result["runtime"]["provider"] == "zai" + assert result["runtime"]["api_key"] == "cheap-key" + assert result["label"] is not None + + def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch): cli = _import_cli() diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 1df9c491f..56a2ab125 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -441,6 +441,39 @@ Supported providers: `openrouter`, `nous`, `openai-codex`, `anthropic`, `zai`, ` Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). ::: +## Smart Model Routing + +Optional cheap-vs-strong routing lets Hermes keep your main model for complex work while sending very short/simple turns to a cheaper model. 
+ +```yaml +smart_model_routing: + enabled: true + max_simple_chars: 160 + max_simple_words: 28 + cheap_model: + provider: openrouter + model: google/gemini-2.5-flash + # base_url: http://localhost:8000/v1 # optional custom endpoint + # api_key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key +``` + +How it works: +- If a turn is short, single-line, and does not look code/tool/debug heavy, Hermes may route it to `cheap_model` +- If the turn looks complex, Hermes stays on your primary model/provider +- If the cheap route cannot be resolved cleanly, Hermes falls back to the primary model automatically + +This is intentionally conservative. It is meant for quick, low-stakes turns like: +- short factual questions +- quick rewrites +- lightweight summaries + +It will avoid routing prompts that look like: +- coding/debugging work +- tool-heavy requests +- long or multi-line analysis asks + +Use this when you want lower latency or cost without fully changing your default model. + ## Terminal Backend Configuration Configure which environment the agent uses for terminal commands: