diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 6e8ec3b49..1081104e9 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -42,7 +42,7 @@ from acp_adapter.events import ( make_tool_progress_cb, ) from acp_adapter.permissions import make_approval_callback -from acp_adapter.session import SessionManager +from acp_adapter.session import SessionManager, SessionState logger = logging.getLogger(__name__) @@ -226,10 +226,19 @@ class HermesACPAgent(acp.Agent): logger.error("prompt: session %s not found", session_id) return PromptResponse(stop_reason="refusal") - user_text = _extract_text(prompt) - if not user_text.strip(): + user_text = _extract_text(prompt).strip() + if not user_text: return PromptResponse(stop_reason="end_turn") + # Intercept slash commands — handle locally without calling the LLM + if user_text.startswith("/"): + response_text = self._handle_slash_command(user_text, state) + if response_text is not None: + if self._conn: + update = acp.update_agent_message_text(response_text) + await self._conn.session_update(session_id, update) + return PromptResponse(stop_reason="end_turn") + logger.info("Prompt on session %s: %s", session_id, user_text[:100]) conn = self._conn @@ -315,12 +324,149 @@ class HermesACPAgent(acp.Agent): stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn" return PromptResponse(stop_reason=stop_reason, usage=usage) - # ---- Model switching ---------------------------------------------------- + # ---- Slash commands (headless) ------------------------------------------- + + _SLASH_COMMANDS = { + "help": "Show available commands", + "model": "Show or change current model", + "tools": "List available tools", + "context": "Show conversation context info", + "reset": "Clear conversation history", + "compact": "Compress conversation context", + "version": "Show Hermes version", + } + + def _handle_slash_command(self, text: str, state: SessionState) -> str | None: + 
"""Dispatch a slash command and return the response text. + + Returns ``None`` for unrecognized commands so they fall through + to the LLM (the user may have typed ``/something`` as prose). + """ + parts = text.split(maxsplit=1) + cmd = parts[0].lstrip("/").lower() + args = parts[1].strip() if len(parts) > 1 else "" + + handler = { + "help": self._cmd_help, + "model": self._cmd_model, + "tools": self._cmd_tools, + "context": self._cmd_context, + "reset": self._cmd_reset, + "compact": self._cmd_compact, + "version": self._cmd_version, + }.get(cmd) + + if handler is None: + return None # not a known command — let the LLM handle it + + try: + return handler(args, state) + except Exception as e: + logger.error("Slash command /%s error: %s", cmd, e, exc_info=True) + return f"Error executing /{cmd}: {e}" + + def _cmd_help(self, args: str, state: SessionState) -> str: + lines = ["Available commands:", ""] + for cmd, desc in self._SLASH_COMMANDS.items(): + lines.append(f" /{cmd:10s} {desc}") + lines.append("") + lines.append("Unrecognized /commands are sent to the model as normal messages.") + return "\n".join(lines) + + def _cmd_model(self, args: str, state: SessionState) -> str: + if not args: + model = state.model or getattr(state.agent, "model", "unknown") + provider = getattr(state.agent, "provider", None) or "auto" + return f"Current model: {model}\nProvider: {provider}" + + new_model = args.strip() + target_provider = None + + # Auto-detect provider for the requested model + try: + from hermes_cli.models import parse_model_input, detect_provider_for_model + current_provider = getattr(state.agent, "provider", None) or "openrouter" + target_provider, new_model = parse_model_input(new_model, current_provider) + if target_provider == current_provider: + detected = detect_provider_for_model(new_model, current_provider) + if detected: + target_provider, new_model = detected + except Exception: + logger.debug("Provider detection failed, using model as-is", exc_info=True) + 
+ state.model = new_model + state.agent = self.session_manager._make_agent( + session_id=state.session_id, + cwd=state.cwd, + model=new_model, + ) + provider_label = target_provider or getattr(state.agent, "provider", "auto") + logger.info("Session %s: model switched to %s", state.session_id, new_model) + return f"Model switched to: {new_model}\nProvider: {provider_label}" + + def _cmd_tools(self, args: str, state: SessionState) -> str: + try: + from model_tools import get_tool_definitions + toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True) + if not tools: + return "No tools available." + lines = [f"Available tools ({len(tools)}):"] + for t in tools: + name = t.get("function", {}).get("name", "?") + desc = t.get("function", {}).get("description", "") + # Truncate long descriptions + if len(desc) > 80: + desc = desc[:77] + "..." + lines.append(f" {name}: {desc}") + return "\n".join(lines) + except Exception as e: + return f"Could not list tools: {e}" + + def _cmd_context(self, args: str, state: SessionState) -> str: + n_messages = len(state.history) + if n_messages == 0: + return "Conversation is empty (no messages yet)." + # Count by role + roles: dict[str, int] = {} + for msg in state.history: + role = msg.get("role", "unknown") + roles[role] = roles.get(role, 0) + 1 + lines = [ + f"Conversation: {n_messages} messages", + f" user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, " + f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}", + ] + model = state.model or getattr(state.agent, "model", "") + if model: + lines.append(f"Model: {model}") + return "\n".join(lines) + + def _cmd_reset(self, args: str, state: SessionState) -> str: + state.history.clear() + return "Conversation history cleared." 
+ + def _cmd_compact(self, args: str, state: SessionState) -> str: + if not state.history: + return "Nothing to compress — conversation is empty." + try: + agent = state.agent + if hasattr(agent, "compress_context"): + agent.compress_context(state.history) + return f"Context compressed. Messages: {len(state.history)}" + return "Context compression not available for this agent." + except Exception as e: + return f"Compression failed: {e}" + + def _cmd_version(self, args: str, state: SessionState) -> str: + return f"Hermes Agent v{HERMES_VERSION}" + + # ---- Model switching (ACP protocol method) ------------------------------- async def set_session_model( self, model_id: str, session_id: str, **kwargs: Any ): - """Switch the model for a session.""" + """Switch the model for a session (called by ACP protocol).""" state = self.session_manager.get_session(session_id) if state: state.model = model_id diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 06d636320..b71a96293 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -73,9 +73,15 @@ DEFAULT_AGENT_IDENTITY = ( MEMORY_GUIDANCE = ( "You have persistent memory across sessions. Save durable facts using the memory " "tool: user preferences, environment details, tool quirks, and stable conventions. " - "Memory is injected into every turn, so keep it compact. Do NOT save task progress, " - "session outcomes, or completed-work logs to memory; use session_search to recall " - "those from past transcripts." + "Memory is injected into every turn, so keep it compact and focused on facts that " + "will still matter later.\n" + "Prioritize what reduces future user steering — the most valuable memory is one " + "that prevents the user from having to correct or remind you again. 
" + "User preferences and recurring corrections matter more than procedural task details.\n" + "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " + "state to memory; use session_search to recall those from past transcripts. " + "If you've discovered a new way to do something, solved a problem that could be " + "necessary later, save it as a skill with the skill tool." ) SESSION_SEARCH_GUIDANCE = ( @@ -86,8 +92,11 @@ SESSION_SEARCH_GUIDANCE = ( SKILLS_GUIDANCE = ( "After completing a complex task (5+ tool calls), fixing a tricky error, " - "or discovering a non-trivial workflow, consider saving the approach as a " - "skill with skill_manage so you can reuse it next time." + "or discovering a non-trivial workflow, save the approach as a " + "skill with skill_manage so you can reuse it next time.\n" + "When using a skill and finding it outdated, incomplete, or wrong, " + "patch it immediately with skill_manage(action='patch') — don't wait to be asked. " + "Skills that aren't maintained become liabilities." ) PLATFORM_HINTS = { @@ -326,6 +335,9 @@ def build_skills_system_prompt( "Before replying, scan the skills below. If one clearly matches your task, " "load it with skill_view(name) and follow its instructions. " "If a skill has issues, fix it with skill_manage(action='patch').\n" + "After difficult/iterative tasks, offer to save as a skill. 
" + "If a skill you loaded was missing steps, had wrong commands, or needed " + "pitfalls you discovered, update it before finishing.\n" "\n" "\n" + "\n".join(index_lines) + "\n" diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py new file mode 100644 index 000000000..249548701 --- /dev/null +++ b/agent/smart_model_routing.py @@ -0,0 +1,184 @@ +"""Helpers for optional cheap-vs-strong model routing.""" + +from __future__ import annotations + +import os +import re +from typing import Any, Dict, Optional + +_COMPLEX_KEYWORDS = { + "debug", + "debugging", + "implement", + "implementation", + "refactor", + "patch", + "traceback", + "stacktrace", + "exception", + "error", + "analyze", + "analysis", + "investigate", + "architecture", + "design", + "compare", + "benchmark", + "optimize", + "optimise", + "review", + "terminal", + "shell", + "tool", + "tools", + "pytest", + "test", + "tests", + "plan", + "planning", + "delegate", + "subagent", + "cron", + "docker", + "kubernetes", +} + +_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE) + + +def _coerce_bool(value: Any, default: bool = False) -> bool: + if value is None: + return default + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.strip().lower() in {"1", "true", "yes", "on"} + return bool(value) + + +def _coerce_int(value: Any, default: int) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default + + +def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + """Return the configured cheap-model route when a message looks simple. + + Conservative by design: if the message has signs of code/tool/debugging/ + long-form work, keep the primary model. 
+ """ + cfg = routing_config or {} + if not _coerce_bool(cfg.get("enabled"), False): + return None + + cheap_model = cfg.get("cheap_model") or {} + if not isinstance(cheap_model, dict): + return None + provider = str(cheap_model.get("provider") or "").strip().lower() + model = str(cheap_model.get("model") or "").strip() + if not provider or not model: + return None + + text = (user_message or "").strip() + if not text: + return None + + max_chars = _coerce_int(cfg.get("max_simple_chars"), 160) + max_words = _coerce_int(cfg.get("max_simple_words"), 28) + + if len(text) > max_chars: + return None + if len(text.split()) > max_words: + return None + if text.count("\n") > 1: + return None + if "```" in text or "`" in text: + return None + if _URL_RE.search(text): + return None + + lowered = text.lower() + words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()} + if words & _COMPLEX_KEYWORDS: + return None + + route = dict(cheap_model) + route["provider"] = provider + route["model"] = model + route["routing_reason"] = "simple_turn" + return route + + +def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]: + """Resolve the effective model/runtime for one turn. + + Returns a dict with model/runtime/signature/label fields. 
+ """ + route = choose_cheap_model_route(user_message, routing_config) + if not route: + return { + "model": primary.get("model"), + "runtime": { + "api_key": primary.get("api_key"), + "base_url": primary.get("base_url"), + "provider": primary.get("provider"), + "api_mode": primary.get("api_mode"), + }, + "label": None, + "signature": ( + primary.get("model"), + primary.get("provider"), + primary.get("base_url"), + primary.get("api_mode"), + ), + } + + from hermes_cli.runtime_provider import resolve_runtime_provider + + explicit_api_key = None + api_key_env = str(route.get("api_key_env") or "").strip() + if api_key_env: + explicit_api_key = os.getenv(api_key_env) or None + + try: + runtime = resolve_runtime_provider( + requested=route.get("provider"), + explicit_api_key=explicit_api_key, + explicit_base_url=route.get("base_url"), + ) + except Exception: + return { + "model": primary.get("model"), + "runtime": { + "api_key": primary.get("api_key"), + "base_url": primary.get("base_url"), + "provider": primary.get("provider"), + "api_mode": primary.get("api_mode"), + }, + "label": None, + "signature": ( + primary.get("model"), + primary.get("provider"), + primary.get("base_url"), + primary.get("api_mode"), + ), + } + + return { + "model": route.get("model"), + "runtime": { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + }, + "label": f"smart route → {route.get('model')} ({runtime.get('provider')})", + "signature": ( + route.get("model"), + runtime.get("provider"), + runtime.get("base_url"), + runtime.get("api_mode"), + ), + } diff --git a/cli-config.yaml.example b/cli-config.yaml.example index ea5ba6f80..4623ccfbf 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -51,6 +51,20 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" +# 
============================================================================= +# Smart Model Routing (optional) +# ============================================================================= +# Use a cheaper model for short/simple turns while keeping your main model for +# more complex requests. Disabled by default. +# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash + # ============================================================================= # Git Worktree Isolation # ============================================================================= @@ -76,8 +90,9 @@ model: # - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home) terminal: backend: "local" - cwd: "." # For local backend: "." = current directory. Ignored for remote backends. + cwd: "." # For local backend: "." = current directory. Ignored for remote backends unless a backend documents otherwise. timeout: 180 + docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace. lifetime_seconds: 300 # sudo_password: "" # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext! @@ -107,6 +122,7 @@ terminal: # timeout: 180 # lifetime_seconds: 300 # docker_image: "nikolaik/python-nodejs:python3.11-nodejs20" +# docker_mount_cwd_to_workspace: true # Explicit opt-in: mount your launch cwd into /workspace # ----------------------------------------------------------------------------- # OPTION 4: Singularity/Apptainer container @@ -759,3 +775,14 @@ display: # tool_prefix: "╎" # Tool output line prefix (default: ┊) # skin: default + +# ============================================================================= +# Privacy +# ============================================================================= +# privacy: +# # Redact PII from the LLM context prompt. 
+# # When true, phone numbers are stripped and user/chat IDs are replaced +# # with deterministic hashes before being sent to the model. +# # Names and usernames are NOT affected (user-chosen, publicly visible). +# # Routing/delivery still uses the original values internally. +# redact_pii: false diff --git a/cli.py b/cli.py index c24b67e07..3fa210ffa 100755 --- a/cli.py +++ b/cli.py @@ -165,6 +165,7 @@ def load_cli_config() -> Dict[str, Any]: "modal_image": "python:3.11", "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20", "docker_volumes": [], # host:container volume mounts for Docker backend + "docker_mount_cwd_to_workspace": False, # explicit opt-in only; default off for sandbox isolation }, "browser": { "inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min @@ -175,6 +176,12 @@ def load_cli_config() -> Dict[str, Any]: "threshold": 0.50, # Compress at 50% of model's context limit "summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries }, + "smart_model_routing": { + "enabled": False, + "max_simple_chars": 160, + "max_simple_words": 28, + "cheap_model": {}, + }, "agent": { "max_turns": 90, # Default max tool-calling iterations (shared with subagents) "verbose": False, @@ -204,6 +211,7 @@ def load_cli_config() -> Dict[str, Any]: "resume_display": "full", "show_reasoning": False, "streaming": False, + "show_cost": False, "skin": "default", }, "clarify": { @@ -331,6 +339,7 @@ def load_cli_config() -> Dict[str, Any]: "container_disk": "TERMINAL_CONTAINER_DISK", "container_persistent": "TERMINAL_CONTAINER_PERSISTENT", "docker_volumes": "TERMINAL_DOCKER_VOLUMES", + "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "sandbox_dir": "TERMINAL_SANDBOX_DIR", # Persistent shell (non-local backends) "persistent_shell": "TERMINAL_PERSISTENT_SHELL", @@ -394,7 +403,13 @@ def load_cli_config() -> Dict[str, Any]: "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER", "model": 
"AUXILIARY_WEB_EXTRACT_MODEL", "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", - "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + }, + "approval": { + "provider": "AUXILIARY_APPROVAL_PROVIDER", + "model": "AUXILIARY_APPROVAL_MODEL", + "base_url": "AUXILIARY_APPROVAL_BASE_URL", + "api_key": "AUXILIARY_APPROVAL_API_KEY", + }, } @@ -1016,6 +1031,8 @@ class HermesCLI: self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False) # show_reasoning: display model thinking/reasoning before the response self.show_reasoning = CLI_CONFIG["display"].get("show_reasoning", False) + # show_cost: display $ cost in the status bar (off by default) + self.show_cost = CLI_CONFIG["display"].get("show_cost", False) self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose") # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml) @@ -1124,6 +1141,10 @@ class HermesCLI: fb = CLI_CONFIG.get("fallback_model") or {} self._fallback_model = fb if fb.get("provider") and fb.get("model") else None + # Optional cheap-vs-strong routing for simple turns + self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {} + self._active_agent_route_signature = None + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -1277,13 +1298,22 @@ class HermesCLI: width = width or shutil.get_terminal_size((80, 24)).columns percent = snapshot["context_percent"] percent_label = f"{percent}%" if percent is not None else "--" - cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" duration_label = snapshot["duration"] + show_cost = getattr(self, "show_cost", False) + + if show_cost: + cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" + else: + cost_label = None if width < 52: return f"⚕ {snapshot['model_short']} · {duration_label}"
if width < 76: - return f"⚕ {snapshot['model_short']} · {percent_label} · {cost_label} · {duration_label}" + parts = [f"⚕ {snapshot['model_short']}", percent_label] + if cost_label: + parts.append(cost_label) + parts.append(duration_label) + return " · ".join(parts) if snapshot["context_length"]: ctx_total = _format_context_length(snapshot["context_length"]) @@ -1292,7 +1322,11 @@ class HermesCLI: else: context_label = "ctx --" - return f"⚕ {snapshot['model_short']} │ {context_label} │ {percent_label} │ {cost_label} │ {duration_label}" + parts = [f"⚕ {snapshot['model_short']}", context_label, percent_label] + if cost_label: + parts.append(cost_label) + parts.append(duration_label) + return " │ ".join(parts) except Exception: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -1300,8 +1334,13 @@ class HermesCLI: try: snapshot = self._get_status_bar_snapshot() width = shutil.get_terminal_size((80, 24)).columns - cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" duration_label = snapshot["duration"] + show_cost = getattr(self, "show_cost", False) + + if show_cost: + cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" + else: + cost_label = None if width < 52: return [ @@ -1315,17 +1354,23 @@ class HermesCLI: percent = snapshot["context_percent"] percent_label = f"{percent}%" if percent is not None else "--" if width < 76: - return [ + frags = [ ("class:status-bar", " ⚕ "), ("class:status-bar-strong", snapshot["model_short"]), ("class:status-bar-dim", " · "), (self._status_bar_context_style(percent), percent_label), - ("class:status-bar-dim", " · "), - ("class:status-bar-dim", cost_label), + ] + if cost_label: + frags.extend([ + ("class:status-bar-dim", " · "), + ("class:status-bar-dim", cost_label), + ]) + frags.extend([ ("class:status-bar-dim", " · "), ("class:status-bar-dim", duration_label), ("class:status-bar", " "), - ] + ]) + return frags if 
snapshot["context_length"]: ctx_total = _format_context_length(snapshot["context_length"]) @@ -1335,7 +1380,7 @@ class HermesCLI: context_label = "ctx --" bar_style = self._status_bar_context_style(percent) - return [ + frags = [ ("class:status-bar", " ⚕ "), ("class:status-bar-strong", snapshot["model_short"]), ("class:status-bar-dim", " │ "), @@ -1344,12 +1389,18 @@ class HermesCLI: (bar_style, self._build_context_bar(percent)), ("class:status-bar-dim", " "), (bar_style, percent_label), - ("class:status-bar-dim", " │ "), - ("class:status-bar-dim", cost_label), + ] + if cost_label: + frags.extend([ + ("class:status-bar-dim", " │ "), + ("class:status-bar-dim", cost_label), + ]) + frags.extend([ ("class:status-bar-dim", " │ "), ("class:status-bar-dim", duration_label), ("class:status-bar", " "), - ] + ]) + return frags except Exception: return [("class:status-bar", f" {self._build_status_bar_text()} ")] @@ -1598,6 +1649,8 @@ class HermesCLI: return "Processing skills command..." if cmd_lower == "/reload-mcp": return "Reloading MCP servers..." + if cmd_lower.startswith("/browser"): + return "Configuring browser..." return "Processing command..." def _command_spinner_frame(self) -> str: @@ -1674,10 +1727,27 @@ class HermesCLI: # routing, or the effective model changed. 
if (credentials_changed or routing_changed or model_changed) and self.agent is not None: self.agent = None + self._active_agent_route_signature = None return True - def _init_agent(self) -> bool: + def _resolve_turn_agent_config(self, user_message: str) -> dict: + """Resolve model/runtime overrides for a single user turn.""" + from agent.smart_model_routing import resolve_turn_route + + return resolve_turn_route( + user_message, + self._smart_model_routing, + { + "model": self.model, + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + }, + ) + + def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None) -> bool: """ Initialize the agent on first use. When resuming a session, restores conversation history from SQLite. @@ -1737,12 +1807,19 @@ class HermesCLI: pass try: + runtime = runtime_override or { + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + } + effective_model = model_override or self.model self.agent = AIAgent( - model=self.model, - api_key=self.api_key, - base_url=self.base_url, - provider=self.provider, - api_mode=self.api_mode, + model=effective_model, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + api_mode=runtime.get("api_mode"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, @@ -1774,7 +1851,13 @@ class HermesCLI: tool_progress_callback=self._on_tool_progress, stream_delta_callback=self._stream_delta if self.streaming_enabled else None, ) - # Apply any pending title now that the session exists in the DB + self._active_agent_route_signature = ( + effective_model, + runtime.get("provider"), + runtime.get("base_url"), + runtime.get("api_mode"), + ) + if self._pending_title and self._session_db: try: self._session_db.set_session_title(self.session_id, 
self._pending_title) @@ -2170,6 +2253,26 @@ class HermesCLI: # Treat as a git hash return ref + def _handle_stop_command(self): + """Handle /stop — kill all running background processes. + + Inspired by OpenAI Codex's separation of interrupt (stop current turn) + from /stop (clean up background processes). See openai/codex#14602. + """ + from tools.process_registry import get_registry + + registry = get_registry() + processes = registry.list_processes() + running = [p for p in processes if p.get("status") == "running"] + + if not running: + print(" No running background processes.") + return + + print(f" Stopping {len(running)} background process(es)...") + killed = registry.kill_all() + print(f" ✅ Stopped {killed} process(es).") + def _handle_paste_command(self): """Handle /paste — explicitly check clipboard for an image. @@ -3418,8 +3521,33 @@ class HermesCLI: elif cmd_lower == "/reload-mcp": with self._busy_command(self._slow_command_status(cmd_original)): self._reload_mcp() + elif cmd_lower.startswith("/browser"): + self._handle_browser_command(cmd_original) + elif cmd_lower == "/plugins": + try: + from hermes_cli.plugins import get_plugin_manager + mgr = get_plugin_manager() + plugins = mgr.list_plugins() + if not plugins: + print("No plugins installed.") + print(f"Drop plugin directories into ~/.hermes/plugins/ to get started.") + else: + print(f"Plugins ({len(plugins)}):") + for p in plugins: + status = "✓" if p["enabled"] else "✗" + version = f" v{p['version']}" if p["version"] else "" + tools = f"{p['tools']} tools" if p["tools"] else "" + hooks = f"{p['hooks']} hooks" if p["hooks"] else "" + parts = [x for x in [tools, hooks] if x] + detail = f" ({', '.join(parts)})" if parts else "" + error = f" — {p['error']}" if p["error"] else "" + print(f" {status} {p['name']}{version}{detail}{error}") + except Exception as e: + print(f"Plugin system error: {e}") elif cmd_lower.startswith("/rollback"): self._handle_rollback_command(cmd_original) + elif cmd_lower == 
"/stop": + self._handle_stop_command() elif cmd_lower.startswith("/background"): self._handle_background_command(cmd_original) elif cmd_lower.startswith("/skin"): @@ -3552,14 +3680,16 @@ class HermesCLI: _cprint(f" Task ID: {task_id}") _cprint(f" You can continue chatting — results will appear when done.\n") + turn_route = self._resolve_turn_agent_config(prompt) + def run_background(): try: bg_agent = AIAgent( - model=self.model, - api_key=self.api_key, - base_url=self.base_url, - provider=self.provider, - api_mode=self.api_mode, + model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, quiet_mode=True, @@ -3634,6 +3764,210 @@ class HermesCLI: self._background_tasks[task_id] = thread thread.start() + @staticmethod + def _try_launch_chrome_debug(port: int, system: str) -> bool: + """Try to launch Chrome/Chromium with remote debugging enabled. + + Returns True if a launch command was executed (doesn't guarantee success). 
+ """ + import shutil + import subprocess as _sp + + candidates = [] + if system == "Darwin": + # macOS: try common app bundle locations + for app in ( + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", + "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", + "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", + ): + if os.path.isfile(app): + candidates.append(app) + else: + # Linux: try common binary names + for name in ("google-chrome", "google-chrome-stable", "chromium-browser", + "chromium", "brave-browser", "microsoft-edge"): + path = shutil.which(name) + if path: + candidates.append(path) + + if not candidates: + return False + + chrome = candidates[0] + try: + _sp.Popen( + [chrome, f"--remote-debugging-port={port}"], + stdout=_sp.DEVNULL, + stderr=_sp.DEVNULL, + start_new_session=True, # detach from terminal + ) + return True + except Exception: + return False + + def _handle_browser_command(self, cmd: str): + """Handle /browser connect|disconnect|status — manage live Chrome CDP connection.""" + import platform as _plat + import subprocess as _sp + + parts = cmd.strip().split(None, 1) + sub = parts[1].lower().strip() if len(parts) > 1 else "status" + + _DEFAULT_CDP = "ws://localhost:9222" + current = os.environ.get("BROWSER_CDP_URL", "").strip() + + if sub.startswith("connect"): + # Optionally accept a custom CDP URL: /browser connect ws://host:port + connect_parts = cmd.strip().split(None, 2) # ["/browser", "connect", "ws://..."] + cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP + + # Clear any existing browser sessions so the next tool call uses the new backend + try: + from tools.browser_tool import cleanup_all_browsers + cleanup_all_browsers() + except Exception: + pass + + print() + + # Extract port for connectivity checks + _port = 9222 + try: + _port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0]) + except (ValueError, 
IndexError): + pass + + # Check if Chrome is already listening on the debug port + import socket + _already_open = False + try: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect(("127.0.0.1", _port)) + s.close() + _already_open = True + except (OSError, socket.timeout): + pass + + if _already_open: + print(f" ✓ Chrome is already listening on port {_port}") + elif cdp_url == _DEFAULT_CDP: + # Try to auto-launch Chrome with remote debugging + print(" Chrome isn't running with remote debugging — attempting to launch...") + _launched = self._try_launch_chrome_debug(_port, _plat.system()) + if _launched: + # Wait for the port to come up + import time as _time + for _wait in range(10): + try: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect(("127.0.0.1", _port)) + s.close() + _already_open = True + break + except (OSError, socket.timeout): + _time.sleep(0.5) + if _already_open: + print(f" ✓ Chrome launched and listening on port {_port}") + else: + print(f" ⚠ Chrome launched but port {_port} isn't responding yet") + print(" You may need to close existing Chrome windows first and retry") + else: + print(f" ⚠ Could not auto-launch Chrome") + # Show manual instructions as fallback + sys_name = _plat.system() + if sys_name == "Darwin": + chrome_cmd = 'open -a "Google Chrome" --args --remote-debugging-port=9222' + elif sys_name == "Windows": + chrome_cmd = 'chrome.exe --remote-debugging-port=9222' + else: + chrome_cmd = "google-chrome --remote-debugging-port=9222" + print(f" Launch Chrome manually: {chrome_cmd}") + else: + print(f" ⚠ Port {_port} is not reachable at {cdp_url}") + + os.environ["BROWSER_CDP_URL"] = cdp_url + print() + print("🌐 Browser connected to live Chrome via CDP") + print(f" Endpoint: {cdp_url}") + print() + + # Inject context message so the model knows + if hasattr(self, '_pending_input'): + self._pending_input.put( + "[System note: The user has connected your browser tools to their 
live Chrome browser " + "via Chrome DevTools Protocol. Your browser_navigate, browser_snapshot, browser_click, " + "and other browser tools now control their real browser — including any pages they have " + "open, logged-in sessions, and cookies. They likely opened specific sites or logged into " + "services before connecting. Please await their instruction before attempting to operate " + "the browser. When you do act, be mindful that your actions affect their real browser — " + "don't close tabs or navigate away from pages without asking.]" + ) + + elif sub == "disconnect": + if current: + os.environ.pop("BROWSER_CDP_URL", None) + try: + from tools.browser_tool import cleanup_all_browsers + cleanup_all_browsers() + except Exception: + pass + print() + print("🌐 Browser disconnected from live Chrome") + print(" Browser tools reverted to default mode (local headless or Browserbase)") + print() + + if hasattr(self, '_pending_input'): + self._pending_input.put( + "[System note: The user has disconnected the browser tools from their live Chrome. 
" + "Browser tools are back to default mode (headless local browser or Browserbase cloud).]" + ) + else: + print() + print("Browser is not connected to live Chrome (already using default mode)") + print() + + elif sub == "status": + print() + if current: + print(f"🌐 Browser: connected to live Chrome via CDP") + print(f" Endpoint: {current}") + + _port = 9222 + try: + _port = int(current.rsplit(":", 1)[-1].split("/")[0]) + except (ValueError, IndexError): + pass + try: + import socket + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect(("127.0.0.1", _port)) + s.close() + print(f" Status: ✓ reachable") + except (OSError, Exception): + print(f" Status: ⚠ not reachable (Chrome may not be running)") + elif os.environ.get("BROWSERBASE_API_KEY"): + print("🌐 Browser: Browserbase (cloud)") + else: + print("🌐 Browser: local headless Chromium (agent-browser)") + print() + print(" /browser connect — connect to your live Chrome") + print(" /browser disconnect — revert to default") + print() + + else: + print() + print("Usage: /browser connect|disconnect|status") + print() + print(" connect Connect browser tools to your live Chrome session") + print(" disconnect Revert to default browser backend") + print(" status Show current browser mode") + print() + def _handle_skin_command(self, cmd: str): """Handle /skin [name] — show or change the display skin.""" try: @@ -4779,8 +5113,16 @@ class HermesCLI: if not self._ensure_runtime_credentials(): return None + turn_route = self._resolve_turn_agent_config(message) + if turn_route["signature"] != self._active_agent_route_signature: + self.agent = None + # Initialize agent if needed - if not self._init_agent(): + if not self._init_agent( + model_override=turn_route["model"], + runtime_override=turn_route["runtime"], + route_label=turn_route["label"], + ): return None # Pre-process images through the vision tool (Gemini Flash) so the @@ -6521,13 +6863,21 @@ def main( # Quiet mode: suppress banner, 
spinner, tool previews. # Only print the final response and parseable session info. cli.tool_progress_mode = "off" - if cli._init_agent(): - cli.agent.quiet_mode = True - result = cli.agent.run_conversation(query) - response = result.get("final_response", "") if isinstance(result, dict) else str(result) - if response: - print(response) - print(f"\nsession_id: {cli.session_id}") + if cli._ensure_runtime_credentials(): + turn_route = cli._resolve_turn_agent_config(query) + if turn_route["signature"] != cli._active_agent_route_signature: + cli.agent = None + if cli._init_agent( + model_override=turn_route["model"], + runtime_override=turn_route["runtime"], + route_label=turn_route["label"], + ): + cli.agent.quiet_mode = True + result = cli.agent.run_conversation(query) + response = result.get("final_response", "") if isinstance(result, dict) else str(result) + if response: + print(response) + print(f"\nsession_id: {cli.session_id}") else: cli.show_banner() cli.console.print(f"[bold blue]Query:[/] {query}") diff --git a/cron/scheduler.py b/cron/scheduler.py index 8d75e1a95..ded88ef53 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -315,6 +315,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # Provider routing pr = _cfg.get("provider_routing", {}) + smart_routing = _cfg.get("smart_model_routing", {}) or {} from hermes_cli.runtime_provider import ( resolve_runtime_provider, @@ -331,12 +332,25 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc + from agent.smart_model_routing import resolve_turn_route + turn_route = resolve_turn_route( + prompt, + smart_routing, + { + "model": model, + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + }, + ) + agent = AIAgent( - model=model, - api_key=runtime.get("api_key"), - base_url=runtime.get("base_url"), 
- provider=runtime.get("provider"), - api_mode=runtime.get("api_mode"), + model=turn_route["model"], + api_key=turn_route["runtime"].get("api_key"), + base_url=turn_route["runtime"].get("base_url"), + provider=turn_route["runtime"].get("provider"), + api_mode=turn_route["runtime"].get("api_mode"), max_iterations=max_iterations, reasoning_config=reasoning_config, prefill_messages=prefill_messages, diff --git a/gateway/platforms/email.py b/gateway/platforms/email.py index 36d34f98e..d37348c99 100644 --- a/gateway/platforms/email.py +++ b/gateway/platforms/email.py @@ -135,14 +135,23 @@ def _extract_email_address(raw: str) -> str: return raw.strip().lower() -def _extract_attachments(msg: email_lib.message.Message) -> List[Dict[str, Any]]: - """Extract attachment metadata and cache files locally.""" +def _extract_attachments( + msg: email_lib.message.Message, + skip_attachments: bool = False, +) -> List[Dict[str, Any]]: + """Extract attachment metadata and cache files locally. + + When *skip_attachments* is True, all attachment/inline parts are ignored + (useful for malware protection or bandwidth savings). 
+ """ attachments = [] if not msg.is_multipart(): return attachments for part in msg.walk(): disposition = str(part.get("Content-Disposition", "")) + if skip_attachments and ("attachment" in disposition or "inline" in disposition): + continue if "attachment" not in disposition and "inline" not in disposition: continue # Skip text/plain and text/html body parts @@ -196,6 +205,13 @@ class EmailAdapter(BasePlatformAdapter): self._smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587")) self._poll_interval = int(os.getenv("EMAIL_POLL_INTERVAL", "15")) + # Skip attachments — configured via config.yaml: + # platforms: + # email: + # skip_attachments: true + extra = config.extra or {} + self._skip_attachments = extra.get("skip_attachments", False) + # Track message IDs we've already processed to avoid duplicates self._seen_uids: set = set() self._poll_task: Optional[asyncio.Task] = None @@ -306,7 +322,7 @@ class EmailAdapter(BasePlatformAdapter): message_id = msg.get("Message-ID", "") in_reply_to = msg.get("In-Reply-To", "") body = _extract_text_body(msg) - attachments = _extract_attachments(msg) + attachments = _extract_attachments(msg, skip_attachments=self._skip_attachments) results.append({ "uid": uid, diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 08750faed..47d2ae551 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -202,8 +202,26 @@ class TelegramAdapter(BasePlatformAdapter): self._handle_media_message )) - # Start polling in background - await self._app.initialize() + # Start polling — retry initialize() for transient TLS resets + try: + from telegram.error import NetworkError, TimedOut + except ImportError: + NetworkError = TimedOut = OSError # type: ignore[misc,assignment] + _max_connect = 3 + for _attempt in range(_max_connect): + try: + await self._app.initialize() + break + except (NetworkError, TimedOut, OSError) as init_err: + if _attempt < _max_connect - 1: + wait = 2 ** _attempt + 
logger.warning( + "[%s] Connect attempt %d/%d failed: %s — retrying in %ds", + self.name, _attempt + 1, _max_connect, init_err, wait, + ) + await asyncio.sleep(wait) + else: + raise await self._app.start() loop = asyncio.get_running_loop() @@ -265,6 +283,8 @@ class TelegramAdapter(BasePlatformAdapter): release_scoped_lock("telegram-bot-token", self._token_lock_identity) except Exception: pass + message = f"Telegram startup failed: {e}" + self._set_fatal_error("telegram_connect_error", message, retryable=True) logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True) return False @@ -334,32 +354,47 @@ class TelegramAdapter(BasePlatformAdapter): message_ids = [] thread_id = metadata.get("thread_id") if metadata else None + try: + from telegram.error import NetworkError as _NetErr + except ImportError: + _NetErr = OSError # type: ignore[misc,assignment] + for i, chunk in enumerate(chunks): - # Try Markdown first, fall back to plain text if it fails - try: - msg = await self._bot.send_message( - chat_id=int(chat_id), - text=chunk, - parse_mode=ParseMode.MARKDOWN_V2, - reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, - message_thread_id=int(thread_id) if thread_id else None, - ) - except Exception as md_error: - # Markdown parsing failed, try plain text - if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower(): - logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error) - # Strip MDV2 escape backslashes so the user doesn't - # see raw backslashes littered through the message. 
- plain_chunk = _strip_mdv2(chunk) - msg = await self._bot.send_message( - chat_id=int(chat_id), - text=plain_chunk, - parse_mode=None, # Plain text - reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, - message_thread_id=int(thread_id) if thread_id else None, - ) - else: - raise # Re-raise if not a parse error + msg = None + for _send_attempt in range(3): + try: + # Try Markdown first, fall back to plain text if it fails + try: + msg = await self._bot.send_message( + chat_id=int(chat_id), + text=chunk, + parse_mode=ParseMode.MARKDOWN_V2, + reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, + message_thread_id=int(thread_id) if thread_id else None, + ) + except Exception as md_error: + # Markdown parsing failed, try plain text + if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower(): + logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error) + plain_chunk = _strip_mdv2(chunk) + msg = await self._bot.send_message( + chat_id=int(chat_id), + text=plain_chunk, + parse_mode=None, + reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, + message_thread_id=int(thread_id) if thread_id else None, + ) + else: + raise + break # success + except _NetErr as send_err: + if _send_attempt < 2: + wait = 2 ** _send_attempt + logger.warning("[%s] Network error on send (attempt %d/3), retrying in %ds: %s", + self.name, _send_attempt + 1, wait, send_err) + await asyncio.sleep(wait) + else: + raise message_ids.append(str(msg.message_id)) return SendResult( diff --git a/gateway/run.py b/gateway/run.py index 71f453d88..66c4f0502 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -157,6 +157,12 @@ if _config_path.exists(): "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", }, + "approval": { + "provider": "AUXILIARY_APPROVAL_PROVIDER", + "model": "AUXILIARY_APPROVAL_MODEL", + "base_url": "AUXILIARY_APPROVAL_BASE_URL", + "api_key": 
"AUXILIARY_APPROVAL_API_KEY", + }, } for _task_key, _env_map in _aux_task_env.items(): _task_cfg = _auxiliary_cfg.get(_task_key, {}) @@ -318,6 +324,7 @@ class GatewayRunner: self._show_reasoning = self._load_show_reasoning() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() + self._smart_model_routing = self._load_smart_model_routing() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -587,6 +594,18 @@ class GatewayRunner: group_sessions_per_user=getattr(config, "group_sessions_per_user", True), ) + def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: + from agent.smart_model_routing import resolve_turn_route + + primary = { + "model": model, + "api_key": runtime_kwargs.get("api_key"), + "base_url": runtime_kwargs.get("base_url"), + "provider": runtime_kwargs.get("provider"), + "api_mode": runtime_kwargs.get("api_mode"), + } + return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary) + async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None: """React to a non-retryable adapter failure after startup.""" logger.error( @@ -789,6 +808,20 @@ class GatewayRunner: pass return None + @staticmethod + def _load_smart_model_routing() -> dict: + """Load optional smart cheap-vs-strong model routing config.""" + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + return cfg.get("smart_model_routing", {}) or {} + except Exception: + pass + return {} + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. 
@@ -831,12 +864,15 @@ class GatewayRunner: logger.warning("Process checkpoint recovery: %s", e) connected_count = 0 + enabled_platform_count = 0 startup_nonretryable_errors: list[str] = [] + startup_retryable_errors: list[str] = [] # Initialize and connect each configured platform for platform, platform_config in self.config.platforms.items(): if not platform_config.enabled: continue + enabled_platform_count += 1 adapter = self._create_adapter(platform, platform_config) if not adapter: @@ -858,12 +894,22 @@ class GatewayRunner: logger.info("✓ %s connected", platform.value) else: logger.warning("✗ %s failed to connect", platform.value) - if adapter.has_fatal_error and not adapter.fatal_error_retryable: - startup_nonretryable_errors.append( + if adapter.has_fatal_error: + target = ( + startup_retryable_errors + if adapter.fatal_error_retryable + else startup_nonretryable_errors + ) + target.append( f"{platform.value}: {adapter.fatal_error_message}" ) + else: + startup_retryable_errors.append( + f"{platform.value}: failed to connect" + ) except Exception as e: logger.error("✗ %s error: %s", platform.value, e) + startup_retryable_errors.append(f"{platform.value}: {e}") if connected_count == 0: if startup_nonretryable_errors: @@ -876,7 +922,16 @@ class GatewayRunner: pass self._request_clean_exit(reason) return True - logger.warning("No messaging platforms connected.") + if enabled_platform_count > 0: + reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect" + logger.error("Gateway failed to connect any configured messaging platform: %s", reason) + try: + from gateway.status import write_runtime_status + write_runtime_status(gateway_state="startup_failed", exit_reason=reason) + except Exception: + pass + return False + logger.warning("No messaging platforms enabled.") logger.info("Gateway will continue running for cron job execution.") # Update delivery router with adapters @@ -1430,8 +1485,17 @@ class GatewayRunner: # Set 
environment variables for tools self._set_session_env(context) + # Read privacy.redact_pii from config (re-read per message) + _redact_pii = False + try: + with open(_config_path, encoding="utf-8") as _pf: + _pcfg = yaml.safe_load(_pf) or {} + _redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False)) + except Exception: + pass + # Build the context prompt to inject - context_prompt = build_session_context_prompt(context) + context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii) # If the previous session expired and was auto-reset, prepend a notice # so the agent knows this is a fresh conversation (not an intentional /reset). @@ -2899,11 +2963,12 @@ class GatewayRunner: max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90")) reasoning_config = self._load_reasoning_config() self._reasoning_config = reasoning_config + turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs) def run_sync(): agent = AIAgent( - model=model, - **runtime_kwargs, + model=turn_route["model"], + **turn_route["runtime"], max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, @@ -3625,13 +3690,9 @@ class GatewayRunner: 1. Immediately understand what the user sent (no extra tool call). 2. Re-examine the image with vision_analyze if it needs more detail. - Athabasca persistence should happen through Athabasca's own POST - /api/uploads flow, using the returned asset.publicUrl rather than local - cache paths. - Args: - user_text: The user's original caption / message text. - image_paths: List of local file paths to cached images. + user_text: The user's original caption / message text. + image_paths: List of local file paths to cached images. Returns: The enriched message string with vision descriptions prepended. 
@@ -3656,16 +3717,10 @@ class GatewayRunner: result = _json.loads(result_json) if result.get("success"): description = result.get("analysis", "") - athabasca_note = ( - "\n[If this image needs to persist in Athabasca state, upload the cached file " - "through Athabasca POST /api/uploads and use the returned asset.publicUrl. " - "Do not store the local cache path as the canonical imageUrl.]" - ) enriched_parts.append( f"[The user sent an image~ Here's what I can see:\n{description}]\n" f"[If you need a closer look, use vision_analyze with " f"image_url: {path} ~]" - f"{athabasca_note}" ) else: enriched_parts.append( @@ -4177,9 +4232,10 @@ class GatewayRunner: except Exception as _sc_err: logger.debug("Could not set up stream consumer: %s", _sc_err) + turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs) agent = AIAgent( - model=model, - **runtime_kwargs, + model=turn_route["model"], + **turn_route["runtime"], max_iterations=max_iterations, quiet_mode=True, verbose_logging=False, diff --git a/gateway/session.py b/gateway/session.py index 23971a912..d0bf0cfe4 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -8,9 +8,11 @@ Handles: - Dynamic system prompt injection (agent knows its context) """ +import hashlib import logging import os import json +import re import uuid from pathlib import Path from datetime import datetime, timedelta @@ -19,6 +21,41 @@ from typing import Dict, List, Optional, Any logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# PII redaction helpers +# --------------------------------------------------------------------------- + +_PHONE_RE = re.compile(r"^\+?\d[\d\-\s]{6,}$") + + +def _hash_id(value: str) -> str: + """Deterministic 12-char hex hash of an identifier.""" + return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12] + + +def _hash_sender_id(value: str) -> str: + """Hash a sender ID to ``user_<12hex>``.""" + return 
f"user_{_hash_id(value)}" + + +def _hash_chat_id(value: str) -> str: + """Hash the numeric portion of a chat ID, preserving platform prefix. + + ``telegram:12345`` → ``telegram:`` + ``12345`` → ```` + """ + colon = value.find(":") + if colon > 0: + prefix = value[:colon] + return f"{prefix}:{_hash_id(value[colon + 1:])}" + return _hash_id(value) + + +def _looks_like_phone(value: str) -> bool: + """Return True if *value* looks like a phone number (E.164 or similar).""" + return bool(_PHONE_RE.match(value.strip())) + from .config import ( Platform, GatewayConfig, @@ -146,7 +183,21 @@ class SessionContext: } -def build_session_context_prompt(context: SessionContext) -> str: +_PII_SAFE_PLATFORMS = frozenset({ + Platform.WHATSAPP, + Platform.SIGNAL, + Platform.TELEGRAM, +}) +"""Platforms where user IDs can be safely redacted (no in-message mention system +that requires raw IDs). Discord is excluded because mentions use ``<@user_id>`` +and the LLM needs the real ID to tag users.""" + + +def build_session_context_prompt( + context: SessionContext, + *, + redact_pii: bool = False, +) -> str: """ Build the dynamic system prompt section that tells the agent about its context. @@ -154,7 +205,15 @@ def build_session_context_prompt(context: SessionContext) -> str: - Where messages are coming from - What platforms are connected - Where it can deliver scheduled task outputs + + When *redact_pii* is True **and** the source platform is in + ``_PII_SAFE_PLATFORMS``, phone numbers are stripped and user/chat IDs + are replaced with deterministic hashes before being sent to the LLM. + Platforms like Discord are excluded because mentions need real IDs. + Routing still uses the original values (they stay in SessionSource). 
""" + # Only apply redaction on platforms where IDs aren't needed for mentions + redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS lines = [ "## Current Session Context", "", @@ -165,7 +224,25 @@ def build_session_context_prompt(context: SessionContext) -> str: if context.source.platform == Platform.LOCAL: lines.append(f"**Source:** {platform_name} (the machine running this agent)") else: - lines.append(f"**Source:** {platform_name} ({context.source.description})") + # Build a description that respects PII redaction + src = context.source + if redact_pii: + # Build a safe description without raw IDs + _uname = src.user_name or ( + _hash_sender_id(src.user_id) if src.user_id else "user" + ) + _cname = src.chat_name or _hash_chat_id(src.chat_id) + if src.chat_type == "dm": + desc = f"DM with {_uname}" + elif src.chat_type == "group": + desc = f"group: {_cname}" + elif src.chat_type == "channel": + desc = f"channel: {_cname}" + else: + desc = _cname + else: + desc = src.description + lines.append(f"**Source:** {platform_name} ({desc})") # Channel topic (if available - provides context about the channel's purpose) if context.source.chat_topic: @@ -175,7 +252,10 @@ def build_session_context_prompt(context: SessionContext) -> str: if context.source.user_name: lines.append(f"**User:** {context.source.user_name}") elif context.source.user_id: - lines.append(f"**User ID:** {context.source.user_id}") + uid = context.source.user_id + if redact_pii: + uid = _hash_sender_id(uid) + lines.append(f"**User ID:** {uid}") # Platform-specific behavioral notes if context.source.platform == Platform.SLACK: @@ -210,7 +290,8 @@ def build_session_context_prompt(context: SessionContext) -> str: lines.append("") lines.append("**Home Channels (default destinations):**") for platform, home in context.home_channels.items(): - lines.append(f" - {platform.value}: {home.name} (ID: {home.chat_id})") + hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id + 
lines.append(f" - {platform.value}: {home.name} (ID: {hc_id})") # Delivery options for scheduled tasks lines.append("") @@ -220,7 +301,10 @@ def build_session_context_prompt(context: SessionContext) -> str: if context.source.platform == Platform.LOCAL: lines.append("- `\"origin\"` → Local output (saved to files)") else: - lines.append(f"- `\"origin\"` → Back to this chat ({context.source.chat_name or context.source.chat_id})") + _origin_label = context.source.chat_name or ( + _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id + ) + lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})") # Local always available lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)") diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 7e964bd4e..baeb767c0 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -7,7 +7,9 @@ interactive CLI. from __future__ import annotations +import os from collections.abc import Callable, Mapping +from pathlib import Path from typing import Any from prompt_toolkit.completion import Completer, Completion @@ -26,6 +28,7 @@ COMMANDS_BY_CATEGORY = { "/title": "Set a title for the current session (usage: /title My Session Name)", "/compress": "Manually compress conversation context (flush memories + summarize)", "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])", + "/stop": "Kill all running background processes", "/background": "Run a prompt in the background (usage: /background )", }, "Configuration": { @@ -45,6 +48,8 @@ COMMANDS_BY_CATEGORY = { "/skills": "Search, install, inspect, or manage skills from online registries", "/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)", "/reload-mcp": "Reload MCP servers from config.yaml", + "/browser": "Connect browser tools to your live Chrome (usage: /browser connect|disconnect|status)", + "/plugins": "List installed plugins and their status", }, 
"Info": { "/help": "Show this help message", @@ -92,9 +97,88 @@ class SlashCommandCompleter(Completer): """ return f"{cmd_name} " if cmd_name == word else cmd_name + @staticmethod + def _extract_path_word(text: str) -> str | None: + """Extract the current word if it looks like a file path. + + Returns the path-like token under the cursor, or None if the + current word doesn't look like a path. A word is path-like when + it starts with ``./``, ``../``, ``~/``, ``/``, or contains a + ``/`` separator (e.g. ``src/main.py``). + """ + if not text: + return None + # Walk backwards to find the start of the current "word". + # Words are delimited by spaces, but paths can contain almost anything. + i = len(text) - 1 + while i >= 0 and text[i] != " ": + i -= 1 + word = text[i + 1:] + if not word: + return None + # Only trigger path completion for path-like tokens + if word.startswith(("./", "../", "~/", "/")) or "/" in word: + return word + return None + + @staticmethod + def _path_completions(word: str, limit: int = 30): + """Yield Completion objects for file paths matching *word*.""" + expanded = os.path.expanduser(word) + # Split into directory part and prefix to match inside it + if expanded.endswith("/"): + search_dir = expanded + prefix = "" + else: + search_dir = os.path.dirname(expanded) or "." 
+ prefix = os.path.basename(expanded) + + try: + entries = os.listdir(search_dir) + except OSError: + return + + count = 0 + prefix_lower = prefix.lower() + for entry in sorted(entries): + if prefix and not entry.lower().startswith(prefix_lower): + continue + if count >= limit: + break + + full_path = os.path.join(search_dir, entry) + is_dir = os.path.isdir(full_path) + + # Build the completion text (what replaces the typed word) + if word.startswith("~"): + display_path = "~/" + os.path.relpath(full_path, os.path.expanduser("~")) + elif os.path.isabs(word): + display_path = full_path + else: + # Keep relative + display_path = os.path.relpath(full_path) + + if is_dir: + display_path += "/" + + suffix = "/" if is_dir else "" + meta = "dir" if is_dir else _file_size_label(full_path) + + yield Completion( + display_path, + start_position=-len(word), + display=entry + suffix, + display_meta=meta, + ) + count += 1 + def get_completions(self, document, complete_event): text = document.text_before_cursor if not text.startswith("/"): + # Try file path completion for non-slash input + path_word = self._extract_path_word(text) + if path_word is not None: + yield from self._path_completions(path_word) return word = text[1:] @@ -120,3 +204,18 @@ class SlashCommandCompleter(Completer): display=cmd, display_meta=f"⚡ {short_desc}", ) + + +def _file_size_label(path: str) -> str: + """Return a compact human-readable file size, or '' on error.""" + try: + size = os.path.getsize(path) + except OSError: + return "" + if size < 1024: + return f"{size}B" + if size < 1024 * 1024: + return f"{size / 1024:.0f}K" + if size < 1024 * 1024 * 1024: + return f"{size / (1024 * 1024):.1f}M" + return f"{size / (1024 * 1024 * 1024):.1f}G" diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 9c07153df..c3a4c701a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -118,6 +118,9 @@ DEFAULT_CONFIG = { # Each entry is "host_path:container_path" (standard Docker -v syntax). 
# Example: ["/home/user/projects:/workspace/projects", "/data:/data"] "docker_volumes": [], + # Explicit opt-in: mount the host cwd into /workspace for Docker sessions. + # Default off because passing host directories into a sandbox weakens isolation. + "docker_mount_cwd_to_workspace": False, # Persistent shell — keep a long-lived bash shell across execute() calls # so cwd/env vars/shell variables survive between commands. # Enabled by default for non-local backends (SSH); local is always opt-in @@ -144,6 +147,12 @@ DEFAULT_CONFIG = { "summary_model": "google/gemini-3-flash-preview", "summary_provider": "auto", }, + "smart_model_routing": { + "enabled": False, + "max_simple_chars": 160, + "max_simple_words": 28, + "cheap_model": {}, + }, # Auxiliary model config — provider:model for each side task. # Format: provider is the provider name, model is the model slug. @@ -182,6 +191,12 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", }, + "approval": { + "provider": "auto", + "model": "", # fast/cheap model recommended (e.g. 
gemini-flash, haiku) + "base_url": "", + "api_key": "", + }, "mcp": { "provider": "auto", "model": "", @@ -203,8 +218,14 @@ DEFAULT_CONFIG = { "bell_on_complete": False, "show_reasoning": False, "streaming": False, + "show_cost": False, # Show $ cost in the status bar (off by default) "skin": "default", }, + + # Privacy settings + "privacy": { + "redact_pii": False, # When True, hash user IDs and strip phone numbers from LLM context + }, # Text-to-speech configuration "tts": { @@ -289,6 +310,14 @@ DEFAULT_CONFIG = { "auto_thread": True, # Auto-create threads on @mention in channels (like Slack) }, + # Approval mode for dangerous commands: + # manual — always prompt the user (default) + # smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk + # off — skip all approval prompts (equivalent to --yolo) + "approvals": { + "mode": "manual", + }, + # Permanently allowed dangerous command patterns (added via "always" approval) "command_allowlist": [], # User-defined quick commands that bypass the agent loop (type: exec only) @@ -988,6 +1017,19 @@ _FALLBACK_COMMENT = """ # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 +# +# ── Smart Model Routing ──────────────────────────────────────────────── +# Optional cheap-vs-strong routing for simple turns. +# Keeps the primary model for complex work, but can route short/simple +# messages to a cheaper model across providers. +# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash """ @@ -1018,6 +1060,19 @@ _COMMENTED_SECTIONS = """ # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 +# +# ── Smart Model Routing ──────────────────────────────────────────────── +# Optional cheap-vs-strong routing for simple turns. +# Keeps the primary model for complex work, but can route short/simple +# messages to a cheaper model across providers. 
+# +# smart_model_routing: +# enabled: true +# max_simple_chars: 160 +# max_simple_words: 28 +# cheap_model: +# provider: openrouter +# model: google/gemini-2.5-flash """ @@ -1408,6 +1463,7 @@ def set_config_value(key: str, value: str): "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE", "terminal.modal_image": "TERMINAL_MODAL_IMAGE", "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE", + "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "terminal.cwd": "TERMINAL_CWD", "terminal.timeout": "TERMINAL_TIMEOUT", "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR", diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index df9694843..4bc068c1e 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -119,14 +119,35 @@ def is_windows() -> bool: # Service Configuration # ============================================================================= -SERVICE_NAME = "hermes-gateway" +_SERVICE_BASE = "hermes-gateway" SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration" +def get_service_name() -> str: + """Derive a systemd service name scoped to this HERMES_HOME. + + Default ``~/.hermes`` returns ``hermes-gateway`` (backward compatible). + Any other HERMES_HOME appends a short hash so multiple installations + can each have their own systemd service without conflicting. 
+ """ + import hashlib + from pathlib import Path as _Path # local import to avoid monkeypatch interference + home = _Path(os.getenv("HERMES_HOME", _Path.home() / ".hermes")).resolve() + default = (_Path.home() / ".hermes").resolve() + if home == default: + return _SERVICE_BASE + suffix = hashlib.sha256(str(home).encode()).hexdigest()[:8] + return f"{_SERVICE_BASE}-{suffix}" + + +SERVICE_NAME = _SERVICE_BASE # backward-compat for external importers; prefer get_service_name() + + def get_systemd_unit_path(system: bool = False) -> Path: + name = get_service_name() if system: - return Path("/etc/systemd/system") / f"{SERVICE_NAME}.service" - return Path.home() / ".config" / "systemd" / "user" / f"{SERVICE_NAME}.service" + return Path("/etc/systemd/system") / f"{name}.service" + return Path.home() / ".config" / "systemd" / "user" / f"{name}.service" def _systemctl_cmd(system: bool = False) -> list[str]: @@ -350,8 +371,6 @@ def get_hermes_cli_path() -> str: # ============================================================================= def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str: - import shutil - python_path = get_python_path() working_dir = str(PROJECT_ROOT) venv_dir = str(PROJECT_ROOT / "venv") @@ -360,7 +379,8 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) # Build a PATH that includes the venv, node_modules, and standard system dirs sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main" + + hermes_home = str(Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")).resolve()) if system: username, group_name, home_dir = _system_service_identity(run_as_user) @@ -380,11 +400,12 @@ Environment="USER={username}" Environment="LOGNAME={username}" Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" +Environment="HERMES_HOME={hermes_home}" Restart=on-failure 
RestartSec=10 KillMode=mixed KillSignal=SIGTERM -TimeoutStopSec=15 +TimeoutStopSec=60 StandardOutput=journal StandardError=journal @@ -399,15 +420,15 @@ After=network.target [Service] Type=simple ExecStart={python_path} -m hermes_cli.main gateway run --replace -ExecStop={hermes_cli} gateway stop WorkingDirectory={working_dir} Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" +Environment="HERMES_HOME={hermes_home}" Restart=on-failure RestartSec=10 KillMode=mixed KillSignal=SIGTERM -TimeoutStopSec=15 +TimeoutStopSec=60 StandardOutput=journal StandardError=journal @@ -455,7 +476,7 @@ def _print_linger_enable_warning(username: str, detail: str | None = None) -> No print(f" sudo loginctl enable-linger {username}") print() print(" Then restart the gateway:") - print(f" systemctl --user restart {SERVICE_NAME}.service") + print(f" systemctl --user restart {get_service_name()}.service") print() @@ -526,7 +547,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8") subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True) - subprocess.run(_systemctl_cmd(system) + ["enable", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True) print() print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!") @@ -534,7 +555,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str print("Next steps:") print(f" {'sudo ' if system else ''}hermes gateway start{scope_flag} # Start the service") print(f" {'sudo ' if system else ''}hermes gateway status{scope_flag} # Check status") - print(f" {'journalctl' if system else 'journalctl --user'} -u {SERVICE_NAME} -f # View logs") + print(f" {'journalctl' if system else 'journalctl --user'} -u {get_service_name()} -f # View logs") print() if system: @@ -552,8 +573,8 @@ def 
systemd_uninstall(system: bool = False): if system: _require_root_for_system_service("uninstall") - subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=False) - subprocess.run(_systemctl_cmd(system) + ["disable", SERVICE_NAME], check=False) + subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=False) + subprocess.run(_systemctl_cmd(system) + ["disable", get_service_name()], check=False) unit_path = get_systemd_unit_path(system=system) if unit_path.exists(): @@ -569,7 +590,7 @@ def systemd_start(system: bool = False): if system: _require_root_for_system_service("start") refresh_systemd_unit_if_needed(system=system) - subprocess.run(_systemctl_cmd(system) + ["start", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["start", get_service_name()], check=True) print(f"✓ {_service_scope_label(system).capitalize()} service started") @@ -578,7 +599,7 @@ def systemd_stop(system: bool = False): system = _select_systemd_scope(system) if system: _require_root_for_system_service("stop") - subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=True) print(f"✓ {_service_scope_label(system).capitalize()} service stopped") @@ -588,7 +609,7 @@ def systemd_restart(system: bool = False): if system: _require_root_for_system_service("restart") refresh_systemd_unit_if_needed(system=system) - subprocess.run(_systemctl_cmd(system) + ["restart", SERVICE_NAME], check=True) + subprocess.run(_systemctl_cmd(system) + ["restart", get_service_name()], check=True) print(f"✓ {_service_scope_label(system).capitalize()} service restarted") @@ -613,12 +634,12 @@ def systemd_status(deep: bool = False, system: bool = False): print() subprocess.run( - _systemctl_cmd(system) + ["status", SERVICE_NAME, "--no-pager"], + _systemctl_cmd(system) + ["status", get_service_name(), "--no-pager"], capture_output=False, ) result = subprocess.run( - 
_systemctl_cmd(system) + ["is-active", SERVICE_NAME], + _systemctl_cmd(system) + ["is-active", get_service_name()], capture_output=True, text=True, ) @@ -657,7 +678,7 @@ def systemd_status(deep: bool = False, system: bool = False): if deep: print() print("Recent logs:") - subprocess.run(_journalctl_cmd(system) + ["-u", SERVICE_NAME, "-n", "20", "--no-pager"]) + subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]) # ============================================================================= @@ -684,6 +705,7 @@ def generate_launchd_plist() -> str: hermes_cli.main gateway run + --replace WorkingDirectory @@ -707,6 +729,36 @@ def generate_launchd_plist() -> str: """ +def launchd_plist_is_current() -> bool: + """Check if the installed launchd plist matches the currently generated one.""" + plist_path = get_launchd_plist_path() + if not plist_path.exists(): + return False + + installed = plist_path.read_text(encoding="utf-8") + expected = generate_launchd_plist() + return _normalize_service_definition(installed) == _normalize_service_definition(expected) + + +def refresh_launchd_plist_if_needed() -> bool: + """Rewrite the installed launchd plist when the generated definition has changed. + + Unlike systemd, launchd picks up plist changes on the next ``launchctl stop``/ + ``launchctl start`` cycle — no daemon-reload is needed. We still unload/reload + to make launchd re-read the updated plist immediately. 
+ """ + plist_path = get_launchd_plist_path() + if not plist_path.exists() or launchd_plist_is_current(): + return False + + plist_path.write_text(generate_launchd_plist(), encoding="utf-8") + # Unload/reload so launchd picks up the new definition + subprocess.run(["launchctl", "unload", str(plist_path)], check=False) + subprocess.run(["launchctl", "load", str(plist_path)], check=False) + print("↻ Updated gateway launchd service definition to match the current Hermes install") + return True + + def launchd_install(force: bool = False): plist_path = get_launchd_plist_path() @@ -739,6 +791,7 @@ def launchd_uninstall(): print("✓ Service uninstalled") def launchd_start(): + refresh_launchd_plist_if_needed() subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True) print("✓ Service started") @@ -747,6 +800,7 @@ def launchd_stop(): print("✓ Service stopped") def launchd_restart(): + refresh_launchd_plist_if_needed() launchd_stop() launchd_start() @@ -1118,7 +1172,7 @@ def _is_service_running() -> bool: if user_unit_exists: result = subprocess.run( - _systemctl_cmd(False) + ["is-active", SERVICE_NAME], + _systemctl_cmd(False) + ["is-active", get_service_name()], capture_output=True, text=True ) if result.stdout.strip() == "active": @@ -1126,7 +1180,7 @@ def _is_service_running() -> bool: if system_unit_exists: result = subprocess.run( - _systemctl_cmd(True) + ["is-active", SERVICE_NAME], + _systemctl_cmd(True) + ["is-active", get_service_name()], capture_output=True, text=True ) if result.stdout.strip() == "active": diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b835efb0f..845ae207e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2301,26 +2301,106 @@ def cmd_update(args): print() print("✓ Update complete!") - # Auto-restart gateway if it's running as a systemd service + # Auto-restart gateway if it's running. + # Uses the PID file (scoped to HERMES_HOME) to find this + # installation's gateway — safe with multiple installations. 
try: - check = subprocess.run( - ["systemctl", "--user", "is-active", "hermes-gateway"], - capture_output=True, text=True, timeout=5, + from gateway.status import get_running_pid, remove_pid_file + from hermes_cli.gateway import ( + get_service_name, get_launchd_plist_path, is_macos, + refresh_launchd_plist_if_needed, ) - if check.stdout.strip() == "active": - print() - print("→ Gateway service is running — restarting to pick up changes...") - restart = subprocess.run( - ["systemctl", "--user", "restart", "hermes-gateway"], - capture_output=True, text=True, timeout=15, + import signal as _signal + + _gw_service_name = get_service_name() + existing_pid = get_running_pid() + has_systemd_service = False + has_launchd_service = False + + try: + check = subprocess.run( + ["systemctl", "--user", "is-active", _gw_service_name], + capture_output=True, text=True, timeout=5, ) - if restart.returncode == 0: - print("✓ Gateway restarted.") - else: - print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") - print(" Try manually: hermes gateway restart") - except (FileNotFoundError, subprocess.TimeoutExpired): - pass # No systemd (macOS, WSL1, etc.) — skip silently + has_systemd_service = check.stdout.strip() == "active" + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + # Check for macOS launchd service + if is_macos(): + try: + plist_path = get_launchd_plist_path() + if plist_path.exists(): + check = subprocess.run( + ["launchctl", "list", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=5, + ) + has_launchd_service = check.returncode == 0 + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + if existing_pid or has_systemd_service or has_launchd_service: + print() + + # When a service manager is handling the gateway, let it + # manage the lifecycle — don't manually SIGTERM the PID + # (launchd KeepAlive would respawn immediately, causing races). 
+ if has_systemd_service: + import time as _time + if existing_pid: + try: + os.kill(existing_pid, _signal.SIGTERM) + print(f"→ Stopped gateway process (PID {existing_pid})") + except ProcessLookupError: + pass + except PermissionError: + print(f"⚠ Permission denied killing gateway PID {existing_pid}") + remove_pid_file() + _time.sleep(1) # Brief pause for port/socket release + print("→ Restarting gateway service...") + restart = subprocess.run( + ["systemctl", "--user", "restart", _gw_service_name], + capture_output=True, text=True, timeout=15, + ) + if restart.returncode == 0: + print("✓ Gateway restarted.") + else: + print(f"⚠ Gateway restart failed: {restart.stderr.strip()}") + print(" Try manually: hermes gateway restart") + elif has_launchd_service: + # Refresh the plist first (picks up --replace and other + # changes from the update we just pulled). + refresh_launchd_plist_if_needed() + # Explicit stop+start — don't rely on KeepAlive respawn + # after a manual SIGTERM, which would race with the + # PID file cleanup. 
+ print("→ Restarting gateway service...") + stop = subprocess.run( + ["launchctl", "stop", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=10, + ) + start = subprocess.run( + ["launchctl", "start", "ai.hermes.gateway"], + capture_output=True, text=True, timeout=10, + ) + if start.returncode == 0: + print("✓ Gateway restarted via launchd.") + else: + print(f"⚠ Gateway restart failed: {start.stderr.strip()}") + print(" Try manually: hermes gateway restart") + elif existing_pid: + try: + os.kill(existing_pid, _signal.SIGTERM) + print(f"→ Stopped gateway process (PID {existing_pid})") + except ProcessLookupError: + pass # Already gone + except PermissionError: + print(f"⚠ Permission denied killing gateway PID {existing_pid}") + remove_pid_file() + print(" ℹ️ Gateway was running manually (not as a service).") + print(" Restart it with: hermes gateway run") + except Exception as e: + logger.debug("Gateway restart during update failed: %s", e) print() print("Tip: You can now select a provider and model:") diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py new file mode 100644 index 000000000..30fb28d1c --- /dev/null +++ b/hermes_cli/plugins.py @@ -0,0 +1,449 @@ +""" +Hermes Plugin System +==================== + +Discovers, loads, and manages plugins from three sources: + +1. **User plugins** – ``~/.hermes/plugins//`` +2. **Project plugins** – ``./.hermes/plugins//`` +3. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` + entry-point group. + +Each directory plugin must contain a ``plugin.yaml`` manifest **and** an +``__init__.py`` with a ``register(ctx)`` function. + +Lifecycle hooks +--------------- +Plugins may register callbacks for any of the hooks in ``VALID_HOOKS``. +The agent core calls ``invoke_hook(name, **kwargs)`` at the appropriate +points. + +Tool registration +----------------- +``PluginContext.register_tool()`` delegates to ``tools.registry.register()`` +so plugin-defined tools appear alongside the built-in tools. 
+""" + +from __future__ import annotations + +import importlib +import importlib.metadata +import importlib.util +import logging +import os +import sys +import types +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Set + +try: + import yaml +except ImportError: # pragma: no cover – yaml is optional at import time + yaml = None # type: ignore[assignment] + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +VALID_HOOKS: Set[str] = { + "pre_tool_call", + "post_tool_call", + "pre_llm_call", + "post_llm_call", + "on_session_start", + "on_session_end", +} + +ENTRY_POINTS_GROUP = "hermes_agent.plugins" + +_NS_PARENT = "hermes_plugins" + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + +@dataclass +class PluginManifest: + """Parsed representation of a plugin.yaml manifest.""" + + name: str + version: str = "" + description: str = "" + author: str = "" + requires_env: List[str] = field(default_factory=list) + provides_tools: List[str] = field(default_factory=list) + provides_hooks: List[str] = field(default_factory=list) + source: str = "" # "user", "project", or "entrypoint" + path: Optional[str] = None + + +@dataclass +class LoadedPlugin: + """Runtime state for a single loaded plugin.""" + + manifest: PluginManifest + module: Optional[types.ModuleType] = None + tools_registered: List[str] = field(default_factory=list) + hooks_registered: List[str] = field(default_factory=list) + enabled: bool = False + error: Optional[str] = None + + +# --------------------------------------------------------------------------- +# PluginContext – handed to each plugin's ``register()`` function +# 
--------------------------------------------------------------------------- + +class PluginContext: + """Facade given to plugins so they can register tools and hooks.""" + + def __init__(self, manifest: PluginManifest, manager: "PluginManager"): + self.manifest = manifest + self._manager = manager + + # -- tool registration -------------------------------------------------- + + def register_tool( + self, + name: str, + toolset: str, + schema: dict, + handler: Callable, + check_fn: Callable | None = None, + requires_env: list | None = None, + is_async: bool = False, + description: str = "", + emoji: str = "", + ) -> None: + """Register a tool in the global registry **and** track it as plugin-provided.""" + from tools.registry import registry + + registry.register( + name=name, + toolset=toolset, + schema=schema, + handler=handler, + check_fn=check_fn, + requires_env=requires_env, + is_async=is_async, + description=description, + emoji=emoji, + ) + self._manager._plugin_tool_names.add(name) + logger.debug("Plugin %s registered tool: %s", self.manifest.name, name) + + # -- hook registration -------------------------------------------------- + + def register_hook(self, hook_name: str, callback: Callable) -> None: + """Register a lifecycle hook callback. + + Unknown hook names produce a warning but are still stored so + forward-compatible plugins don't break. 
+ """ + if hook_name not in VALID_HOOKS: + logger.warning( + "Plugin '%s' registered unknown hook '%s' " + "(valid: %s)", + self.manifest.name, + hook_name, + ", ".join(sorted(VALID_HOOKS)), + ) + self._manager._hooks.setdefault(hook_name, []).append(callback) + logger.debug("Plugin %s registered hook: %s", self.manifest.name, hook_name) + + +# --------------------------------------------------------------------------- +# PluginManager +# --------------------------------------------------------------------------- + +class PluginManager: + """Central manager that discovers, loads, and invokes plugins.""" + + def __init__(self) -> None: + self._plugins: Dict[str, LoadedPlugin] = {} + self._hooks: Dict[str, List[Callable]] = {} + self._plugin_tool_names: Set[str] = set() + self._discovered: bool = False + + # ----------------------------------------------------------------------- + # Public + # ----------------------------------------------------------------------- + + def discover_and_load(self) -> None: + """Scan all plugin sources and load each plugin found.""" + if self._discovered: + return + self._discovered = True + + manifests: List[PluginManifest] = [] + + # 1. User plugins (~/.hermes/plugins/) + hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")) + user_dir = Path(hermes_home) / "plugins" + manifests.extend(self._scan_directory(user_dir, source="user")) + + # 2. Project plugins (./.hermes/plugins/) + project_dir = Path.cwd() / ".hermes" / "plugins" + manifests.extend(self._scan_directory(project_dir, source="project")) + + # 3. 
Pip / entry-point plugins + manifests.extend(self._scan_entry_points()) + + # Load each manifest + for manifest in manifests: + self._load_plugin(manifest) + + if manifests: + logger.info( + "Plugin discovery complete: %d found, %d enabled", + len(self._plugins), + sum(1 for p in self._plugins.values() if p.enabled), + ) + + # ----------------------------------------------------------------------- + # Directory scanning + # ----------------------------------------------------------------------- + + def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]: + """Read ``plugin.yaml`` manifests from subdirectories of *path*.""" + manifests: List[PluginManifest] = [] + if not path.is_dir(): + return manifests + + for child in sorted(path.iterdir()): + if not child.is_dir(): + continue + manifest_file = child / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = child / "plugin.yml" + if not manifest_file.exists(): + logger.debug("Skipping %s (no plugin.yaml)", child) + continue + + try: + if yaml is None: + logger.warning("PyYAML not installed – cannot load %s", manifest_file) + continue + data = yaml.safe_load(manifest_file.read_text()) or {} + manifest = PluginManifest( + name=data.get("name", child.name), + version=str(data.get("version", "")), + description=data.get("description", ""), + author=data.get("author", ""), + requires_env=data.get("requires_env", []), + provides_tools=data.get("provides_tools", []), + provides_hooks=data.get("provides_hooks", []), + source=source, + path=str(child), + ) + manifests.append(manifest) + except Exception as exc: + logger.warning("Failed to parse %s: %s", manifest_file, exc) + + return manifests + + # ----------------------------------------------------------------------- + # Entry-point scanning + # ----------------------------------------------------------------------- + + def _scan_entry_points(self) -> List[PluginManifest]: + """Check ``importlib.metadata`` for pip-installed plugins.""" + 
manifests: List[PluginManifest] = [] + try: + eps = importlib.metadata.entry_points() + # Python 3.12+ returns a SelectableGroups; earlier returns dict + if hasattr(eps, "select"): + group_eps = eps.select(group=ENTRY_POINTS_GROUP) + elif isinstance(eps, dict): + group_eps = eps.get(ENTRY_POINTS_GROUP, []) + else: + group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP] + + for ep in group_eps: + manifest = PluginManifest( + name=ep.name, + source="entrypoint", + path=ep.value, + ) + manifests.append(manifest) + except Exception as exc: + logger.debug("Entry-point scan failed: %s", exc) + + return manifests + + # ----------------------------------------------------------------------- + # Loading + # ----------------------------------------------------------------------- + + def _load_plugin(self, manifest: PluginManifest) -> None: + """Import a plugin module and call its ``register(ctx)`` function.""" + loaded = LoadedPlugin(manifest=manifest) + + try: + if manifest.source in ("user", "project"): + module = self._load_directory_module(manifest) + else: + module = self._load_entrypoint_module(manifest) + + loaded.module = module + + # Call register() + register_fn = getattr(module, "register", None) + if register_fn is None: + loaded.error = "no register() function" + logger.warning("Plugin '%s' has no register() function", manifest.name) + else: + ctx = PluginContext(manifest, self) + register_fn(ctx) + loaded.tools_registered = [ + t for t in self._plugin_tool_names + if t not in { + n + for name, p in self._plugins.items() + for n in p.tools_registered + } + ] + loaded.hooks_registered = list( + { + h + for h, cbs in self._hooks.items() + if cbs # non-empty + } + - { + h + for name, p in self._plugins.items() + for h in p.hooks_registered + } + ) + loaded.enabled = True + + except Exception as exc: + loaded.error = str(exc) + logger.warning("Failed to load plugin '%s': %s", manifest.name, exc) + + self._plugins[manifest.name] = loaded + + def 
_load_directory_module(self, manifest: PluginManifest) -> types.ModuleType: + """Import a directory-based plugin as ``hermes_plugins.``.""" + plugin_dir = Path(manifest.path) # type: ignore[arg-type] + init_file = plugin_dir / "__init__.py" + if not init_file.exists(): + raise FileNotFoundError(f"No __init__.py in {plugin_dir}") + + # Ensure the namespace parent package exists + if _NS_PARENT not in sys.modules: + ns_pkg = types.ModuleType(_NS_PARENT) + ns_pkg.__path__ = [] # type: ignore[attr-defined] + ns_pkg.__package__ = _NS_PARENT + sys.modules[_NS_PARENT] = ns_pkg + + module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}" + spec = importlib.util.spec_from_file_location( + module_name, + init_file, + submodule_search_locations=[str(plugin_dir)], + ) + if spec is None or spec.loader is None: + raise ImportError(f"Cannot create module spec for {init_file}") + + module = importlib.util.module_from_spec(spec) + module.__package__ = module_name + module.__path__ = [str(plugin_dir)] # type: ignore[attr-defined] + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + def _load_entrypoint_module(self, manifest: PluginManifest) -> types.ModuleType: + """Load a pip-installed plugin via its entry-point reference.""" + eps = importlib.metadata.entry_points() + if hasattr(eps, "select"): + group_eps = eps.select(group=ENTRY_POINTS_GROUP) + elif isinstance(eps, dict): + group_eps = eps.get(ENTRY_POINTS_GROUP, []) + else: + group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP] + + for ep in group_eps: + if ep.name == manifest.name: + return ep.load() + + raise ImportError( + f"Entry point '{manifest.name}' not found in group '{ENTRY_POINTS_GROUP}'" + ) + + # ----------------------------------------------------------------------- + # Hook invocation + # ----------------------------------------------------------------------- + + def invoke_hook(self, hook_name: str, **kwargs: Any) -> None: + """Call all registered callbacks 
for *hook_name*. + + Each callback is wrapped in its own try/except so a misbehaving + plugin cannot break the core agent loop. + """ + callbacks = self._hooks.get(hook_name, []) + for cb in callbacks: + try: + cb(**kwargs) + except Exception as exc: + logger.warning( + "Hook '%s' callback %s raised: %s", + hook_name, + getattr(cb, "__name__", repr(cb)), + exc, + ) + + # ----------------------------------------------------------------------- + # Introspection + # ----------------------------------------------------------------------- + + def list_plugins(self) -> List[Dict[str, Any]]: + """Return a list of info dicts for all discovered plugins.""" + result: List[Dict[str, Any]] = [] + for name, loaded in sorted(self._plugins.items()): + result.append( + { + "name": name, + "version": loaded.manifest.version, + "description": loaded.manifest.description, + "source": loaded.manifest.source, + "enabled": loaded.enabled, + "tools": len(loaded.tools_registered), + "hooks": len(loaded.hooks_registered), + "error": loaded.error, + } + ) + return result + + +# --------------------------------------------------------------------------- +# Module-level singleton & convenience functions +# --------------------------------------------------------------------------- + +_plugin_manager: Optional[PluginManager] = None + + +def get_plugin_manager() -> PluginManager: + """Return (and lazily create) the global PluginManager singleton.""" + global _plugin_manager + if _plugin_manager is None: + _plugin_manager = PluginManager() + return _plugin_manager + + +def discover_plugins() -> None: + """Discover and load all plugins (idempotent).""" + get_plugin_manager().discover_and_load() + + +def invoke_hook(hook_name: str, **kwargs: Any) -> None: + """Invoke a lifecycle hook on all loaded plugins.""" + get_plugin_manager().invoke_hook(hook_name, **kwargs) + + +def get_plugin_tool_names() -> Set[str]: + """Return the set of tool names registered by plugins.""" + return 
get_plugin_manager()._plugin_tool_names diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 1f57d86d0..e751811a1 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -743,6 +743,7 @@ def setup_model_provider(config: dict): selected_provider = ( None # "nous", "openai-codex", "openrouter", "custom", or None (keep) ) + selected_base_url = None # deferred until after model selection nous_models = [] # populated if Nous login succeeds if provider_idx == 0: # Nous Portal (OAuth) @@ -1025,8 +1026,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("zai", zai_base_url, default_model="glm-5") _set_model_provider(config, "zai", zai_base_url) + selected_base_url = zai_base_url elif provider_idx == 5: # Kimi / Moonshot selected_provider = "kimi-coding" @@ -1058,8 +1059,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("kimi-coding", pconfig.inference_base_url, default_model="kimi-k2.5") _set_model_provider(config, "kimi-coding", pconfig.inference_base_url) + selected_base_url = pconfig.inference_base_url elif provider_idx == 6: # MiniMax selected_provider = "minimax" @@ -1091,8 +1092,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("minimax", pconfig.inference_base_url, default_model="MiniMax-M2.5") _set_model_provider(config, "minimax", pconfig.inference_base_url) + selected_base_url = pconfig.inference_base_url elif provider_idx == 7: # MiniMax China selected_provider = "minimax-cn" @@ -1124,8 +1125,8 @@ def setup_model_provider(config: dict): if existing_custom: save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - _update_config_for_provider("minimax-cn", 
pconfig.inference_base_url, default_model="MiniMax-M2.5") _set_model_provider(config, "minimax-cn", pconfig.inference_base_url) + selected_base_url = pconfig.inference_base_url elif provider_idx == 8: # Anthropic selected_provider = "anthropic" @@ -1228,8 +1229,8 @@ def setup_model_provider(config: dict): save_env_value("OPENAI_API_KEY", "") # Don't save base_url for Anthropic — resolve_runtime_provider() # always hardcodes it. Stale base_urls contaminate other providers. - _update_config_for_provider("anthropic", "", default_model="claude-opus-4-6") _set_model_provider(config, "anthropic") + selected_base_url = "" # else: provider_idx == 9 (Keep current) — only shown when a provider already exists # Normalize "keep current" to an explicit provider so downstream logic @@ -1459,6 +1460,12 @@ def setup_model_provider(config: dict): ) print_success(f"Model set to: {_display}") + # Write provider+base_url to config.yaml only after model selection is complete. + # This prevents a race condition where the gateway picks up a new provider + # before the model name has been updated to match. 
+ if selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "anthropic") and selected_base_url is not None: + _update_config_for_provider(selected_provider, selected_base_url) + save_config(config) diff --git a/hermes_cli/status.py b/hermes_cli/status.py index db7ce0641..be490e930 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -275,8 +275,13 @@ def show_status(args): print(color("◆ Gateway Service", Colors.CYAN, Colors.BOLD)) if sys.platform.startswith('linux'): + try: + from hermes_cli.gateway import get_service_name + _gw_svc = get_service_name() + except Exception: + _gw_svc = "hermes-gateway" result = subprocess.run( - ["systemctl", "--user", "is-active", "hermes-gateway"], + ["systemctl", "--user", "is-active", _gw_svc], capture_output=True, text=True ) diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py index d70405ce3..40ff75f16 100644 --- a/hermes_cli/uninstall.py +++ b/hermes_cli/uninstall.py @@ -133,7 +133,13 @@ def uninstall_gateway_service(): if platform.system() != "Linux": return False - service_file = Path.home() / ".config" / "systemd" / "user" / "hermes-gateway.service" + try: + from hermes_cli.gateway import get_service_name + svc_name = get_service_name() + except Exception: + svc_name = "hermes-gateway" + + service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service" if not service_file.exists(): return False @@ -141,14 +147,14 @@ def uninstall_gateway_service(): try: # Stop the service subprocess.run( - ["systemctl", "--user", "stop", "hermes-gateway"], + ["systemctl", "--user", "stop", svc_name], capture_output=True, check=False ) # Disable the service subprocess.run( - ["systemctl", "--user", "disable", "hermes-gateway"], + ["systemctl", "--user", "disable", svc_name], capture_output=True, check=False ) diff --git a/model_tools.py b/model_tools.py index be1f5d02f..f95ecddef 100644 --- a/model_tools.py +++ b/model_tools.py @@ -113,6 +113,13 @@ try: except Exception as e: 
logger.debug("MCP tool discovery failed: %s", e) +# Plugin tool discovery (user/project/pip plugins) +try: + from hermes_cli.plugins import discover_plugins + discover_plugins() +except Exception as e: + logger.debug("Plugin discovery failed: %s", e) + # ============================================================================= # Backward-compat constants (built once after discovery) @@ -222,6 +229,16 @@ def get_tool_definitions( for ts_name in get_all_toolsets(): tools_to_include.update(resolve_toolset(ts_name)) + # Always include plugin-registered tools — they bypass the toolset filter + # because their toolsets are dynamic (created at plugin load time). + try: + from hermes_cli.plugins import get_plugin_tool_names + plugin_tools = get_plugin_tool_names() + if plugin_tools: + tools_to_include.update(plugin_tools) + except Exception: + pass + # Ask the registry for schemas (only returns tools whose check_fn passes) filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode) @@ -300,25 +317,39 @@ def handle_function_call( if function_name in _AGENT_LOOP_TOOLS: return json.dumps({"error": f"{function_name} must be handled by the agent loop"}) + try: + from hermes_cli.plugins import invoke_hook + invoke_hook("pre_tool_call", tool_name=function_name, args=function_args, task_id=task_id or "") + except Exception: + pass + if function_name == "execute_code": # Prefer the caller-provided list so subagents can't overwrite # the parent's tool set via the process-global. 
sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names - return registry.dispatch( + result = registry.dispatch( function_name, function_args, task_id=task_id, enabled_tools=sandbox_enabled, honcho_manager=honcho_manager, honcho_session_key=honcho_session_key, ) + else: + result = registry.dispatch( + function_name, function_args, + task_id=task_id, + user_task=user_task, + honcho_manager=honcho_manager, + honcho_session_key=honcho_session_key, + ) - return registry.dispatch( - function_name, function_args, - task_id=task_id, - user_task=user_task, - honcho_manager=honcho_manager, - honcho_session_key=honcho_session_key, - ) + try: + from hermes_cli.plugins import invoke_hook + invoke_hook("post_tool_call", tool_name=function_name, args=function_args, result=result, task_id=task_id or "") + except Exception: + pass + + return result except Exception as e: error_msg = f"Error executing {function_name}: {str(e)}" diff --git a/optional-skills/creative/blender-mcp/SKILL.md b/optional-skills/creative/blender-mcp/SKILL.md new file mode 100644 index 000000000..bdcb98a3c --- /dev/null +++ b/optional-skills/creative/blender-mcp/SKILL.md @@ -0,0 +1,116 @@ +--- +name: blender-mcp +description: Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. Use when user wants to create or modify anything in Blender. +version: 1.0.0 +requires: Blender 4.3+ (desktop instance required, headless not supported) +author: alireza78a +tags: [blender, 3d, animation, modeling, bpy, mcp] +--- + +# Blender MCP + +Control a running Blender instance from Hermes via socket on TCP port 9876. + +## Setup (one-time) + +### 1. 
Install the Blender addon + + curl -sL https://raw.githubusercontent.com/ahujasid/blender-mcp/main/addon.py -o ~/Desktop/blender_mcp_addon.py + +In Blender: + Edit > Preferences > Add-ons > Install > select blender_mcp_addon.py + Enable "Interface: Blender MCP" + +### 2. Start the socket server in Blender + +Press N in Blender viewport to open sidebar. +Find "BlenderMCP" tab and click "Start Server". + +### 3. Verify connection + + nc -z -w2 localhost 9876 && echo "OPEN" || echo "CLOSED" + +## Protocol + +Plain UTF-8 JSON over TCP -- no length prefix. + +Send: {"type": "", "params": {}} +Receive: {"status": "success", "result": } + {"status": "error", "message": ""} + +## Available Commands + +| type | params | description | +|-------------------------|-------------------|---------------------------------| +| execute_code | code (str) | Run arbitrary bpy Python code | +| get_scene_info | (none) | List all objects in scene | +| get_object_info | object_name (str) | Details on a specific object | +| get_viewport_screenshot | (none) | Screenshot of current viewport | + +## Python Helper + +Use this inside execute_code tool calls: + + import socket, json + + def blender_exec(code: str, host="localhost", port=9876, timeout=15): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((host, port)) + s.settimeout(timeout) + payload = json.dumps({"type": "execute_code", "params": {"code": code}}) + s.sendall(payload.encode("utf-8")) + buf = b"" + while True: + try: + chunk = s.recv(4096) + if not chunk: + break + buf += chunk + try: + json.loads(buf.decode("utf-8")) + break + except json.JSONDecodeError: + continue + except socket.timeout: + break + s.close() + return json.loads(buf.decode("utf-8")) + +## Common bpy Patterns + +### Clear scene + bpy.ops.object.select_all(action='SELECT') + bpy.ops.object.delete() + +### Add mesh objects + bpy.ops.mesh.primitive_uv_sphere_add(radius=1, location=(0, 0, 0)) + bpy.ops.mesh.primitive_cube_add(size=2, location=(3, 0, 
0)) + bpy.ops.mesh.primitive_cylinder_add(radius=0.5, depth=2, location=(-3, 0, 0)) + +### Create and assign material + mat = bpy.data.materials.new(name="MyMat") + mat.use_nodes = True + bsdf = mat.node_tree.nodes.get("Principled BSDF") + bsdf.inputs["Base Color"].default_value = (R, G, B, 1.0) + bsdf.inputs["Roughness"].default_value = 0.3 + bsdf.inputs["Metallic"].default_value = 0.0 + obj.data.materials.append(mat) + +### Keyframe animation + obj.location = (0, 0, 0) + obj.keyframe_insert(data_path="location", frame=1) + obj.location = (0, 0, 3) + obj.keyframe_insert(data_path="location", frame=60) + +### Render to file + bpy.context.scene.render.filepath = "/tmp/render.png" + bpy.context.scene.render.engine = 'CYCLES' + bpy.ops.render.render(write_still=True) + +## Pitfalls + +- Must check socket is open before running (nc -z localhost 9876) +- Addon server must be started inside Blender each session (N-panel > BlenderMCP > Connect) +- Break complex scenes into multiple smaller execute_code calls to avoid timeouts +- Render output path must be absolute (/tmp/...) not relative +- shade_smooth() requires object to be selected and in object mode diff --git a/run_agent.py b/run_agent.py index a9088732a..2c95e3121 100644 --- a/run_agent.py +++ b/run_agent.py @@ -814,7 +814,7 @@ class AIAgent: logger.debug("peer %s memory_mode=honcho: local USER.md writes disabled", _hcfg.peer_name or "user") # Skills config: nudge interval for skill creation reminders - self._skill_nudge_interval = 15 + self._skill_nudge_interval = 10 try: from hermes_cli.config import load_config as _load_skills_config skills_config = _load_skills_config().get("skills", {}) @@ -3699,7 +3699,8 @@ class AIAgent: flush_content = ( "[System: The session is being compressed. 
" - "Please save anything worth remembering to your memories.]" + "Save anything worth remembering — prioritize user preferences, " + "corrections, and recurring patterns over task-specific details.]" ) _sentinel = f"__flush_{id(self)}_{time.monotonic()}" flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel} @@ -4698,8 +4699,9 @@ class AIAgent: self._turns_since_memory += 1 if self._turns_since_memory >= self._memory_nudge_interval: user_message += ( - "\n\n[System: You've had several exchanges in this session. " - "Consider whether there's anything worth saving to your memories.]" + "\n\n[System: You've had several exchanges. Consider: " + "has the user shared preferences, corrected you, or revealed " + "something about their workflow worth remembering for future sessions?]" ) self._turns_since_memory = 0 @@ -4709,8 +4711,9 @@ class AIAgent: and self._iters_since_skill >= self._skill_nudge_interval and "skill_manage" in self.valid_tool_names): user_message += ( - "\n\n[System: The previous task involved many steps. " - "If you discovered a reusable workflow, consider saving it as a skill.]" + "\n\n[System: The previous task involved many tool calls. 
" + "Save the approach as a skill if it's reusable, or update " + "any existing skill you used if it was wrong or incomplete.]" ) self._iters_since_skill = 0 diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index 96475c67c..341f4b758 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -295,3 +295,97 @@ class TestOnConnect: mock_conn = MagicMock(spec=acp.Client) agent.on_connect(mock_conn) assert agent._conn is mock_conn + + +# --------------------------------------------------------------------------- +# Slash commands +# --------------------------------------------------------------------------- + + +class TestSlashCommands: + """Test slash command dispatch in the ACP adapter.""" + + def _make_state(self, mock_manager): + state = mock_manager.create_session(cwd="/tmp") + state.agent.model = "test-model" + state.agent.provider = "openrouter" + state.model = "test-model" + return state + + def test_help_lists_commands(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = agent._handle_slash_command("/help", state) + assert result is not None + assert "/help" in result + assert "/model" in result + assert "/tools" in result + assert "/reset" in result + + def test_model_shows_current(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = agent._handle_slash_command("/model", state) + assert "test-model" in result + + def test_context_empty(self, agent, mock_manager): + state = self._make_state(mock_manager) + state.history = [] + result = agent._handle_slash_command("/context", state) + assert "empty" in result.lower() + + def test_context_with_messages(self, agent, mock_manager): + state = self._make_state(mock_manager) + state.history = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ] + result = agent._handle_slash_command("/context", state) + assert "2 messages" in result + assert "user: 1" in result + + def test_reset_clears_history(self, 
agent, mock_manager): + state = self._make_state(mock_manager) + state.history = [{"role": "user", "content": "hello"}] + result = agent._handle_slash_command("/reset", state) + assert "cleared" in result.lower() + assert len(state.history) == 0 + + def test_version(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = agent._handle_slash_command("/version", state) + assert HERMES_VERSION in result + + def test_unknown_command_returns_none(self, agent, mock_manager): + state = self._make_state(mock_manager) + result = agent._handle_slash_command("/nonexistent", state) + assert result is None + + @pytest.mark.asyncio + async def test_slash_command_intercepted_in_prompt(self, agent, mock_manager): + """Slash commands should be handled without calling the LLM.""" + new_resp = await agent.new_session(cwd="/tmp") + mock_conn = AsyncMock(spec=acp.Client) + agent._conn = mock_conn + + prompt = [TextContentBlock(type="text", text="/help")] + resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id) + + assert resp.stop_reason == "end_turn" + mock_conn.session_update.assert_called_once() + + @pytest.mark.asyncio + async def test_unknown_slash_falls_through_to_llm(self, agent, mock_manager): + """Unknown /commands should be sent to the LLM, not intercepted.""" + new_resp = await agent.new_session(cwd="/tmp") + mock_conn = AsyncMock(spec=acp.Client) + agent._conn = mock_conn + + # Mock run_in_executor to avoid actually running the agent + with patch("asyncio.get_running_loop") as mock_loop: + mock_loop.return_value.run_in_executor = AsyncMock(return_value={ + "final_response": "I processed /foo", + "messages": [], + }) + prompt = [TextContentBlock(type="text", text="/foo bar")] + resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id) + + assert resp.stop_reason == "end_turn" diff --git a/tests/agent/test_smart_model_routing.py b/tests/agent/test_smart_model_routing.py new file mode 100644 index 000000000..7e9025609 
--- /dev/null +++ b/tests/agent/test_smart_model_routing.py @@ -0,0 +1,61 @@ +from agent.smart_model_routing import choose_cheap_model_route + + +_BASE_CONFIG = { + "enabled": True, + "cheap_model": { + "provider": "openrouter", + "model": "google/gemini-2.5-flash", + }, +} + + +def test_returns_none_when_disabled(): + cfg = {**_BASE_CONFIG, "enabled": False} + assert choose_cheap_model_route("what time is it in tokyo?", cfg) is None + + +def test_routes_short_simple_prompt(): + result = choose_cheap_model_route("what time is it in tokyo?", _BASE_CONFIG) + assert result is not None + assert result["provider"] == "openrouter" + assert result["model"] == "google/gemini-2.5-flash" + assert result["routing_reason"] == "simple_turn" + + +def test_skips_long_prompt(): + prompt = "please summarize this carefully " * 20 + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_skips_code_like_prompt(): + prompt = "debug this traceback: ```python\nraise ValueError('bad')\n```" + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_skips_tool_heavy_prompt_keywords(): + prompt = "implement a patch for this docker error" + assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None + + +def test_resolve_turn_route_falls_back_to_primary_when_route_runtime_cannot_be_resolved(monkeypatch): + from agent.smart_model_routing import resolve_turn_route + + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + lambda **kwargs: (_ for _ in ()).throw(RuntimeError("bad route")), + ) + result = resolve_turn_route( + "what time is it in tokyo?", + _BASE_CONFIG, + { + "model": "anthropic/claude-sonnet-4", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + "api_key": "sk-primary", + }, + ) + assert result["model"] == "anthropic/claude-sonnet-4" + assert result["runtime"]["provider"] == "openrouter" + assert result["label"] is None diff --git a/tests/conftest.py 
b/tests/conftest.py index 67fad819b..af73fb5cb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch): (fake_home / "memories").mkdir() (fake_home / "skills").mkdir() monkeypatch.setenv("HERMES_HOME", str(fake_home)) + # Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/ + try: + import hermes_cli.plugins as _plugins_mod + monkeypatch.setattr(_plugins_mod, "_plugin_manager", None) + except Exception: + pass # Tests should not inherit the agent's current gateway/messaging surface. # Individual tests that need gateway behavior set these explicitly. monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False) diff --git a/tests/gateway/test_image_enrichment.py b/tests/gateway/test_image_enrichment.py deleted file mode 100644 index d3c7b72c8..000000000 --- a/tests/gateway/test_image_enrichment.py +++ /dev/null @@ -1,25 +0,0 @@ -from unittest.mock import patch - -import pytest - - -@pytest.mark.asyncio -async def test_image_enrichment_uses_athabasca_upload_guidance_without_stale_r2_warning(): - from gateway.run import GatewayRunner - - runner = object.__new__(GatewayRunner) - - with patch( - "tools.vision_tools.vision_analyze_tool", - return_value='{"success": true, "analysis": "A painted serpent warrior."}', - ): - enriched = await runner._enrich_message_with_vision( - "caption", - ["/tmp/test.jpg"], - ) - - assert "R2 not configured" not in enriched - assert "Gateway media URL available for reference" not in enriched - assert "POST /api/uploads" in enriched - assert "Do not store the local cache path" in enriched - assert "caption" in enriched diff --git a/tests/gateway/test_pii_redaction.py b/tests/gateway/test_pii_redaction.py new file mode 100644 index 000000000..1982f5e88 --- /dev/null +++ b/tests/gateway/test_pii_redaction.py @@ -0,0 +1,156 @@ +"""Tests for PII redaction in gateway session context prompts.""" + +from gateway.session import ( + SessionContext, + 
SessionSource, + build_session_context_prompt, + _hash_id, + _hash_sender_id, + _hash_chat_id, + _looks_like_phone, +) +from gateway.config import Platform, HomeChannel + + +# --------------------------------------------------------------------------- +# Low-level helpers +# --------------------------------------------------------------------------- + +class TestHashHelpers: + def test_hash_id_deterministic(self): + assert _hash_id("12345") == _hash_id("12345") + + def test_hash_id_12_hex_chars(self): + h = _hash_id("user-abc") + assert len(h) == 12 + assert all(c in "0123456789abcdef" for c in h) + + def test_hash_sender_id_prefix(self): + assert _hash_sender_id("12345").startswith("user_") + assert len(_hash_sender_id("12345")) == 17 # "user_" + 12 + + def test_hash_chat_id_preserves_prefix(self): + result = _hash_chat_id("telegram:12345") + assert result.startswith("telegram:") + assert "12345" not in result + + def test_hash_chat_id_no_prefix(self): + result = _hash_chat_id("12345") + assert len(result) == 12 + assert "12345" not in result + + def test_looks_like_phone(self): + assert _looks_like_phone("+15551234567") + assert _looks_like_phone("15551234567") + assert _looks_like_phone("+1-555-123-4567") + assert not _looks_like_phone("alice") + assert not _looks_like_phone("user-123") + assert not _looks_like_phone("") + + +# --------------------------------------------------------------------------- +# Integration: build_session_context_prompt +# --------------------------------------------------------------------------- + +def _make_context( + user_id="user-123", + user_name=None, + chat_id="telegram:99999", + platform=Platform.TELEGRAM, + home_channels=None, +): + source = SessionSource( + platform=platform, + chat_id=chat_id, + chat_type="dm", + user_id=user_id, + user_name=user_name, + ) + return SessionContext( + source=source, + connected_platforms=[platform], + home_channels=home_channels or {}, + ) + + +class TestBuildSessionContextPromptRedaction: + 
def test_no_redaction_by_default(self): + ctx = _make_context(user_id="user-123") + prompt = build_session_context_prompt(ctx) + assert "user-123" in prompt + + def test_user_id_hashed_when_redact_pii(self): + ctx = _make_context(user_id="user-123") + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "user-123" not in prompt + assert "user_" in prompt # hashed ID present + + def test_user_name_not_redacted(self): + ctx = _make_context(user_id="user-123", user_name="Alice") + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "Alice" in prompt + # user_id should not appear when user_name is present (name takes priority) + assert "user-123" not in prompt + + def test_home_channel_id_hashed(self): + hc = { + Platform.TELEGRAM: HomeChannel( + platform=Platform.TELEGRAM, + chat_id="telegram:99999", + name="Home Chat", + ) + } + ctx = _make_context(home_channels=hc) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "99999" not in prompt + assert "telegram:" in prompt # prefix preserved + assert "Home Chat" in prompt # name not redacted + + def test_home_channel_id_preserved_without_redaction(self): + hc = { + Platform.TELEGRAM: HomeChannel( + platform=Platform.TELEGRAM, + chat_id="telegram:99999", + name="Home Chat", + ) + } + ctx = _make_context(home_channels=hc) + prompt = build_session_context_prompt(ctx, redact_pii=False) + assert "99999" in prompt + + def test_redaction_is_deterministic(self): + ctx = _make_context(user_id="+15551234567") + prompt1 = build_session_context_prompt(ctx, redact_pii=True) + prompt2 = build_session_context_prompt(ctx, redact_pii=True) + assert prompt1 == prompt2 + + def test_different_ids_produce_different_hashes(self): + ctx1 = _make_context(user_id="user-A") + ctx2 = _make_context(user_id="user-B") + p1 = build_session_context_prompt(ctx1, redact_pii=True) + p2 = build_session_context_prompt(ctx2, redact_pii=True) + assert p1 != p2 + + def 
test_discord_ids_not_redacted_even_with_flag(self): + """Discord needs real IDs for <@user_id> mentions.""" + ctx = _make_context(user_id="123456789", platform=Platform.DISCORD) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "123456789" in prompt + + def test_whatsapp_ids_redacted(self): + ctx = _make_context(user_id="+15551234567", platform=Platform.WHATSAPP) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "+15551234567" not in prompt + assert "user_" in prompt + + def test_signal_ids_redacted(self): + ctx = _make_context(user_id="+15551234567", platform=Platform.SIGNAL) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "+15551234567" not in prompt + assert "user_" in prompt + + def test_slack_ids_not_redacted(self): + """Slack may need IDs for mentions too.""" + ctx = _make_context(user_id="U12345ABC", platform=Platform.SLACK) + prompt = build_session_context_prompt(ctx, redact_pii=True) + assert "U12345ABC" in prompt diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py new file mode 100644 index 000000000..315f26568 --- /dev/null +++ b/tests/gateway/test_runner_startup_failures.py @@ -0,0 +1,89 @@ +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import BasePlatformAdapter +from gateway.run import GatewayRunner +from gateway.status import read_runtime_status + + +class _RetryableFailureAdapter(BasePlatformAdapter): + def __init__(self): + super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM) + + async def connect(self) -> bool: + self._set_fatal_error( + "telegram_connect_error", + "Telegram startup failed: temporary DNS resolution failure.", + retryable=True, + ) + return False + + async def disconnect(self) -> None: + self._mark_disconnected() + + async def send(self, chat_id, content, reply_to=None, metadata=None): + raise NotImplementedError + + async 
def get_chat_info(self, chat_id): + return {"id": chat_id} + + +class _DisabledAdapter(BasePlatformAdapter): + def __init__(self): + super().__init__(PlatformConfig(enabled=False, token="***"), Platform.TELEGRAM) + + async def connect(self) -> bool: + raise AssertionError("connect should not be called for disabled platforms") + + async def disconnect(self) -> None: + self._mark_disconnected() + + async def send(self, chat_id, content, reply_to=None, metadata=None): + raise NotImplementedError + + async def get_chat_info(self, chat_id): + return {"id": chat_id} + + +@pytest.mark.asyncio +async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="***") + }, + sessions_dir=tmp_path / "sessions", + ) + runner = GatewayRunner(config) + + monkeypatch.setattr(runner, "_create_adapter", lambda platform, platform_config: _RetryableFailureAdapter()) + + ok = await runner.start() + + assert ok is False + assert runner.should_exit_cleanly is False + state = read_runtime_status() + assert state["gateway_state"] == "startup_failed" + assert "temporary DNS resolution failure" in state["exit_reason"] + assert state["platforms"]["telegram"]["state"] == "fatal" + assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error" + + +@pytest.mark.asyncio +async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkeypatch, tmp_path): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=False, token="***") + }, + sessions_dir=tmp_path / "sessions", + ) + runner = GatewayRunner(config) + + ok = await runner.start() + + assert ok is True + assert runner.should_exit_cleanly is False + assert runner.adapters == {} + state = read_runtime_status() + assert state["gateway_state"] == "running" diff --git 
a/tests/gateway/test_telegram_conflict.py b/tests/gateway/test_telegram_conflict.py index 86dc509d5..440aa99d8 100644 --- a/tests/gateway/test_telegram_conflict.py +++ b/tests/gateway/test_telegram_conflict.py @@ -100,6 +100,39 @@ async def test_polling_conflict_stops_polling_and_notifies_handler(monkeypatch): fatal_handler.assert_awaited_once() +@pytest.mark.asyncio +async def test_connect_marks_retryable_fatal_error_for_startup_network_failure(monkeypatch): + adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***")) + + monkeypatch.setattr( + "gateway.status.acquire_scoped_lock", + lambda scope, identity, metadata=None: (True, None), + ) + monkeypatch.setattr( + "gateway.status.release_scoped_lock", + lambda scope, identity: None, + ) + + builder = MagicMock() + builder.token.return_value = builder + app = SimpleNamespace( + bot=SimpleNamespace(), + updater=SimpleNamespace(), + add_handler=MagicMock(), + initialize=AsyncMock(side_effect=RuntimeError("Temporary failure in name resolution")), + start=AsyncMock(), + ) + builder.build.return_value = app + monkeypatch.setattr("gateway.platforms.telegram.Application", SimpleNamespace(builder=MagicMock(return_value=builder))) + + ok = await adapter.connect() + + assert ok is False + assert adapter.fatal_error_code == "telegram_connect_error" + assert adapter.fatal_error_retryable is True + assert "Temporary failure in name resolution" in adapter.fatal_error_message + + @pytest.mark.asyncio async def test_disconnect_skips_inactive_updater_and_app(monkeypatch): adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***")) diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 218059434..db6fbc607 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -12,7 +12,8 @@ EXPECTED_COMMANDS = { "/personality", "/clear", "/history", "/new", "/reset", "/retry", "/undo", "/save", "/config", "/cron", "/skills", "/platforms", "/verbose", 
"/reasoning", "/compress", "/title", "/usage", "/insights", "/paste", - "/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/quit", + "/reload-mcp", "/rollback", "/stop", "/background", "/skin", "/voice", "/browser", "/quit", + "/plugins", } diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py index 29da657e2..52d43fd08 100644 --- a/tests/hermes_cli/test_gateway.py +++ b/tests/hermes_cli/test_gateway.py @@ -39,7 +39,7 @@ def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, "")) def fake_run(cmd, capture_output=False, text=False, check=False): - if cmd[:4] == ["systemctl", "--user", "status", gateway.SERVICE_NAME]: + if cmd[:4] == ["systemctl", "--user", "status", gateway.get_service_name()]: return SimpleNamespace(returncode=0, stdout="", stderr="") if cmd[:3] == ["systemctl", "--user", "is-active"]: return SimpleNamespace(returncode=0, stdout="active\n", stderr="") @@ -76,7 +76,7 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys): assert unit_path.exists() assert [cmd for cmd, _ in calls] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "enable", gateway.SERVICE_NAME], + ["systemctl", "--user", "enable", gateway.get_service_name()], ] assert helper_calls == [True] assert "User service installed and enabled" in out @@ -110,7 +110,7 @@ def test_systemd_install_system_scope_skips_linger_and_uses_systemctl(monkeypatc assert unit_path.read_text(encoding="utf-8") == "scope=True user=alice\n" assert [cmd for cmd, _ in calls] == [ ["systemctl", "daemon-reload"], - ["systemctl", "enable", gateway.SERVICE_NAME], + ["systemctl", "enable", gateway.get_service_name()], ] assert helper_calls == [] assert "Configured to run as: alice" not in out # generated test unit has no User= line diff --git a/tests/hermes_cli/test_gateway_linger.py b/tests/hermes_cli/test_gateway_linger.py index 
cdc07f95f..b21e3f762 100644 --- a/tests/hermes_cli/test_gateway_linger.py +++ b/tests/hermes_cli/test_gateway_linger.py @@ -114,7 +114,7 @@ def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys): assert unit_path.exists() assert [cmd for cmd, _ in calls] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "enable", gateway.SERVICE_NAME], + ["systemctl", "--user", "enable", gateway.get_service_name()], ] assert helper_calls == [True] assert "User service installed and enabled" in out diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index ce41a57a1..ffd381788 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -26,7 +26,7 @@ class TestSystemdServiceRefresh: assert unit_path.read_text(encoding="utf-8") == "new unit\n" assert calls[:2] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "start", gateway_cli.SERVICE_NAME], + ["systemctl", "--user", "start", gateway_cli.get_service_name()], ] def test_systemd_restart_refreshes_outdated_unit(self, tmp_path, monkeypatch): @@ -49,10 +49,27 @@ class TestSystemdServiceRefresh: assert unit_path.read_text(encoding="utf-8") == "new unit\n" assert calls[:2] == [ ["systemctl", "--user", "daemon-reload"], - ["systemctl", "--user", "restart", gateway_cli.SERVICE_NAME], + ["systemctl", "--user", "restart", gateway_cli.get_service_name()], ] +class TestGeneratedSystemdUnits: + def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): + unit = gateway_cli.generate_systemd_unit(system=False) + + assert "ExecStart=" in unit + assert "ExecStop=" not in unit + assert "TimeoutStopSec=60" in unit + + def test_system_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): + unit = gateway_cli.generate_systemd_unit(system=True) + + assert "ExecStart=" in unit + assert "ExecStop=" not in unit + assert "TimeoutStopSec=60" in unit + assert 
"WantedBy=multi-user.target" in unit + + class TestGatewayStopCleanup: def test_stop_sweeps_manual_gateway_processes_after_service_stop(self, tmp_path, monkeypatch): unit_path = tmp_path / "hermes-gateway.service" @@ -92,9 +109,9 @@ class TestGatewayServiceDetection: ) def fake_run(cmd, capture_output=True, text=True, **kwargs): - if cmd == ["systemctl", "--user", "is-active", gateway_cli.SERVICE_NAME]: + if cmd == ["systemctl", "--user", "is-active", gateway_cli.get_service_name()]: return SimpleNamespace(returncode=0, stdout="inactive\n", stderr="") - if cmd == ["systemctl", "is-active", gateway_cli.SERVICE_NAME]: + if cmd == ["systemctl", "is-active", gateway_cli.get_service_name()]: return SimpleNamespace(returncode=0, stdout="active\n", stderr="") raise AssertionError(f"Unexpected command: {cmd}") diff --git a/tests/hermes_cli/test_path_completion.py b/tests/hermes_cli/test_path_completion.py new file mode 100644 index 000000000..b41a36e2e --- /dev/null +++ b/tests/hermes_cli/test_path_completion.py @@ -0,0 +1,184 @@ +"""Tests for file path autocomplete in the CLI completer.""" + +import os +from unittest.mock import MagicMock + +import pytest +from prompt_toolkit.document import Document +from prompt_toolkit.formatted_text import to_plain_text + +from hermes_cli.commands import SlashCommandCompleter, _file_size_label + + +def _display_names(completions): + """Extract plain-text display names from a list of Completion objects.""" + return [to_plain_text(c.display) for c in completions] + + +def _display_metas(completions): + """Extract plain-text display_meta from a list of Completion objects.""" + return [to_plain_text(c.display_meta) if c.display_meta else "" for c in completions] + + +@pytest.fixture +def completer(): + return SlashCommandCompleter() + + +class TestExtractPathWord: + def test_relative_path(self): + assert SlashCommandCompleter._extract_path_word("look at ./src/main.py") == "./src/main.py" + + def test_home_path(self): + assert 
SlashCommandCompleter._extract_path_word("edit ~/docs/") == "~/docs/" + + def test_absolute_path(self): + assert SlashCommandCompleter._extract_path_word("read /etc/hosts") == "/etc/hosts" + + def test_parent_path(self): + assert SlashCommandCompleter._extract_path_word("check ../config.yaml") == "../config.yaml" + + def test_path_with_slash_in_middle(self): + assert SlashCommandCompleter._extract_path_word("open src/utils/helpers.py") == "src/utils/helpers.py" + + def test_plain_word_not_path(self): + assert SlashCommandCompleter._extract_path_word("hello world") is None + + def test_empty_string(self): + assert SlashCommandCompleter._extract_path_word("") is None + + def test_single_word_no_slash(self): + assert SlashCommandCompleter._extract_path_word("README.md") is None + + def test_word_after_space(self): + assert SlashCommandCompleter._extract_path_word("fix the bug in ./tools/") == "./tools/" + + def test_just_dot_slash(self): + assert SlashCommandCompleter._extract_path_word("./") == "./" + + def test_just_tilde_slash(self): + assert SlashCommandCompleter._extract_path_word("~/") == "~/" + + +class TestPathCompletions: + def test_lists_current_directory(self, tmp_path): + (tmp_path / "file_a.py").touch() + (tmp_path / "file_b.txt").touch() + (tmp_path / "subdir").mkdir() + + old_cwd = os.getcwd() + os.chdir(tmp_path) + try: + completions = list(SlashCommandCompleter._path_completions("./")) + names = _display_names(completions) + assert "file_a.py" in names + assert "file_b.txt" in names + assert "subdir/" in names + finally: + os.chdir(old_cwd) + + def test_filters_by_prefix(self, tmp_path): + (tmp_path / "alpha.py").touch() + (tmp_path / "beta.py").touch() + (tmp_path / "alpha_test.py").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/alpha")) + names = _display_names(completions) + assert "alpha.py" in names + assert "alpha_test.py" in names + assert "beta.py" not in names + + def 
test_directories_have_trailing_slash(self, tmp_path): + (tmp_path / "mydir").mkdir() + (tmp_path / "myfile.txt").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/")) + names = _display_names(completions) + metas = _display_metas(completions) + assert "mydir/" in names + idx = names.index("mydir/") + assert metas[idx] == "dir" + + def test_home_expansion(self, tmp_path, monkeypatch): + monkeypatch.setenv("HOME", str(tmp_path)) + (tmp_path / "testfile.md").touch() + + completions = list(SlashCommandCompleter._path_completions("~/test")) + names = _display_names(completions) + assert "testfile.md" in names + + def test_nonexistent_dir_returns_empty(self): + completions = list(SlashCommandCompleter._path_completions("/nonexistent_dir_xyz/")) + assert completions == [] + + def test_respects_limit(self, tmp_path): + for i in range(50): + (tmp_path / f"file_{i:03d}.txt").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/", limit=10)) + assert len(completions) == 10 + + def test_case_insensitive_prefix(self, tmp_path): + (tmp_path / "README.md").touch() + + completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/read")) + names = _display_names(completions) + assert "README.md" in names + + +class TestIntegration: + """Test the completer produces path completions via the prompt_toolkit API.""" + + def test_slash_commands_still_work(self, completer): + doc = Document("/hel", cursor_position=4) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + names = _display_names(completions) + assert "/help" in names + + def test_path_completion_triggers_on_dot_slash(self, completer, tmp_path): + (tmp_path / "test.py").touch() + old_cwd = os.getcwd() + os.chdir(tmp_path) + try: + doc = Document("edit ./te", cursor_position=9) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + names = _display_names(completions) + assert "test.py" in names 
+ finally: + os.chdir(old_cwd) + + def test_no_completion_for_plain_words(self, completer): + doc = Document("hello world", cursor_position=11) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + assert completions == [] + + def test_absolute_path_triggers_completion(self, completer): + doc = Document("check /etc/hos", cursor_position=14) + event = MagicMock() + completions = list(completer.get_completions(doc, event)) + names = _display_names(completions) + # /etc/hosts should exist on Linux + assert any("host" in n.lower() for n in names) + + +class TestFileSizeLabel: + def test_bytes(self, tmp_path): + f = tmp_path / "small.txt" + f.write_text("hi") + assert _file_size_label(str(f)) == "2B" + + def test_kilobytes(self, tmp_path): + f = tmp_path / "medium.txt" + f.write_bytes(b"x" * 2048) + assert _file_size_label(str(f)) == "2K" + + def test_megabytes(self, tmp_path): + f = tmp_path / "large.bin" + f.write_bytes(b"x" * (2 * 1024 * 1024)) + assert _file_size_label(str(f)) == "2.0M" + + def test_nonexistent(self): + assert _file_size_label("/nonexistent_xyz") == "" diff --git a/tests/hermes_cli/test_set_config_value.py b/tests/hermes_cli/test_set_config_value.py index 52a9d1a6c..4eae64d6e 100644 --- a/tests/hermes_cli/test_set_config_value.py +++ b/tests/hermes_cli/test_set_config_value.py @@ -115,3 +115,13 @@ class TestConfigYamlRouting: set_config_value("terminal.docker_image", "python:3.12") config = _read_config(_isolated_hermes_home) assert "python:3.12" in config + + def test_terminal_docker_cwd_mount_flag_goes_to_config_and_env(self, _isolated_hermes_home): + set_config_value("terminal.docker_mount_cwd_to_workspace", "true") + config = _read_config(_isolated_hermes_home) + env_content = _read_env(_isolated_hermes_home) + assert "docker_mount_cwd_to_workspace: 'true'" in config or "docker_mount_cwd_to_workspace: true" in config + assert ( + "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=true" in env_content + or 
"TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=True" in env_content + ) diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py new file mode 100644 index 000000000..b9cdecaa0 --- /dev/null +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -0,0 +1,305 @@ +"""Tests for cmd_update gateway auto-restart — systemd + launchd coverage. + +Ensures ``hermes update`` correctly detects running gateways managed by +systemd (Linux) or launchd (macOS) and restarts/informs the user properly, +rather than leaving zombie processes or telling users to manually restart +when launchd will auto-respawn. +""" + +import subprocess +from types import SimpleNamespace +from unittest.mock import patch, MagicMock + +import pytest + +import hermes_cli.gateway as gateway_cli +from hermes_cli.main import cmd_update + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_run_side_effect( + branch="main", + verify_ok=True, + commit_count="3", + systemd_active=False, + launchctl_loaded=False, +): + """Build a subprocess.run side_effect that simulates git + service commands.""" + + def side_effect(cmd, **kwargs): + joined = " ".join(str(c) for c in cmd) + + # git rev-parse --abbrev-ref HEAD + if "rev-parse" in joined and "--abbrev-ref" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="") + + # git rev-parse --verify origin/{branch} + if "rev-parse" in joined and "--verify" in joined: + rc = 0 if verify_ok else 128 + return subprocess.CompletedProcess(cmd, rc, stdout="", stderr="") + + # git rev-list HEAD..origin/{branch} --count + if "rev-list" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="") + + # systemctl --user is-active + if "systemctl" in joined and "is-active" in joined: + if systemd_active: + return 
subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="") + + # systemctl --user restart + if "systemctl" in joined and "restart" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + # launchctl list ai.hermes.gateway + if "launchctl" in joined and "list" in joined: + if launchctl_loaded: + return subprocess.CompletedProcess(cmd, 0, stdout="PID\tStatus\tLabel\n123\t0\tai.hermes.gateway\n", stderr="") + return subprocess.CompletedProcess(cmd, 113, stdout="", stderr="Could not find service") + + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + return side_effect + + +@pytest.fixture +def mock_args(): + return SimpleNamespace() + + +# --------------------------------------------------------------------------- +# Launchd plist includes --replace +# --------------------------------------------------------------------------- + + +class TestLaunchdPlistReplace: + """The generated launchd plist must include --replace so respawned + gateways kill stale instances.""" + + def test_plist_contains_replace_flag(self): + plist = gateway_cli.generate_launchd_plist() + assert "--replace" in plist + + def test_plist_program_arguments_order(self): + """--replace comes after 'run' in the ProgramArguments.""" + plist = gateway_cli.generate_launchd_plist() + lines = [line.strip() for line in plist.splitlines()] + # Find 'run' and '--replace' in the string entries + string_values = [ + line.replace("", "").replace("", "") + for line in lines + if "" in line and "" in line + ] + assert "run" in string_values + assert "--replace" in string_values + run_idx = string_values.index("run") + replace_idx = string_values.index("--replace") + assert replace_idx == run_idx + 1 + + +# --------------------------------------------------------------------------- +# cmd_update — macOS launchd detection +# 
--------------------------------------------------------------------------- + + +class TestLaunchdPlistRefresh: + """refresh_launchd_plist_if_needed rewrites stale plists (like systemd's + refresh_systemd_unit_if_needed).""" + + def test_refresh_rewrites_stale_plist(self, tmp_path, monkeypatch): + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("old content") + + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + calls = [] + def fake_run(cmd, check=False, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + result = gateway_cli.refresh_launchd_plist_if_needed() + + assert result is True + # Plist should now contain the generated content (which includes --replace) + assert "--replace" in plist_path.read_text() + # Should have unloaded then reloaded + assert any("unload" in str(c) for c in calls) + assert any("load" in str(c) for c in calls) + + def test_refresh_skips_when_current(self, tmp_path, monkeypatch): + plist_path = tmp_path / "ai.hermes.gateway.plist" + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + # Write the current expected content + plist_path.write_text(gateway_cli.generate_launchd_plist()) + + calls = [] + monkeypatch.setattr( + gateway_cli.subprocess, "run", + lambda cmd, **kw: calls.append(cmd) or SimpleNamespace(returncode=0), + ) + + result = gateway_cli.refresh_launchd_plist_if_needed() + + assert result is False + assert len(calls) == 0 # No launchctl calls needed + + def test_refresh_skips_when_no_plist(self, tmp_path, monkeypatch): + plist_path = tmp_path / "nonexistent.plist" + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + result = gateway_cli.refresh_launchd_plist_if_needed() + assert result is False + + def test_launchd_start_calls_refresh(self, tmp_path, monkeypatch): + """launchd_start refreshes the 
plist before starting.""" + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("old") + monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path) + + calls = [] + def fake_run(cmd, check=False, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + gateway_cli.launchd_start() + + # First calls should be refresh (unload/load), then start + cmd_strs = [" ".join(c) for c in calls] + assert any("unload" in s for s in cmd_strs) + assert any("start" in s for s in cmd_strs) + + +class TestCmdUpdateLaunchdRestart: + """cmd_update correctly detects and handles launchd on macOS.""" + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_detects_launchd_and_skips_manual_restart_message( + self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch, + ): + """When launchd is running the gateway, update should print + 'auto-restart via launchd' instead of 'Restart it with: hermes gateway run'.""" + # Create a fake launchd plist so is_macos + plist.exists() passes + plist_path = tmp_path / "ai.hermes.gateway.plist" + plist_path.write_text("") + + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: True, + ) + monkeypatch.setattr( + gateway_cli, "get_launchd_plist_path", lambda: plist_path, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + launchctl_loaded=True, + ) + + # Mock get_running_pid to return a PID + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Gateway restarted via launchd" in captured + assert "Restart it with: hermes gateway run" not in captured + # Verify launchctl stop + start were called (not manual SIGTERM) + launchctl_calls = [ + c for c in mock_run.call_args_list + if len(c.args[0]) > 0 and c.args[0][0] 
== "launchctl" + ] + stop_calls = [c for c in launchctl_calls if "stop" in c.args[0]] + start_calls = [c for c in launchctl_calls if "start" in c.args[0]] + assert len(stop_calls) >= 1 + assert len(start_calls) >= 1 + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_without_launchd_shows_manual_restart( + self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch, + ): + """When no service manager is running, update should show the manual restart hint.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: True, + ) + plist_path = tmp_path / "ai.hermes.gateway.plist" + # plist does NOT exist — no launchd service + monkeypatch.setattr( + gateway_cli, "get_launchd_plist_path", lambda: plist_path, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + launchctl_loaded=False, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Restart it with: hermes gateway run" in captured + assert "Gateway restarted via launchd" not in captured + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_with_systemd_still_restarts_via_systemd( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """On Linux with systemd active, update should restart via systemctl.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: False, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=True, + ) + + with patch("gateway.status.get_running_pid", return_value=12345), \ + patch("gateway.status.remove_pid_file"), \ + patch("os.kill"): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Gateway restarted" in captured + # Verify systemctl restart was called + restart_calls = [ + c for c in mock_run.call_args_list + if "restart" in " ".join(str(a) for a 
in c.args[0]) + and "systemctl" in " ".join(str(a) for a in c.args[0]) + ] + assert len(restart_calls) == 1 + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_no_gateway_running_skips_restart( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """When no gateway is running, update should skip the restart section entirely.""" + monkeypatch.setattr( + gateway_cli, "is_macos", lambda: False, + ) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=False, + ) + + with patch("gateway.status.get_running_pid", return_value=None): + cmd_update(mock_args) + + captured = capsys.readouterr().out + assert "Stopped gateway" not in captured + assert "Gateway restarted" not in captured + assert "Gateway restarted via launchd" not in captured diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 3144bed80..99d8830fa 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -162,6 +162,57 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): assert shell.api_mode == "codex_responses" +def test_cli_turn_routing_uses_primary_when_disabled(monkeypatch): + cli = _import_cli() + shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://openrouter.ai/api/v1" + shell.api_key = "sk-primary" + shell._smart_model_routing = {"enabled": False} + + result = shell._resolve_turn_agent_config("what time is it in tokyo?") + + assert result["model"] == "gpt-5" + assert result["runtime"]["provider"] == "openrouter" + assert result["label"] is None + + +def test_cli_turn_routing_uses_cheap_model_when_simple(monkeypatch): + cli = _import_cli() + + def _runtime_resolve(**kwargs): + assert kwargs["requested"] == "zai" + return { + "provider": "zai", + "api_mode": "chat_completions", + "base_url": 
"https://open.z.ai/api/v1", + "api_key": "cheap-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + + shell = cli.HermesCLI(model="anthropic/claude-sonnet-4", compact=True, max_turns=1) + shell.provider = "openrouter" + shell.api_mode = "chat_completions" + shell.base_url = "https://openrouter.ai/api/v1" + shell.api_key = "primary-key" + shell._smart_model_routing = { + "enabled": True, + "cheap_model": {"provider": "zai", "model": "glm-5-air"}, + "max_simple_chars": 160, + "max_simple_words": 28, + } + + result = shell._resolve_turn_agent_config("what time is it in tokyo?") + + assert result["model"] == "glm-5-air" + assert result["runtime"]["provider"] == "zai" + assert result["runtime"]["api_key"] == "cheap-key" + assert result["label"] is not None + + def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch): cli = _import_cli() diff --git a/tests/test_cli_status_bar.py b/tests/test_cli_status_bar.py index c5225ce91..4e281ffa8 100644 --- a/tests/test_cli_status_bar.py +++ b/tests/test_cli_status_bar.py @@ -65,24 +65,39 @@ class TestCLIStatusBar: assert "claude-sonnet-4-20250514" in text assert "12.4K/200K" in text assert "6%" in text - assert "$0.06" in text + assert "$0.06" not in text # cost hidden by default assert "15m" in text + def test_build_status_bar_text_shows_cost_when_enabled(self): + cli_obj = _attach_agent( + _make_cli(), + prompt_tokens=10000, + completion_tokens=2400, + total_tokens=12400, + api_calls=7, + context_tokens=12400, + context_length=200_000, + ) + cli_obj.show_cost = True + + text = cli_obj._build_status_bar_text(width=120) + assert "$" in text # cost is shown when enabled + def test_build_status_bar_text_collapses_for_narrow_terminal(self): cli_obj = _attach_agent( _make_cli(), - prompt_tokens=10_230, - completion_tokens=2_220, - total_tokens=12_450, + prompt_tokens=10000, + completion_tokens=2400, + total_tokens=12400, api_calls=7, - 
context_tokens=12_450, + context_tokens=12400, context_length=200_000, ) text = cli_obj._build_status_bar_text(width=60) assert "⚕" in text - assert "$0.06" in text + assert "$0.06" not in text # cost hidden by default assert "15m" in text assert "200K" not in text diff --git a/tests/test_plugins.py b/tests/test_plugins.py new file mode 100644 index 000000000..88e194ef3 --- /dev/null +++ b/tests/test_plugins.py @@ -0,0 +1,340 @@ +"""Tests for the Hermes plugin system (hermes_cli.plugins).""" + +import logging +import os +import sys +import types +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +import yaml + +from hermes_cli.plugins import ( + ENTRY_POINTS_GROUP, + VALID_HOOKS, + LoadedPlugin, + PluginContext, + PluginManager, + PluginManifest, + get_plugin_manager, + get_plugin_tool_names, + discover_plugins, + invoke_hook, +) + + +# ── Helpers ──────────────────────────────────────────────────────────────── + + +def _make_plugin_dir(base: Path, name: str, *, register_body: str = "pass", + manifest_extra: dict | None = None) -> Path: + """Create a minimal plugin directory with plugin.yaml + __init__.py.""" + plugin_dir = base / name + plugin_dir.mkdir(parents=True, exist_ok=True) + + manifest = {"name": name, "version": "0.1.0", "description": f"Test plugin {name}"} + if manifest_extra: + manifest.update(manifest_extra) + + (plugin_dir / "plugin.yaml").write_text(yaml.dump(manifest)) + (plugin_dir / "__init__.py").write_text( + f"def register(ctx):\n {register_body}\n" + ) + return plugin_dir + + +# ── TestPluginDiscovery ──────────────────────────────────────────────────── + + +class TestPluginDiscovery: + """Tests for plugin discovery from directories and entry points.""" + + def test_discover_user_plugins(self, tmp_path, monkeypatch): + """Plugins in ~/.hermes/plugins/ are discovered.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "hello_plugin") + monkeypatch.setenv("HERMES_HOME", 
str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "hello_plugin" in mgr._plugins + assert mgr._plugins["hello_plugin"].enabled + + def test_discover_project_plugins(self, tmp_path, monkeypatch): + """Plugins in ./.hermes/plugins/ are discovered.""" + project_dir = tmp_path / "project" + project_dir.mkdir() + monkeypatch.chdir(project_dir) + plugins_dir = project_dir / ".hermes" / "plugins" + _make_plugin_dir(plugins_dir, "proj_plugin") + + mgr = PluginManager() + mgr.discover_and_load() + + assert "proj_plugin" in mgr._plugins + assert mgr._plugins["proj_plugin"].enabled + + def test_discover_is_idempotent(self, tmp_path, monkeypatch): + """Calling discover_and_load() twice does not duplicate plugins.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "once_plugin") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + mgr.discover_and_load() # second call should no-op + + assert len(mgr._plugins) == 1 + + def test_discover_skips_dir_without_manifest(self, tmp_path, monkeypatch): + """Directories without plugin.yaml are silently skipped.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + (plugins_dir / "no_manifest").mkdir(parents=True) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert len(mgr._plugins) == 0 + + def test_entry_points_scanned(self, tmp_path, monkeypatch): + """Entry-point based plugins are discovered (mocked).""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + fake_module = types.ModuleType("fake_ep_plugin") + fake_module.register = lambda ctx: None # type: ignore[attr-defined] + + fake_ep = MagicMock() + fake_ep.name = "ep_plugin" + fake_ep.value = "fake_ep_plugin:register" + fake_ep.group = ENTRY_POINTS_GROUP + fake_ep.load.return_value = fake_module + + def fake_entry_points(): + result = 
MagicMock() + result.select = MagicMock(return_value=[fake_ep]) + return result + + with patch("importlib.metadata.entry_points", fake_entry_points): + mgr = PluginManager() + mgr.discover_and_load() + + assert "ep_plugin" in mgr._plugins + + +# ── TestPluginLoading ────────────────────────────────────────────────────── + + +class TestPluginLoading: + """Tests for plugin module loading.""" + + def test_load_missing_init(self, tmp_path, monkeypatch): + """Plugin dir without __init__.py records an error.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "bad_plugin" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "bad_plugin"})) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "bad_plugin" in mgr._plugins + assert not mgr._plugins["bad_plugin"].enabled + assert mgr._plugins["bad_plugin"].error is not None + + def test_load_missing_register_fn(self, tmp_path, monkeypatch): + """Plugin without register() function records an error.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "no_reg" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "no_reg"})) + (plugin_dir / "__init__.py").write_text("# no register function\n") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "no_reg" in mgr._plugins + assert not mgr._plugins["no_reg"].enabled + assert "no register()" in mgr._plugins["no_reg"].error + + def test_load_registers_namespace_module(self, tmp_path, monkeypatch): + """Directory plugins are importable under hermes_plugins..""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "ns_plugin") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + # Clean up any prior namespace module + 
sys.modules.pop("hermes_plugins.ns_plugin", None) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "hermes_plugins.ns_plugin" in sys.modules + + +# ── TestPluginHooks ──────────────────────────────────────────────────────── + + +class TestPluginHooks: + """Tests for lifecycle hook registration and invocation.""" + + def test_register_and_invoke_hook(self, tmp_path, monkeypatch): + """Registered hooks are called on invoke_hook().""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "hook_plugin", + register_body='ctx.register_hook("pre_tool_call", lambda **kw: None)', + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + # Should not raise + mgr.invoke_hook("pre_tool_call", tool_name="test", args={}, task_id="t1") + + def test_hook_exception_does_not_propagate(self, tmp_path, monkeypatch): + """A hook callback that raises does NOT crash the caller.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "bad_hook", + register_body='ctx.register_hook("post_tool_call", lambda **kw: 1/0)', + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + # Should not raise despite 1/0 + mgr.invoke_hook("post_tool_call", tool_name="x", args={}, result="r", task_id="") + + def test_invalid_hook_name_warns(self, tmp_path, monkeypatch, caplog): + """Registering an unknown hook name logs a warning.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "warn_plugin", + register_body='ctx.register_hook("on_banana", lambda **kw: None)', + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + with caplog.at_level(logging.WARNING, logger="hermes_cli.plugins"): + mgr = PluginManager() + mgr.discover_and_load() + + assert any("on_banana" in record.message for record in caplog.records) + + +# ── 
TestPluginContext ────────────────────────────────────────────────────── + + +class TestPluginContext: + """Tests for the PluginContext facade.""" + + def test_register_tool_adds_to_registry(self, tmp_path, monkeypatch): + """PluginContext.register_tool() puts the tool in the global registry.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "tool_plugin" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "tool_plugin"})) + (plugin_dir / "__init__.py").write_text( + 'def register(ctx):\n' + ' ctx.register_tool(\n' + ' name="plugin_echo",\n' + ' toolset="plugin_tool_plugin",\n' + ' schema={"name": "plugin_echo", "description": "Echo", "parameters": {"type": "object", "properties": {}}},\n' + ' handler=lambda args, **kw: "echo",\n' + ' )\n' + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + assert "plugin_echo" in mgr._plugin_tool_names + + from tools.registry import registry + assert "plugin_echo" in registry._tools + + +# ── TestPluginToolVisibility ─────────────────────────────────────────────── + + +class TestPluginToolVisibility: + """Plugin-registered tools appear in get_tool_definitions().""" + + def test_plugin_tools_in_definitions(self, tmp_path, monkeypatch): + """Tools from plugins bypass the toolset filter.""" + import hermes_cli.plugins as plugins_mod + + plugins_dir = tmp_path / "hermes_test" / "plugins" + plugin_dir = plugins_dir / "vis_plugin" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text(yaml.dump({"name": "vis_plugin"})) + (plugin_dir / "__init__.py").write_text( + 'def register(ctx):\n' + ' ctx.register_tool(\n' + ' name="vis_tool",\n' + ' toolset="plugin_vis_plugin",\n' + ' schema={"name": "vis_tool", "description": "Visible", "parameters": {"type": "object", "properties": {}}},\n' + ' handler=lambda args, **kw: "ok",\n' + ' )\n' + ) + 
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + monkeypatch.setattr(plugins_mod, "_plugin_manager", mgr) + + from model_tools import get_tool_definitions + tools = get_tool_definitions(enabled_toolsets=["terminal"], quiet_mode=True) + tool_names = [t["function"]["name"] for t in tools] + assert "vis_tool" in tool_names + + +# ── TestPluginManagerList ────────────────────────────────────────────────── + + +class TestPluginManagerList: + """Tests for PluginManager.list_plugins().""" + + def test_list_empty(self): + """Empty manager returns empty list.""" + mgr = PluginManager() + assert mgr.list_plugins() == [] + + def test_list_returns_sorted(self, tmp_path, monkeypatch): + """list_plugins() returns results sorted by name.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "zulu") + _make_plugin_dir(plugins_dir, "alpha") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + listing = mgr.list_plugins() + names = [p["name"] for p in listing] + assert names == sorted(names) + + def test_list_with_plugins(self, tmp_path, monkeypatch): + """list_plugins() returns info dicts for each discovered plugin.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir(plugins_dir, "alpha") + _make_plugin_dir(plugins_dir, "beta") + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + listing = mgr.list_plugins() + names = [p["name"] for p in listing] + assert "alpha" in names + assert "beta" in names + for p in listing: + assert "enabled" in p + assert "tools" in p + assert "hooks" in p diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index 3ed297b59..03b32d207 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -1,11 +1,31 @@ import 
logging import subprocess +import sys +import types import pytest from tools.environments import docker as docker_env +def _install_fake_minisweagent(monkeypatch, captured_run_args): + class MockInnerDocker: + container_id = "fake-container" + config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() + + def __init__(self, **kwargs): + captured_run_args.extend(kwargs.get("run_args", [])) + + minisweagent_mod = types.ModuleType("minisweagent") + environments_mod = types.ModuleType("minisweagent.environments") + docker_mod = types.ModuleType("minisweagent.environments.docker") + docker_mod.DockerEnvironment = MockInnerDocker + + monkeypatch.setitem(sys.modules, "minisweagent", minisweagent_mod) + monkeypatch.setitem(sys.modules, "minisweagent.environments", environments_mod) + monkeypatch.setitem(sys.modules, "minisweagent.environments.docker", docker_mod) + + def _make_dummy_env(**kwargs): """Helper to construct DockerEnvironment with minimal required args.""" return docker_env.DockerEnvironment( @@ -19,6 +39,8 @@ def _make_dummy_env(**kwargs): task_id=kwargs.get("task_id", "test-task"), volumes=kwargs.get("volumes", []), network=kwargs.get("network", True), + host_cwd=kwargs.get("host_cwd"), + auto_mount_cwd=kwargs.get("auto_mount_cwd", False), ) @@ -88,65 +110,10 @@ def test_ensure_docker_available_uses_resolved_executable(monkeypatch): def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path): - """When host_cwd is provided, it should be auto-mounted to /workspace.""" - import os - - # Create a temp directory to simulate user's project directory + """Opt-in docker cwd mounting should bind the host cwd to /workspace.""" project_dir = tmp_path / "my-project" project_dir.mkdir() - # Mock Docker availability - def _run_docker_version(*args, **kwargs): - return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") - - def _run_docker_create(*args, **kwargs): - return subprocess.CompletedProcess(args[0], 
1, stdout="", stderr="storage-opt not supported") - - monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") - monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) - - # Mock the inner _Docker class to capture run_args - captured_run_args = [] - - class MockInnerDocker: - container_id = "mock-container-123" - config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() - - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) - - monkeypatch.setattr( - "minisweagent.environments.docker.DockerEnvironment", - MockInnerDocker, - ) - - # Create environment with host_cwd - env = docker_env.DockerEnvironment( - image="python:3.11", - cwd="/workspace", - timeout=60, - persistent_filesystem=False, # Non-persistent mode uses tmpfs, should be overridden - task_id="test-auto-mount", - volumes=[], - host_cwd=str(project_dir), - auto_mount_cwd=True, - ) - - # Check that the host_cwd was added as a volume mount - volume_mount = f"-v {project_dir}:/workspace" - run_args_str = " ".join(captured_run_args) - assert f"{project_dir}:/workspace" in run_args_str, f"Expected auto-mount in run_args: {run_args_str}" - - -def test_auto_mount_disabled_via_env(monkeypatch, tmp_path): - """Auto-mount should be disabled when TERMINAL_DOCKER_NO_AUTO_MOUNT is set.""" - import os - - project_dir = tmp_path / "my-project" - project_dir.mkdir() - - monkeypatch.setenv("TERMINAL_DOCKER_NO_AUTO_MOUNT", "true") - def _run_docker_version(*args, **kwargs): return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") @@ -154,39 +121,44 @@ def test_auto_mount_disabled_via_env(monkeypatch, tmp_path): monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) captured_run_args = [] + _install_fake_minisweagent(monkeypatch, captured_run_args) - class MockInnerDocker: - container_id = "mock-container-456" - config = type("Config", (), {"executable": "/usr/bin/docker", 
"forward_env": [], "env": {}})() - - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) - - monkeypatch.setattr( - "minisweagent.environments.docker.DockerEnvironment", - MockInnerDocker, - ) - - env = docker_env.DockerEnvironment( - image="python:3.11", + _make_dummy_env( cwd="/workspace", - timeout=60, - persistent_filesystem=False, - task_id="test-no-auto-mount", - volumes=[], host_cwd=str(project_dir), auto_mount_cwd=True, ) - # Check that the host_cwd was NOT added (because env var disabled it) run_args_str = " ".join(captured_run_args) - assert f"{project_dir}:/workspace" not in run_args_str, f"Auto-mount should be disabled: {run_args_str}" + assert f"{project_dir}:/workspace" in run_args_str + + +def test_auto_mount_disabled_by_default(monkeypatch, tmp_path): + """Host cwd should not be mounted unless the caller explicitly opts in.""" + project_dir = tmp_path / "my-project" + project_dir.mkdir() + + def _run_docker_version(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") + + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) + + captured_run_args = [] + _install_fake_minisweagent(monkeypatch, captured_run_args) + + _make_dummy_env( + cwd="/root", + host_cwd=str(project_dir), + auto_mount_cwd=False, + ) + + run_args_str = " ".join(captured_run_args) + assert f"{project_dir}:/workspace" not in run_args_str def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path): - """Auto-mount should be skipped if /workspace is already mounted via user volumes.""" - import os - + """Explicit user volumes for /workspace should take precedence over cwd mount.""" project_dir = tmp_path / "my-project" project_dir.mkdir() other_dir = tmp_path / "other" @@ -199,35 +171,43 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path 
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) captured_run_args = [] + _install_fake_minisweagent(monkeypatch, captured_run_args) - class MockInnerDocker: - container_id = "mock-container-789" - config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() - - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) - - monkeypatch.setattr( - "minisweagent.environments.docker.DockerEnvironment", - MockInnerDocker, - ) - - # User already configured a volume mount for /workspace - env = docker_env.DockerEnvironment( - image="python:3.11", + _make_dummy_env( cwd="/workspace", - timeout=60, - persistent_filesystem=False, - task_id="test-workspace-exists", - volumes=[f"{other_dir}:/workspace"], # User explicitly mounted something to /workspace host_cwd=str(project_dir), auto_mount_cwd=True, + volumes=[f"{other_dir}:/workspace"], ) - # The user's explicit mount should be present run_args_str = " ".join(captured_run_args) assert f"{other_dir}:/workspace" in run_args_str + assert run_args_str.count(":/workspace") == 1 - # But the auto-mount should NOT add a duplicate - assert run_args_str.count(":/workspace") == 1, f"Should only have one /workspace mount: {run_args_str}" + +def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path): + """Persistent mode should still prefer the configured host cwd at /workspace.""" + project_dir = tmp_path / "my-project" + project_dir.mkdir() + + def _run_docker_version(*args, **kwargs): + return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") + + monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") + monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) + + captured_run_args = [] + _install_fake_minisweagent(monkeypatch, captured_run_args) + + _make_dummy_env( + cwd="/workspace", + persistent_filesystem=True, + host_cwd=str(project_dir), + auto_mount_cwd=True, + 
task_id="test-persistent-auto-mount", + ) + + run_args_str = " ".join(captured_run_args) + assert f"{project_dir}:/workspace" in run_args_str + assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py index 6da25216b..49c306231 100644 --- a/tests/tools/test_modal_sandbox_fixes.py +++ b/tests/tools/test_modal_sandbox_fixes.py @@ -91,8 +91,8 @@ class TestCwdHandling: "/home/ paths should be replaced for modal backend." ) - def test_users_path_replaced_for_docker(self): - """TERMINAL_CWD=/Users/... should be replaced with /root for docker.""" + def test_users_path_replaced_for_docker_by_default(self): + """Docker should keep host paths out of the sandbox unless explicitly enabled.""" with patch.dict(os.environ, { "TERMINAL_ENV": "docker", "TERMINAL_CWD": "/Users/someone/projects", @@ -100,8 +100,22 @@ class TestCwdHandling: config = _tt_mod._get_env_config() assert config["cwd"] == "/root", ( f"Expected /root, got {config['cwd']}. " - "/Users/ paths should be replaced for docker backend." + "Host paths should be discarded for docker backend by default." ) + assert config["host_cwd"] is None + assert config["docker_mount_cwd_to_workspace"] is False + + def test_users_path_maps_to_workspace_for_docker_when_enabled(self): + """Docker should map the host cwd into /workspace only when explicitly enabled.""" + with patch.dict(os.environ, { + "TERMINAL_ENV": "docker", + "TERMINAL_CWD": "/Users/someone/projects", + "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", + }): + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/Users/someone/projects" + assert config["docker_mount_cwd_to_workspace"] is True def test_windows_path_replaced_for_modal(self): """TERMINAL_CWD=C:\\Users\\... 
should be replaced for modal.""" @@ -119,12 +133,27 @@ class TestCwdHandling: # Remove TERMINAL_CWD so it uses default env = os.environ.copy() env.pop("TERMINAL_CWD", None) + env.pop("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", None) with patch.dict(os.environ, env, clear=True): config = _tt_mod._get_env_config() assert config["cwd"] == "/root", ( f"Backend {backend}: expected /root default, got {config['cwd']}" ) + def test_docker_default_cwd_maps_current_directory_when_enabled(self): + """Docker should use /workspace when cwd mounting is explicitly enabled.""" + with patch("tools.terminal_tool.os.getcwd", return_value="/home/user/project"): + with patch.dict(os.environ, { + "TERMINAL_ENV": "docker", + "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", + }, clear=False): + env = os.environ.copy() + env.pop("TERMINAL_CWD", None) + with patch.dict(os.environ, env, clear=True): + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/home/user/project" + def test_local_backend_uses_getcwd(self): """Local backend should use os.getcwd(), not /root.""" with patch.dict(os.environ, {"TERMINAL_ENV": "local"}, clear=False): @@ -134,6 +163,31 @@ class TestCwdHandling: config = _tt_mod._get_env_config() assert config["cwd"] == os.getcwd() + def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch): + """Docker host cwd and mount flag should reach DockerEnvironment.""" + captured = {} + sentinel = object() + + def _fake_docker_environment(**kwargs): + captured.update(kwargs) + return sentinel + + monkeypatch.setattr(_tt_mod, "_DockerEnvironment", _fake_docker_environment) + + env = _tt_mod._create_environment( + env_type="docker", + image="python:3.11", + cwd="/workspace", + timeout=60, + container_config={"docker_mount_cwd_to_workspace": True}, + host_cwd="/home/user/project", + ) + + assert env is sentinel + assert captured["cwd"] == "/workspace" + assert captured["host_cwd"] == "/home/user/project" + 
assert captured["auto_mount_cwd"] is True + def test_ssh_preserves_home_paths(self): """SSH backend should NOT replace /home/ paths (they're valid remotely).""" with patch.dict(os.environ, { diff --git a/tools/approval.py b/tools/approval.py index 92da71ca5..9f1b541ff 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -4,6 +4,7 @@ This module is the single source of truth for the dangerous command system: - Pattern detection (DANGEROUS_PATTERNS, detect_dangerous_command) - Per-session approval state (thread-safe, keyed by session_key) - Approval prompting (CLI interactive + gateway async) +- Smart approval via auxiliary LLM (auto-approve low-risk commands) - Permanent allowlist persistence (config.yaml) """ @@ -283,6 +284,68 @@ def prompt_dangerous_approval(command: str, description: str, sys.stdout.flush() +def _get_approval_mode() -> str: + """Read the approval mode from config. Returns 'manual', 'smart', or 'off'.""" + try: + from hermes_cli.config import load_config + config = load_config() + return config.get("approvals", {}).get("mode", "manual") + except Exception: + return "manual" + + +def _smart_approve(command: str, description: str) -> str: + """Use the auxiliary LLM to assess risk and decide approval. + + Returns 'approve' if the LLM determines the command is safe, + 'deny' if genuinely dangerous, or 'escalate' if uncertain. + + Inspired by OpenAI Codex's Smart Approvals guardian subagent + (openai/codex#13860). + """ + try: + from agent.auxiliary_client import get_text_auxiliary_client, auxiliary_max_tokens_param + + client, model = get_text_auxiliary_client(task="approval") + if not client or not model: + logger.debug("Smart approvals: no aux client available, escalating") + return "escalate" + + prompt = f"""You are a security reviewer for an AI coding agent. A terminal command was flagged by pattern matching as potentially dangerous. + +Command: {command} +Flagged reason: {description} + +Assess the ACTUAL risk of this command. 
Many flagged commands are false positives — for example, `python -c "print('hello')"` is flagged as "script execution via -c flag" but is completely harmless. + +Rules: +- APPROVE if the command is clearly safe (benign script execution, safe file operations, development tools, package installs, git operations, etc.) +- DENY if the command could genuinely damage the system (recursive delete of important paths, overwriting system files, fork bombs, wiping disks, dropping databases, etc.) +- ESCALATE if you're uncertain + +Respond with exactly one word: APPROVE, DENY, or ESCALATE""" + + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + **auxiliary_max_tokens_param(16), + temperature=0, + ) + + answer = (response.choices[0].message.content or "").strip().upper() + + if "APPROVE" in answer: + return "approve" + elif "DENY" in answer: + return "deny" + else: + return "escalate" + + except Exception as e: + logger.debug("Smart approvals: LLM call failed (%s), escalating", e) + return "escalate" + + def check_dangerous_command(command: str, env_type: str, approval_callback=None) -> dict: """Check if a command is dangerous and handle approval. 
@@ -372,8 +435,9 @@ def check_all_command_guards(command: str, env_type: str, if env_type in ("docker", "singularity", "modal", "daytona"): return {"approved": True, "message": None} - # --yolo: bypass all approval prompts and pre-exec guard checks - if os.getenv("HERMES_YOLO_MODE"): + # --yolo or approvals.mode=off: bypass all approval prompts + approval_mode = _get_approval_mode() + if os.getenv("HERMES_YOLO_MODE") or approval_mode == "off": return {"approved": True, "message": None} is_cli = os.getenv("HERMES_INTERACTIVE") @@ -430,6 +494,31 @@ def check_all_command_guards(command: str, env_type: str, if not warnings: return {"approved": True, "message": None} + # --- Phase 2.5: Smart approval (auxiliary LLM risk assessment) --- + # When approvals.mode=smart, ask the aux LLM before prompting the user. + # Inspired by OpenAI Codex's Smart Approvals guardian subagent + # (openai/codex#13860). + if approval_mode == "smart": + combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings) + verdict = _smart_approve(command, combined_desc_for_llm) + if verdict == "approve": + # Auto-approve and grant session-level approval for these patterns + for key, _, _ in warnings: + approve_session(session_key, key) + logger.debug("Smart approval: auto-approved '%s' (%s)", + command[:60], combined_desc_for_llm) + return {"approved": True, "message": None, + "smart_approved": True} + elif verdict == "deny": + combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings) + return { + "approved": False, + "message": f"BLOCKED by smart approval: {combined_desc_for_llm}. " + "The command was assessed as genuinely dangerous. 
Do NOT retry.", + "smart_denied": True, + } + # verdict == "escalate" → fall through to manual prompt + # --- Phase 3: Approval --- # Combine descriptions for a single approval prompt diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 88eba3884..e595e8105 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -98,6 +98,16 @@ def _get_extraction_model() -> Optional[str]: return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None +def _get_cdp_override() -> str: + """Return a user-supplied CDP URL override, or empty string. + + When ``BROWSER_CDP_URL`` is set (e.g. via ``/browser connect``), we skip + both Browserbase and the local headless launcher and connect directly to + the supplied Chrome DevTools Protocol endpoint. + """ + return os.environ.get("BROWSER_CDP_URL", "").strip() + + def _is_local_mode() -> bool: """Return True when no Browserbase credentials are configured. @@ -105,6 +115,8 @@ def _is_local_mode() -> bool: ``agent-browser --session`` instead of connecting to a remote Browserbase session via ``--cdp``. """ + if _get_cdp_override(): + return False # CDP override takes priority return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID")) @@ -608,6 +620,20 @@ def _create_local_session(task_id: str) -> Dict[str, str]: } +def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]: + """Create a session that connects to a user-supplied CDP endpoint.""" + import uuid + session_name = f"cdp_{uuid.uuid4().hex[:10]}" + logger.info("Created CDP browser session %s → %s for task %s", + session_name, cdp_url, task_id) + return { + "session_name": session_name, + "bb_session_id": None, + "cdp_url": cdp_url, + "features": {"cdp_override": True}, + } + + def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: """ Get or create session info for the given task. 
@@ -638,7 +664,10 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: return _active_sessions[task_id] # Create session outside the lock (network call in cloud mode) - if _is_local_mode(): + cdp_override = _get_cdp_override() + if cdp_override: + session_info = _create_cdp_session(task_id, cdp_override) + elif _is_local_mode(): session_info = _create_local_session(task_id) else: session_info = _create_browserbase_session(task_id) diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 1c95f7b34..ec6d8b30c 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -158,10 +158,6 @@ class DockerEnvironment(BaseEnvironment): Persistence: when enabled, bind mounts preserve /workspace and /root across container restarts. - - Auto-mount: when host_cwd is provided (the user's original working directory), - it is automatically bind-mounted to /workspace unless auto_mount_cwd=False - or the path is already covered by an explicit volume mount. """ def __init__( @@ -177,7 +173,7 @@ class DockerEnvironment(BaseEnvironment): volumes: list = None, network: bool = True, host_cwd: str = None, - auto_mount_cwd: bool = True, + auto_mount_cwd: bool = False, ): if cwd == "~": cwd = "/root" @@ -220,30 +216,9 @@ class DockerEnvironment(BaseEnvironment): # mode uses tmpfs (ephemeral, fast, gone on cleanup). 
from tools.environments.base import get_sandbox_dir - self._workspace_dir: Optional[str] = None - self._home_dir: Optional[str] = None - if self._persistent: - sandbox = get_sandbox_dir() / "docker" / task_id - self._workspace_dir = str(sandbox / "workspace") - self._home_dir = str(sandbox / "home") - os.makedirs(self._workspace_dir, exist_ok=True) - os.makedirs(self._home_dir, exist_ok=True) - writable_args = [ - "-v", f"{self._workspace_dir}:/workspace", - "-v", f"{self._home_dir}:/root", - ] - else: - writable_args = [ - "--tmpfs", "/workspace:rw,exec,size=10g", - "--tmpfs", "/home:rw,exec,size=1g", - "--tmpfs", "/root:rw,exec,size=1g", - ] - - # All containers get security hardening (capabilities dropped, no privilege - # escalation, PID limits). The container filesystem is writable so agents - # can install packages as needed. # User-configured volume mounts (from config.yaml docker_volumes) volume_args = [] + workspace_explicitly_mounted = False for vol in (volumes or []): if not isinstance(vol, str): logger.warning(f"Docker volume entry is not a string: {vol!r}") @@ -253,31 +228,52 @@ class DockerEnvironment(BaseEnvironment): continue if ":" in vol: volume_args.extend(["-v", vol]) + if ":/workspace" in vol: + workspace_explicitly_mounted = True else: logger.warning(f"Docker volume '{vol}' missing colon, skipping") - # Auto-mount host CWD to /workspace when enabled (fixes #1445). - # This allows users to run `cd my-project && hermes` and have Docker - # automatically mount their project directory into the container. - # Disabled when: auto_mount_cwd=False, host_cwd is not a valid directory, - # or /workspace is already covered by writable_args or a user volume. 
- auto_mount_disabled = os.getenv("TERMINAL_DOCKER_NO_AUTO_MOUNT", "").lower() in ("1", "true", "yes") - if host_cwd and auto_mount_cwd and not auto_mount_disabled: - host_cwd_abs = os.path.abspath(os.path.expanduser(host_cwd)) - if os.path.isdir(host_cwd_abs): - # Check if /workspace is already being mounted by persistence or user config - workspace_already_mounted = any( - ":/workspace" in arg for arg in writable_args - ) or any( - ":/workspace" in arg for arg in volume_args - ) - if not workspace_already_mounted: - logger.info(f"Auto-mounting host CWD to /workspace: {host_cwd_abs}") - volume_args.extend(["-v", f"{host_cwd_abs}:/workspace"]) - else: - logger.debug(f"Skipping auto-mount: /workspace already mounted") - else: - logger.debug(f"Skipping auto-mount: host_cwd is not a valid directory: {host_cwd}") + host_cwd_abs = os.path.abspath(os.path.expanduser(host_cwd)) if host_cwd else "" + bind_host_cwd = ( + auto_mount_cwd + and bool(host_cwd_abs) + and os.path.isdir(host_cwd_abs) + and not workspace_explicitly_mounted + ) + if auto_mount_cwd and host_cwd and not os.path.isdir(host_cwd_abs): + logger.debug(f"Skipping docker cwd mount: host_cwd is not a valid directory: {host_cwd}") + + self._workspace_dir: Optional[str] = None + self._home_dir: Optional[str] = None + writable_args = [] + if self._persistent: + sandbox = get_sandbox_dir() / "docker" / task_id + self._home_dir = str(sandbox / "home") + os.makedirs(self._home_dir, exist_ok=True) + writable_args.extend([ + "-v", f"{self._home_dir}:/root", + ]) + if not bind_host_cwd and not workspace_explicitly_mounted: + self._workspace_dir = str(sandbox / "workspace") + os.makedirs(self._workspace_dir, exist_ok=True) + writable_args.extend([ + "-v", f"{self._workspace_dir}:/workspace", + ]) + else: + if not bind_host_cwd and not workspace_explicitly_mounted: + writable_args.extend([ + "--tmpfs", "/workspace:rw,exec,size=10g", + ]) + writable_args.extend([ + "--tmpfs", "/home:rw,exec,size=1g", + "--tmpfs", 
"/root:rw,exec,size=1g", + ]) + + if bind_host_cwd: + logger.info(f"Mounting configured host cwd to /workspace: {host_cwd_abs}") + volume_args = ["-v", f"{host_cwd_abs}:/workspace", *volume_args] + elif workspace_explicitly_mounted: + logger.debug("Skipping docker cwd mount: /workspace already mounted by user config") logger.info(f"Docker volume_args: {volume_args}") all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args diff --git a/tools/file_tools.py b/tools/file_tools.py index 98ea15bd4..ddcfcd567 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -140,6 +140,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations: container_config=container_config, local_config=local_config, task_id=task_id, + host_cwd=config.get("host_cwd"), ) with _env_lock: diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py index bc8e34403..f53451c63 100644 --- a/tools/fuzzy_match.py +++ b/tools/fuzzy_match.py @@ -32,6 +32,19 @@ import re from typing import Tuple, Optional, List, Callable from difflib import SequenceMatcher +UNICODE_MAP = { + "\u201c": '"', "\u201d": '"', # smart double quotes + "\u2018": "'", "\u2019": "'", # smart single quotes + "\u2014": "--", "\u2013": "-", # em/en dashes + "\u2026": "...", "\u00a0": " ", # ellipsis and non-breaking space +} + +def _unicode_normalize(text: str) -> str: + """Normalizes Unicode characters to their standard ASCII equivalents.""" + for char, repl in UNICODE_MAP.items(): + text = text.replace(char, repl) + return text + def fuzzy_find_and_replace(content: str, old_string: str, new_string: str, replace_all: bool = False) -> Tuple[str, int, Optional[str]]: @@ -253,42 +266,52 @@ def _strategy_trimmed_boundary(content: str, pattern: str) -> List[Tuple[int, in def _strategy_block_anchor(content: str, pattern: str) -> List[Tuple[int, int]]: """ Strategy 7: Match by anchoring on first and last lines. - - If first and last lines match exactly, accept middle with 70% similarity. 
+ Adjusted with permissive thresholds and unicode normalization. """ - pattern_lines = pattern.split('\n') + # Normalize both strings for comparison while keeping original content for offset calculation + norm_pattern = _unicode_normalize(pattern) + norm_content = _unicode_normalize(content) + + pattern_lines = norm_pattern.split('\n') if len(pattern_lines) < 2: - return [] # Need at least 2 lines for anchoring + return [] first_line = pattern_lines[0].strip() last_line = pattern_lines[-1].strip() - content_lines = content.split('\n') - matches = [] + # Use normalized lines for matching logic + norm_content_lines = norm_content.split('\n') + # BUT use original lines for calculating start/end positions to prevent index shift + orig_content_lines = content.split('\n') pattern_line_count = len(pattern_lines) - for i in range(len(content_lines) - pattern_line_count + 1): - # Check if first and last lines match - if (content_lines[i].strip() == first_line and - content_lines[i + pattern_line_count - 1].strip() == last_line): + potential_matches = [] + for i in range(len(norm_content_lines) - pattern_line_count + 1): + if (norm_content_lines[i].strip() == first_line and + norm_content_lines[i + pattern_line_count - 1].strip() == last_line): + potential_matches.append(i) - # Check middle similarity - if pattern_line_count <= 2: - # Only first and last, they match - similarity = 1.0 - else: - content_middle = '\n'.join(content_lines[i+1:i+pattern_line_count-1]) - pattern_middle = '\n'.join(pattern_lines[1:-1]) - similarity = SequenceMatcher(None, content_middle, pattern_middle).ratio() - - if similarity >= 0.70: - # Calculate positions - start_pos = sum(len(line) + 1 for line in content_lines[:i]) - end_pos = sum(len(line) + 1 for line in content_lines[:i + pattern_line_count]) - 1 - if end_pos >= len(content): - end_pos = len(content) - matches.append((start_pos, end_pos)) + matches = [] + candidate_count = len(potential_matches) + + # Thresholding logic: 0.10 for unique 
matches (max flexibility), 0.30 for multiple candidates + threshold = 0.10 if candidate_count == 1 else 0.30 + + for i in potential_matches: + if pattern_line_count <= 2: + similarity = 1.0 + else: + # Compare normalized middle sections + content_middle = '\n'.join(norm_content_lines[i+1:i+pattern_line_count-1]) + pattern_middle = '\n'.join(pattern_lines[1:-1]) + similarity = SequenceMatcher(None, content_middle, pattern_middle).ratio() + + if similarity >= threshold: + # Calculate positions using ORIGINAL lines to ensure correct character offsets in the file + start_pos = sum(len(line) + 1 for line in orig_content_lines[:i]) + end_pos = sum(len(line) + 1 for line in orig_content_lines[:i + pattern_line_count]) - 1 + matches.append((start_pos, min(end_pos, len(content)))) return matches diff --git a/tools/memory_tool.py b/tools/memory_tool.py index f77e8116b..d7950d38c 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -439,11 +439,13 @@ MEMORY_SCHEMA = { "Memory is injected into future turns, so keep it compact and focused on facts " "that will still matter later.\n\n" "WHEN TO SAVE (do this proactively, don't wait to be asked):\n" + "- User corrects you or says 'remember this' / 'don't do that again'\n" "- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n" "- You discover something about the environment (OS, installed tools, project structure)\n" - "- User corrects you or says 'remember this' / 'don't do that again'\n" "- You learn a convention, API quirk, or workflow specific to this user's setup\n" "- You identify a stable fact that will be useful again in future sessions\n\n" + "PRIORITY: User preferences and corrections > environment facts > procedural knowledge. 
" + "The most valuable memory prevents the user from having to repeat themselves.\n\n" "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " "state to memory; use session_search to recall those from past transcripts.\n" "If you've discovered a new way to do something, solved a problem that could be " diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index 8a8c13006..13356ec9f 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -47,9 +47,9 @@ def _format_timestamp(ts: Union[int, float, str, None]) -> str: return ts except (ValueError, OSError, OverflowError) as e: # Log specific errors for debugging while gracefully handling edge cases - logging.debug("Failed to format timestamp %s: %s", ts, e) + logging.debug("Failed to format timestamp %s: %s", ts, e, exc_info=True) except Exception as e: - logging.debug("Unexpected error formatting timestamp %s: %s", ts, e) + logging.debug("Unexpected error formatting timestamp %s: %s", ts, e, exc_info=True) return str(ts) @@ -170,7 +170,12 @@ async def _summarize_session( if attempt < max_retries - 1: await asyncio.sleep(1 * (attempt + 1)) else: - logging.warning(f"Session summarization failed after {max_retries} attempts: {e}") + logging.warning( + "Session summarization failed after %d attempts: %s", + max_retries, + e, + exc_info=True, + ) return None @@ -237,7 +242,12 @@ def session_search( else: break except Exception as e: - logging.debug("Error resolving parent for session %s: %s", sid, e) + logging.debug( + "Error resolving parent for session %s: %s", + sid, + e, + exc_info=True, + ) break return sid @@ -270,7 +280,12 @@ def session_search( conversation_text = _truncate_around_matches(conversation_text, query) tasks.append((session_id, match_info, conversation_text, session_meta)) except Exception as e: - logging.warning(f"Failed to prepare session {session_id}: {e}") + logging.warning( + "Failed to prepare session %s: %s", + session_id, 
+ e, + exc_info=True, + ) # Summarize all sessions in parallel async def _summarize_all() -> List[Union[str, Exception]]: @@ -289,7 +304,10 @@ def session_search( # No event loop running, create a new one results = asyncio.run(_summarize_all()) except concurrent.futures.TimeoutError: - logging.warning("Session summarization timed out after 60 seconds") + logging.warning( + "Session summarization timed out after 60 seconds", + exc_info=True, + ) return json.dumps({ "success": False, "error": "Session summarization timed out. Try a more specific query or reduce the limit.", @@ -298,7 +316,12 @@ def session_search( summaries = [] for (session_id, match_info, _, _), result in zip(tasks, results): if isinstance(result, Exception): - logging.warning(f"Failed to summarize session {session_id}: {result}") + logging.warning( + "Failed to summarize session %s: %s", + session_id, + result, + exc_info=True, + ) continue if result: summaries.append({ @@ -318,6 +341,7 @@ def session_search( }, ensure_ascii=False) except Exception as e: + logging.error("Session search failed: %s", e, exc_info=True) return json.dumps({"success": False, "error": f"Search failed: {str(e)}"}, ensure_ascii=False) diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 86d04e635..203afe499 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -561,7 +561,8 @@ SKILL_MANAGE_SCHEMA = { "user-corrected approach worked, non-trivial workflow discovered, " "or user asks you to remember a procedure.\n" "Update when: instructions stale/wrong, OS-specific failures, " - "missing steps or pitfalls found during use.\n\n" + "missing steps or pitfalls found during use. " + "If you used a skill and hit issues not covered by it, patch it immediately.\n\n" "After difficult/iterative tasks, offer to save as a skill. " "Skip for simple one-offs. 
Confirm with user before creating/deleting.\n\n" "Good skills: trigger conditions, numbered steps with exact commands, " diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index a9326f3ec..49a82e249 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -466,6 +466,8 @@ def _get_env_config() -> Dict[str, Any]: default_image = "nikolaik/python-nodejs:python3.11-nodejs20" env_type = os.getenv("TERMINAL_ENV", "local") + mount_docker_cwd = os.getenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "false").lower() in ("true", "1", "yes") + # Default cwd: local uses the host's current directory, everything # else starts in the user's home (~ resolves to whatever account # is running inside the container/remote). @@ -475,21 +477,25 @@ def _get_env_config() -> Dict[str, Any]: default_cwd = "~" else: default_cwd = "/root" - + # Read TERMINAL_CWD but sanity-check it for container backends. - # If the CWD looks like a host-local path that can't exist inside a - # container/sandbox, fall back to the backend's own default. This - # catches the case where cli.py (or .env) leaked the host's CWD. - # SSH is excluded since /home/ paths are valid on remote machines. - raw_cwd = os.getenv("TERMINAL_CWD", default_cwd) - cwd = raw_cwd - # Capture original host CWD for auto-mounting into containers (fixes #1445). - # Even when the container's working directory falls back to /root, we still - # want to auto-mount the user's host project directory to /workspace. - host_cwd = raw_cwd if raw_cwd and os.path.isdir(raw_cwd) else os.getcwd() - if env_type in ("modal", "docker", "singularity", "daytona") and cwd: + # If Docker cwd passthrough is explicitly enabled, remap the host path to + # /workspace and track the original host path separately. Otherwise keep the + # normal sandbox behavior and discard host paths. 
+ cwd = os.getenv("TERMINAL_CWD", default_cwd) + host_cwd = None + host_prefixes = ("/Users/", "/home/", "C:\\", "C:/") + if env_type == "docker" and mount_docker_cwd: + docker_cwd_source = os.getenv("TERMINAL_CWD") or os.getcwd() + candidate = os.path.abspath(os.path.expanduser(docker_cwd_source)) + if ( + any(candidate.startswith(p) for p in host_prefixes) + or (os.path.isabs(candidate) and os.path.isdir(candidate) and not candidate.startswith(("/workspace", "/root"))) + ): + host_cwd = candidate + cwd = "/workspace" + elif env_type in ("modal", "docker", "singularity", "daytona") and cwd: # Host paths that won't exist inside containers - host_prefixes = ("/Users/", "/home/", "C:\\", "C:/") if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd: logger.info("Ignoring TERMINAL_CWD=%r for %s backend " "(host path won't exist in sandbox). Using %r instead.", @@ -503,7 +509,8 @@ def _get_env_config() -> Dict[str, Any]: "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image), "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image), "cwd": cwd, - "host_cwd": host_cwd, # Original host directory for auto-mounting into containers + "host_cwd": host_cwd, + "docker_mount_cwd_to_workspace": mount_docker_cwd, "timeout": _parse_env_var("TERMINAL_TIMEOUT", "180"), "lifetime_seconds": _parse_env_var("TERMINAL_LIFETIME_SECONDS", "300"), # SSH-specific config @@ -544,7 +551,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_config: SSH connection config (for env_type="ssh") container_config: Resource config for container backends (cpu, memory, disk, persistent) task_id: Task identifier for environment reuse and snapshot keying - host_cwd: Original host working directory (for auto-mounting into containers) + host_cwd: Optional host working directory to bind into Docker when explicitly enabled Returns: Environment instance with execute() method @@ -568,6 +575,7 @@ def _create_environment(env_type: str, image: str, 
cwd: str, timeout: int, persistent_filesystem=persistent, task_id=task_id, volumes=volumes, host_cwd=host_cwd, + auto_mount_cwd=cc.get("docker_mount_cwd_to_workspace", False), ) elif env_type == "singularity": @@ -957,6 +965,7 @@ def terminal_tool( "container_disk": config.get("container_disk", 51200), "container_persistent": config.get("container_persistent", True), "docker_volumes": config.get("docker_volumes", []), + "docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False), } local_config = None diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md new file mode 100644 index 000000000..c141f895a --- /dev/null +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -0,0 +1,438 @@ +--- +sidebar_position: 10 +--- + +# Build a Hermes Plugin + +This guide walks through building a complete Hermes plugin from scratch. By the end you'll have a working plugin with multiple tools, lifecycle hooks, shipped data files, and a bundled skill — everything the plugin system supports. + +## What you're building + +A **calculator** plugin with two tools: +- `calculate` — evaluate math expressions (`2**16`, `sqrt(144)`, `pi * 5**2`) +- `unit_convert` — convert between units (`100 F → 37.78 C`, `5 km → 3.11 mi`) + +Plus a hook that logs every tool call, and a bundled skill file. + +## Step 1: Create the plugin directory + +```bash +mkdir -p ~/.hermes/plugins/calculator +cd ~/.hermes/plugins/calculator +``` + +## Step 2: Write the manifest + +Create `plugin.yaml`: + +```yaml +name: calculator +version: 1.0.0 +description: Math calculator — evaluate expressions and convert units +provides: + tools: true + hooks: true +``` + +This tells Hermes: "I'm a plugin called calculator, I provide tools and hooks." That's all the manifest needs. 
+ +Optional fields you could add: +```yaml +author: Your Name +requires_env: # gate loading on env vars + - SOME_API_KEY # plugin disabled if missing +``` + +## Step 3: Write the tool schemas + +Create `schemas.py` — this is what the LLM reads to decide when to call your tools: + +```python +"""Tool schemas — what the LLM sees.""" + +CALCULATE = { + "name": "calculate", + "description": ( + "Evaluate a mathematical expression and return the result. " + "Supports arithmetic (+, -, *, /, **), functions (sqrt, sin, cos, " + "log, abs, round, floor, ceil), and constants (pi, e). " + "Use this for any math the user asks about." + ), + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate (e.g., '2**10', 'sqrt(144)')", + }, + }, + "required": ["expression"], + }, +} + +UNIT_CONVERT = { + "name": "unit_convert", + "description": ( + "Convert a value between units. Supports length (m, km, mi, ft, in), " + "weight (kg, lb, oz, g), temperature (C, F, K), data (B, KB, MB, GB, TB), " + "and time (s, min, hr, day)." + ), + "parameters": { + "type": "object", + "properties": { + "value": { + "type": "number", + "description": "The numeric value to convert", + }, + "from_unit": { + "type": "string", + "description": "Source unit (e.g., 'km', 'lb', 'F', 'GB')", + }, + "to_unit": { + "type": "string", + "description": "Target unit (e.g., 'mi', 'kg', 'C', 'MB')", + }, + }, + "required": ["value", "from_unit", "to_unit"], + }, +} +``` + +**Why schemas matter:** The `description` field is how the LLM decides when to use your tool. Be specific about what it does and when to use it. The `parameters` define what arguments the LLM passes. 
+ +## Step 4: Write the tool handlers + +Create `tools.py` — this is the code that actually executes when the LLM calls your tools: + +```python +"""Tool handlers — the code that runs when the LLM calls each tool.""" + +import json +import math + +# Safe globals for expression evaluation — no file/network access +_SAFE_MATH = { + "abs": abs, "round": round, "min": min, "max": max, + "pow": pow, "sqrt": math.sqrt, "sin": math.sin, "cos": math.cos, + "tan": math.tan, "log": math.log, "log2": math.log2, "log10": math.log10, + "floor": math.floor, "ceil": math.ceil, + "pi": math.pi, "e": math.e, + "factorial": math.factorial, +} + + +def calculate(args: dict, **kwargs) -> str: + """Evaluate a math expression safely. + + Rules for handlers: + 1. Receive args (dict) — the parameters the LLM passed + 2. Do the work + 3. Return a JSON string — ALWAYS, even on error + 4. Accept **kwargs for forward compatibility + """ + expression = args.get("expression", "").strip() + if not expression: + return json.dumps({"error": "No expression provided"}) + + try: + result = eval(expression, {"__builtins__": {}}, _SAFE_MATH) + return json.dumps({"expression": expression, "result": result}) + except ZeroDivisionError: + return json.dumps({"expression": expression, "error": "Division by zero"}) + except Exception as e: + return json.dumps({"expression": expression, "error": f"Invalid: {e}"}) + + +# Conversion tables — values are in base units +_LENGTH = {"m": 1, "km": 1000, "mi": 1609.34, "ft": 0.3048, "in": 0.0254, "cm": 0.01} +_WEIGHT = {"kg": 1, "g": 0.001, "lb": 0.453592, "oz": 0.0283495} +_DATA = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4} +_TIME = {"s": 1, "ms": 0.001, "min": 60, "hr": 3600, "day": 86400} + + +def _convert_temp(value, from_u, to_u): + # Normalize to Celsius + c = {"F": (value - 32) * 5/9, "K": value - 273.15}.get(from_u, value) + # Convert to target + return {"F": c * 9/5 + 32, "K": c + 273.15}.get(to_u, c) + + +def unit_convert(args: dict, 
**kwargs) -> str: + """Convert between units.""" + value = args.get("value") + from_unit = args.get("from_unit", "").strip() + to_unit = args.get("to_unit", "").strip() + + if value is None or not from_unit or not to_unit: + return json.dumps({"error": "Need value, from_unit, and to_unit"}) + + try: + # Temperature + if from_unit.upper() in {"C","F","K"} and to_unit.upper() in {"C","F","K"}: + result = _convert_temp(float(value), from_unit.upper(), to_unit.upper()) + return json.dumps({"input": f"{value} {from_unit}", "result": round(result, 4), + "output": f"{round(result, 4)} {to_unit}"}) + + # Ratio-based conversions + for table in (_LENGTH, _WEIGHT, _DATA, _TIME): + lc = {k.lower(): v for k, v in table.items()} + if from_unit.lower() in lc and to_unit.lower() in lc: + result = float(value) * lc[from_unit.lower()] / lc[to_unit.lower()] + return json.dumps({"input": f"{value} {from_unit}", + "result": round(result, 6), + "output": f"{round(result, 6)} {to_unit}"}) + + return json.dumps({"error": f"Cannot convert {from_unit} → {to_unit}"}) + except Exception as e: + return json.dumps({"error": f"Conversion failed: {e}"}) +``` + +**Key rules for handlers:** +1. **Signature:** `def my_handler(args: dict, **kwargs) -> str` +2. **Return:** Always a JSON string. Success and errors alike. +3. **Never raise:** Catch all exceptions, return error JSON instead. +4. **Accept `**kwargs`:** Hermes may pass additional context in the future. + +## Step 5: Write the registration + +Create `__init__.py` — this wires schemas to handlers: + +```python +"""Calculator plugin — registration.""" + +import logging + +from . 
import schemas, tools + +logger = logging.getLogger(__name__) + +# Track tool usage via hooks +_call_log = [] + +def _on_post_tool_call(tool_name, args, result, task_id, **kwargs): + """Hook: runs after every tool call (not just ours).""" + _call_log.append({"tool": tool_name, "session": task_id}) + if len(_call_log) > 100: + _call_log.pop(0) + logger.debug("Tool called: %s (session %s)", tool_name, task_id) + + +def register(ctx): + """Wire schemas to handlers and register hooks.""" + ctx.register_tool(name="calculate", toolset="calculator", + schema=schemas.CALCULATE, handler=tools.calculate) + ctx.register_tool(name="unit_convert", toolset="calculator", + schema=schemas.UNIT_CONVERT, handler=tools.unit_convert) + + # This hook fires for ALL tool calls, not just ours + ctx.register_hook("post_tool_call", _on_post_tool_call) +``` + +**What `register()` does:** +- Called exactly once at startup +- `ctx.register_tool()` puts your tool in the registry — the model sees it immediately +- `ctx.register_hook()` subscribes to lifecycle events +- If this function crashes, the plugin is disabled but Hermes continues fine + +## Step 6: Test it + +Start Hermes: + +```bash +hermes +``` + +You should see `calculator: calculate, unit_convert` in the banner's tool list. + +Try these prompts: +``` +What's 2 to the power of 16? +Convert 100 fahrenheit to celsius +What's the square root of 2 times pi? +How many gigabytes is 1.5 terabytes? 
+``` + +Check plugin status: +``` +/plugins +``` + +Output: +``` +Plugins (1): + ✓ calculator v1.0.0 (2 tools, 1 hooks) +``` + +## Your plugin's final structure + +``` +~/.hermes/plugins/calculator/ +├── plugin.yaml # "I'm calculator, I provide tools and hooks" +├── __init__.py # Wiring: schemas → handlers, register hooks +├── schemas.py # What the LLM reads (descriptions + parameter specs) +└── tools.py # What runs (calculate, unit_convert functions) +``` + +Four files, clear separation: +- **Manifest** declares what the plugin is +- **Schemas** describe tools for the LLM +- **Handlers** implement the actual logic +- **Registration** connects everything + +## What else can plugins do? + +### Ship data files + +Put any files in your plugin directory and read them at import time: + +```python +# In tools.py or __init__.py +from pathlib import Path + +_PLUGIN_DIR = Path(__file__).parent +_DATA_FILE = _PLUGIN_DIR / "data" / "languages.yaml" + +with open(_DATA_FILE) as f: + _DATA = yaml.safe_load(f) +``` + +### Bundle a skill + +Include a `skill.md` file and install it during registration: + +```python +import shutil +from pathlib import Path + +def _install_skill(): + """Copy our skill to ~/.hermes/skills/ on first load.""" + try: + from hermes_cli.config import get_hermes_home + dest = get_hermes_home() / "skills" / "my-plugin" / "SKILL.md" + except Exception: + dest = Path.home() / ".hermes" / "skills" / "my-plugin" / "SKILL.md" + + if dest.exists(): + return # don't overwrite user edits + + source = Path(__file__).parent / "skill.md" + if source.exists(): + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, dest) + +def register(ctx): + ctx.register_tool(...) + _install_skill() +``` + +### Gate on environment variables + +If your plugin needs an API key: + +```yaml +# plugin.yaml +requires_env: + - WEATHER_API_KEY +``` + +If `WEATHER_API_KEY` isn't set, the plugin is disabled with a clear message. 
No crash, no error in the agent — just "Plugin weather disabled (missing: WEATHER_API_KEY)". + +### Conditional tool availability + +For tools that depend on optional libraries: + +```python +ctx.register_tool( + name="my_tool", + schema={...}, + handler=my_handler, + check_fn=lambda: _has_optional_lib(), # False = tool hidden from model +) +``` + +### Register multiple hooks + +```python +def register(ctx): + ctx.register_hook("pre_tool_call", before_any_tool) + ctx.register_hook("post_tool_call", after_any_tool) + ctx.register_hook("on_session_start", on_new_session) + ctx.register_hook("on_session_end", on_session_end) +``` + +Available hooks: + +| Hook | When | Arguments | +|------|------|-----------| +| `pre_tool_call` | Before any tool runs | `tool_name`, `args`, `task_id` | +| `post_tool_call` | After any tool returns | `tool_name`, `args`, `result`, `task_id` | +| `pre_llm_call` | Before LLM API call | `messages`, `model` | +| `post_llm_call` | After LLM response | `messages`, `response`, `model` | +| `on_session_start` | Session begins | `session_id`, `platform` | +| `on_session_end` | Session ends | `session_id`, `platform` | + +Hooks are observers — they can't modify arguments or return values. If a hook crashes, it's logged and skipped; other hooks and the tool continue normally. 
+ +### Distribute via pip + +For sharing plugins publicly, add an entry point to your Python package: + +```toml +# pyproject.toml +[project.entry-points."hermes_agent.plugins"] +my-plugin = "my_plugin_package" +``` + +```bash +pip install hermes-plugin-calculator +# Plugin auto-discovered on next hermes startup +``` + +## Common mistakes + +**Handler doesn't return JSON string:** +```python +# Wrong — returns a dict +def handler(args, **kwargs): + return {"result": 42} + +# Right — returns a JSON string +def handler(args, **kwargs): + return json.dumps({"result": 42}) +``` + +**Missing `**kwargs` in handler signature:** +```python +# Wrong — will break if Hermes passes extra context +def handler(args): + ... + +# Right +def handler(args, **kwargs): + ... +``` + +**Handler raises exceptions:** +```python +# Wrong — exception propagates, tool call fails +def handler(args, **kwargs): + result = 1 / int(args["value"]) # ZeroDivisionError! + return json.dumps({"result": result}) + +# Right — catch and return error JSON +def handler(args, **kwargs): + try: + result = 1 / int(args.get("value", 0)) + return json.dumps({"result": result}) + except Exception as e: + return json.dumps({"error": str(e)}) +``` + +**Schema description too vague:** +```python +# Bad — model doesn't know when to use it +"description": "Does stuff" + +# Good — model knows exactly when and how +"description": "Evaluate a mathematical expression. Use for arithmetic, trig, logarithms. Supports: +, -, *, /, **, sqrt, sin, cos, log, pi, e." +``` diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 03e84d93f..daaad87bc 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -34,7 +34,7 @@ All variables go in `~/.hermes/.env`. 
You can also set them with `hermes config | `VOICE_TOOLS_OPENAI_KEY` | Preferred OpenAI key for OpenAI speech-to-text and text-to-speech providers | | `HERMES_LOCAL_STT_COMMAND` | Optional local speech-to-text command template. Supports `{input_path}`, `{output_dir}`, `{language}`, and `{model}` placeholders | | `HERMES_LOCAL_STT_LANGUAGE` | Default language passed to `HERMES_LOCAL_STT_COMMAND` or auto-detected local `whisper` CLI fallback (default: `en`) | -| `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`) | +| `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`). Also scopes the gateway PID file and systemd service name, so multiple installations can run concurrently | ## Provider Auth (OAuth) @@ -79,6 +79,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `TERMINAL_ENV` | Backend: `local`, `docker`, `ssh`, `singularity`, `modal`, `daytona` | | `TERMINAL_DOCKER_IMAGE` | Docker image (default: `python:3.11`) | | `TERMINAL_DOCKER_VOLUMES` | Additional Docker volume mounts (comma-separated `host:container` pairs) | +| `TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE` | Advanced opt-in: mount the launch cwd into Docker `/workspace` (`true`/`false`, default: `false`) | | `TERMINAL_SINGULARITY_IMAGE` | Singularity image or `.sif` path | | `TERMINAL_MODAL_IMAGE` | Modal container image | | `TERMINAL_DAYTONA_IMAGE` | Daytona sandbox image | diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 83cbfeecf..9a27a7131 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -63,7 +63,7 @@ Type `/` in the CLI to open the autocomplete menu. 
Built-in commands are case-in | Command | Description | |---------|-------------| | `/help` | Show this help message | -| `/usage` | Show token usage for the current session | +| `/usage` | Show token usage, cost breakdown, and session duration | | `/insights` | Show usage insights and analytics (last 30 days) | | `/platforms` | Show gateway/messaging platform status | | `/paste` | Check clipboard for an image and attach it | @@ -104,7 +104,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/compress` | Manually compress conversation context. | | `/title [name]` | Set or show the session title. | | `/resume [name]` | Resume a previously named session. | -| `/usage` | Show token usage for the current session. | +| `/usage` | Show token usage, estimated cost breakdown (input/output), context window state, and session duration. | | `/insights [days]` | Show usage analytics. | | `/reasoning [level\|show\|hide]` | Change reasoning effort or toggle reasoning display. | | `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. | diff --git a/website/docs/user-guide/checkpoints-and-rollback.md b/website/docs/user-guide/checkpoints-and-rollback.md index a7a34115f..f81a7d4f8 100644 --- a/website/docs/user-guide/checkpoints-and-rollback.md +++ b/website/docs/user-guide/checkpoints-and-rollback.md @@ -6,10 +6,28 @@ description: "Filesystem safety nets for destructive operations using shadow git # Checkpoints and `/rollback` -Hermes Agent can automatically snapshot your project before **destructive operations** (like file write/patch tools) and restore it later with a single command. +Hermes Agent automatically snapshots your project before **destructive operations** and lets you restore it with a single command. Checkpoints are **enabled by default** — there's zero cost when no file-mutating tools fire. 
This safety net is powered by an internal **Checkpoint Manager** that keeps a separate shadow git repository under `~/.hermes/checkpoints/` — your real project `.git` is never touched. +## What Triggers a Checkpoint + +Checkpoints are taken automatically before: + +- **File tools** — `write_file` and `patch` +- **Destructive terminal commands** — `rm`, `mv`, `sed -i`, `truncate`, `shred`, output redirects (`>`), and `git reset`/`clean`/`checkout` + +The agent creates **at most one checkpoint per directory per turn**, so long-running sessions don't spam snapshots. + +## Quick Reference + +| Command | Description | +|---------|-------------| +| `/rollback` | List all checkpoints with change stats | +| `/rollback ` | Restore to checkpoint N (also undoes last chat turn) | +| `/rollback diff ` | Preview diff between checkpoint N and current state | +| `/rollback ` | Restore a single file from checkpoint N | + ## How Checkpoints Work At a high level: @@ -21,24 +39,11 @@ At a high level: - Stages and commits the current state with a short, human‑readable reason. - These commits form a checkpoint history that you can inspect and restore via `/rollback`. -Internally, the Checkpoint Manager: - -- Stores shadow repos under: - - `~/.hermes/checkpoints//` -- Keeps metadata about: - - The original working directory (`HERMES_WORKDIR` file in the shadow repo). - - Excluded paths such as: - - `node_modules/`, `dist/`, `build/` - - `.venv/`, `__pycache__/`, `*.pyc` - - `.git/`, `.cache/`, `.pytest_cache/`, etc. - -The agent creates **at most one checkpoint per directory per turn**, so long running sessions do not spam snapshots. 
- ```mermaid flowchart LR user["User command\n(hermes, gateway)"] agent["AIAgent\n(run_agent.py)"] - tools["File tools\n(write/patch)"] + tools["File & terminal tools"] cpMgr["CheckpointManager"] shadowRepo["Shadow git repo\n~/.hermes/checkpoints/"] @@ -50,108 +55,128 @@ flowchart LR tools -->|"apply changes"| agent ``` -## Enabling Checkpoints +## Configuration -Checkpoints are controlled by a simple on/off flag and a maximum snapshot count **per directory**: - -- `checkpoints_enabled` – master switch -- `checkpoint_max_snapshots` – soft cap on history depth per directory - -You can configure these in `~/.hermes/config.yaml`: +Checkpoints are enabled by default. Configure in `~/.hermes/config.yaml`: ```yaml -agent: - checkpoints_enabled: true - checkpoint_max_snapshots: 50 +checkpoints: + enabled: true # master switch (default: true) + max_snapshots: 50 # max checkpoints per directory ``` -Or via CLI flags (exact wiring may depend on your version of the CLI): +To disable: -```bash -hermes --checkpoints -# or -hermes chat --checkpoints +```yaml +checkpoints: + enabled: false ``` When disabled, the Checkpoint Manager is a no‑op and never attempts git operations. ## Listing Checkpoints -Hermes exposes an interactive way to list checkpoints for the current working directory. +From a CLI session: -From the CLI session where you are working on a project: - -```bash -# Ask Hermes to show checkpoints for the current directory +``` /rollback ``` -Hermes responds with a formatted list similar to: +Hermes responds with a formatted list showing change statistics: ```text 📸 Checkpoints for /path/to/project: - 1. a1b2c3d 2026-03-13 10:24 auto: before apply_patch - 2. d4e5f6a 2026-03-13 10:15 pre-rollback snapshot (restoring to a1b2c3d0) + 1. 4270a8c 2026-03-16 04:36 before patch (1 file, +1/-0) + 2. eaf4c1f 2026-03-16 04:35 before write_file + 3. b3f9d2e 2026-03-16 04:34 before terminal: sed -i s/old/new/ config.py (1 file, +1/-1) -Use /rollback to restore, e.g. 
/rollback 1 + /rollback restore to checkpoint N + /rollback diff preview changes since checkpoint N + /rollback restore a single file from checkpoint N ``` Each entry shows: - Short hash - Timestamp -- Reason (commit message for the snapshot) +- Reason (what triggered the snapshot) +- Change summary (files changed, insertions/deletions) + +## Previewing Changes with `/rollback diff` + +Before committing to a restore, preview what has changed since a checkpoint: + +``` +/rollback diff 1 +``` + +This shows a git diff stat summary followed by the actual diff: + +```text +test.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/test.py b/test.py +--- a/test.py ++++ b/test.py +@@ -1 +1 @@ +-print('original content') ++print('modified content') +``` + +Long diffs are capped at 80 lines to avoid flooding the terminal. ## Restoring with `/rollback` -Once you have identified the snapshot you want to go back to, use `/rollback` with the number from the list: +Restore to a checkpoint by number: -```bash -# Restore to the most recent snapshot +``` /rollback 1 ``` Behind the scenes, Hermes: 1. Verifies the target commit exists in the shadow repo. -2. Takes a **pre‑rollback snapshot** of the current state so you can “undo the undo” later. -3. Runs `git checkout -- .` in the shadow repo, restoring tracked files in your working directory. +2. Takes a **pre‑rollback snapshot** of the current state so you can "undo the undo" later. +3. Restores tracked files in your working directory. +4. **Undoes the last conversation turn** so the agent's context matches the restored filesystem state. -On success, Hermes responds with a short summary like: +On success: ```text -✅ Restored /path/to/project to a1b2c3d -Reason: auto: before apply_patch +✅ Restored to checkpoint 4270a8c5: before patch +A pre-rollback snapshot was saved automatically. +(^_^)b Undid 4 message(s). Removed: "Now update test.py to ..." + 4 message(s) remaining in history. 
+ Chat turn undone to match restored file state. ``` -If something goes wrong (missing commit, git error), you will see a clear error message and details will be logged. +The conversation undo ensures the agent doesn't "remember" changes that have been rolled back, avoiding confusion on the next turn. + +## Single-File Restore + +Restore just one file from a checkpoint without affecting the rest of the directory: + +``` +/rollback 1 src/broken_file.py +``` + +This is useful when the agent made changes to multiple files but only one needs to be reverted. ## Safety and Performance Guards To keep checkpointing safe and fast, Hermes applies several guardrails: -- **Git availability** - - If `git` is not found on `PATH`, checkpoints are transparently disabled. - - A debug log entry is emitted, but your session continues normally. -- **Directory scope** - - Hermes skips overly broad directories such as: - - Root (`/`) - - Your home directory (`$HOME`) - - This prevents accidental snapshots of your entire filesystem. -- **Repository size** - - Before committing, Hermes performs a quick file count. - - If the directory has more than a configured threshold (e.g. `50,000` files), - checkpoints are skipped to avoid large git operations. -- **No‑change snapshots** - - If there are no changes since the last snapshot, the checkpoint is skipped - instead of committing an empty diff. - -All errors inside the Checkpoint Manager are treated as **non‑fatal**: they are logged at debug level and your tools continue to run. +- **Git availability** — if `git` is not found on `PATH`, checkpoints are transparently disabled. +- **Directory scope** — Hermes skips overly broad directories (root `/`, home `$HOME`). +- **Repository size** — directories with more than 50,000 files are skipped to avoid slow git operations. +- **No‑change snapshots** — if there are no changes since the last snapshot, the checkpoint is skipped. 
+- **Non‑fatal errors** — all errors inside the Checkpoint Manager are logged at debug level; your tools continue to run. ## Where Checkpoints Live -By default, all shadow repos live under: +All shadow repos live under: ```text ~/.hermes/checkpoints/ @@ -160,21 +185,19 @@ By default, all shadow repos live under: └── ... ``` -Each `` is derived from the absolute path of the working directory. Inside each shadow repo you will find: +Each `` is derived from the absolute path of the working directory. Inside each shadow repo you'll find: - Standard git internals (`HEAD`, `refs/`, `objects/`) - An `info/exclude` file containing a curated ignore list - A `HERMES_WORKDIR` file pointing back to the original project root -You normally never need to touch these manually; they are documented here so advanced users understand how the safety net works. +You normally never need to touch these manually. ## Best Practices -- **Keep checkpoints enabled** for interactive development and refactors. -- **Use `/rollback` instead of `git reset`** when you want to undo agent‑driven changes only. -- **Combine with Git branches and worktrees** for maximum safety: - - Keep each Hermes session in its own worktree/branch. - - Let checkpoints act as an extra layer of protection on top. - -For running multiple agents in parallel on the same repo without interfering with each other, see the dedicated guide on [Git worktrees](./git-worktrees.md). +- **Leave checkpoints enabled** — they're on by default and have zero cost when no files are modified. +- **Use `/rollback diff` before restoring** — preview what will change to pick the right checkpoint. +- **Use `/rollback` instead of `git reset`** when you want to undo agent-driven changes only. +- **Combine with Git worktrees** for maximum safety — keep each Hermes session in its own worktree/branch, with checkpoints as an extra layer. +For running multiple agents in parallel on the same repo, see the guide on [Git worktrees](./git-worktrees.md). 
diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index 0211ae36b..a33ed295e 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -50,6 +50,35 @@ hermes -w -q "Fix issue #123" # Single query in worktree The welcome banner shows your model, terminal backend, working directory, available tools, and installed skills at a glance. +### Status Bar + +A persistent status bar sits above the input area, updating in real time: + +``` + ⚕ claude-sonnet-4-20250514 │ 12.4K/200K │ [██████░░░░] 6% │ $0.06 │ 15m +``` + +| Element | Description | +|---------|-------------| +| Model name | Current model (truncated if longer than 26 chars) | +| Token count | Context tokens used / max context window | +| Context bar | Visual fill indicator with color-coded thresholds | +| Cost | Estimated session cost (or `n/a` for unknown/zero-priced models) | +| Duration | Elapsed session time | + +The bar adapts to terminal width — full layout at ≥ 76 columns, compact at 52–75, minimal (model + duration only) below 52. + +**Context color coding:** + +| Color | Threshold | Meaning | +|-------|-----------|---------| +| Green | < 50% | Plenty of room | +| Yellow | 50–80% | Getting full | +| Orange | 80–95% | Approaching limit | +| Red | ≥ 95% | Near overflow — consider `/compress` | + +Use `/usage` for a detailed breakdown including per-category costs (input vs output tokens). + ### Session Resume Display When resuming a previous session (`hermes -c` or `hermes --resume `), a "Previous Conversation" panel appears between the banner and the input prompt, showing a compact recap of the conversation history. See [Sessions — Conversation Recap on Resume](sessions.md#conversation-recap-on-resume) for details and configuration. 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 97cb9f0b5..f55a65181 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -441,6 +441,39 @@ Supported providers: `openrouter`, `nous`, `openai-codex`, `anthropic`, `zai`, ` Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). ::: +## Smart Model Routing + +Optional cheap-vs-strong routing lets Hermes keep your main model for complex work while sending very short/simple turns to a cheaper model. + +```yaml +smart_model_routing: + enabled: true + max_simple_chars: 160 + max_simple_words: 28 + cheap_model: + provider: openrouter + model: google/gemini-2.5-flash + # base_url: http://localhost:8000/v1 # optional custom endpoint + # api_key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key +``` + +How it works: +- If a turn is short, single-line, and does not look code/tool/debug heavy, Hermes may route it to `cheap_model` +- If the turn looks complex, Hermes stays on your primary model/provider +- If the cheap route cannot be resolved cleanly, Hermes falls back to the primary model automatically + +This is intentionally conservative. It is meant for quick, low-stakes turns like: +- short factual questions +- quick rewrites +- lightweight summaries + +It will avoid routing prompts that look like: +- coding/debugging work +- tool-heavy requests +- long or multi-line analysis asks + +Use this when you want lower latency or cost without fully changing your default model. 
+ ## Terminal Backend Configuration Configure which environment the agent uses for terminal commands: @@ -453,7 +486,8 @@ terminal: # Docker-specific settings docker_image: "nikolaik/python-nodejs:python3.11-nodejs20" - docker_volumes: # Share host directories with the container + docker_mount_cwd_to_workspace: false # SECURITY: off by default. Opt in to mount the launch cwd into /workspace. + docker_volumes: # Additional explicit host mounts - "/home/user/projects:/workspace/projects" - "/home/user/data:/data:ro" # :ro for read-only @@ -520,41 +554,30 @@ This is useful for: Can also be set via environment variable: `TERMINAL_DOCKER_VOLUMES='["/host:/container"]'` (JSON array). -### Docker Auto-Mount Current Directory +### Optional: Mount the Launch Directory into `/workspace` -When using the Docker backend, Hermes **automatically mounts your current working directory** to `/workspace` inside the container. This means you can: +Docker sandboxes stay isolated by default. Hermes does **not** pass your current host working directory into the container unless you explicitly opt in. -```bash -cd ~/projects/my-app -hermes -# The agent can now see and edit files in ~/projects/my-app via /workspace +Enable it in `config.yaml`: + +```yaml +terminal: + backend: docker + docker_mount_cwd_to_workspace: true ``` -No manual volume configuration needed — just `cd` to your project and run `hermes`. +When enabled: +- if you launch Hermes from `~/projects/my-app`, that host directory is bind-mounted to `/workspace` +- the Docker backend starts in `/workspace` +- file tools and terminal commands both see the same mounted project -**How it works:** -- If you're in `/home/user/projects/my-app`, that directory is mounted to `/workspace` -- The container's working directory is set to `/workspace` -- Files you edit on the host are immediately visible to the agent, and vice versa +When disabled, `/workspace` stays sandbox-owned unless you explicitly mount something via `docker_volumes`. 
-**Disabling auto-mount:** +Security tradeoff: +- `false` preserves the sandbox boundary +- `true` gives the sandbox direct access to the directory you launched Hermes from -If you prefer the old behavior (empty `/workspace` with tmpfs or persistent sandbox), disable auto-mount: - -```bash -export TERMINAL_DOCKER_NO_AUTO_MOUNT=true -``` - -**Precedence:** - -Auto-mount is skipped when: -1. `TERMINAL_DOCKER_NO_AUTO_MOUNT=true` is set -2. You've explicitly configured a volume mount to `/workspace` in `docker_volumes` -3. `container_persistent: true` is set (persistent sandbox mode uses its own `/workspace`) - -:::tip -Auto-mount is ideal for project-based work where you want the agent to operate on your actual files. For isolated sandboxing where the agent shouldn't access your filesystem, set `TERMINAL_DOCKER_NO_AUTO_MOUNT=true`. -::: +Use the opt-in only when you intentionally want the container to work on live host files. ### Persistent Shell @@ -843,6 +866,27 @@ display: | `all` | Every tool call with a short preview (default) | | `verbose` | Full args, results, and debug logs | +## Privacy + +```yaml +privacy: + redact_pii: false # Strip PII from LLM context (gateway only) +``` + +When `redact_pii` is `true`, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM on supported platforms: + +| Field | Treatment | +|-------|-----------| +| Phone numbers (user ID on WhatsApp/Signal) | Hashed to `user_<12-char-sha256>` | +| User IDs | Hashed to `user_<12-char-sha256>` | +| Chat IDs | Numeric portion hashed, platform prefix preserved (`telegram:`) | +| Home channel IDs | Numeric portion hashed | +| User names / usernames | **Not affected** (user-chosen, publicly visible) | + +**Platform support:** Redaction applies to WhatsApp, Signal, and Telegram. Discord and Slack are excluded because their mention systems (`<@user_id>`) require the real ID in the LLM context. 
+ +Hashes are deterministic — the same user always maps to the same hash, so the model can still distinguish between users in group chats. Routing and delivery use the original values internally. + ## Speech-to-Text (STT) ```yaml diff --git a/website/docs/user-guide/features/checkpoints.md b/website/docs/user-guide/features/checkpoints.md index a50aca8ff..aed879fc2 100644 --- a/website/docs/user-guide/features/checkpoints.md +++ b/website/docs/user-guide/features/checkpoints.md @@ -1,97 +1,30 @@ # Filesystem Checkpoints -Hermes can automatically snapshot your working directory before making file changes, giving you a safety net to roll back if something goes wrong. +Hermes automatically snapshots your working directory before making file changes, giving you a safety net to roll back if something goes wrong. Checkpoints are **enabled by default**. -## How It Works +## Quick Reference -When enabled, Hermes takes a **one-time snapshot** at the start of each conversation turn before the first file-modifying operation (`write_file` or `patch`). This creates a point-in-time backup you can restore to at any time. +| Command | Description | +|---------|-------------| +| `/rollback` | List all checkpoints with change stats | +| `/rollback <n>` | Restore to checkpoint N (also undoes last chat turn) | +| `/rollback diff <n>` | Preview diff between checkpoint N and current state | +| `/rollback <n> <file>` | Restore a single file from checkpoint N | -Under the hood, checkpoints use a **shadow git repository** stored at `~/.hermes/checkpoints/`. This is completely separate from your project's git — no `.git` directory is created in your project, and your own git history is never touched. 
+## What Triggers Checkpoints -## Enabling Checkpoints +- **File tools** — `write_file` and `patch` +- **Destructive terminal commands** — `rm`, `mv`, `sed -i`, output redirects (`>`), `git reset`/`clean` -### Per-session (CLI flag) - -```bash -hermes --checkpoints -``` - -### Permanently (config.yaml) +## Configuration ```yaml # ~/.hermes/config.yaml checkpoints: - enabled: true - max_snapshots: 50 # max checkpoints per directory (default: 50) + enabled: true # default: true + max_snapshots: 50 # max checkpoints per directory ``` -## Rolling Back +## Learn More -Use the `/rollback` slash command: - -``` -/rollback # List all available checkpoints -/rollback 1 # Restore to checkpoint #1 (most recent) -/rollback 3 # Restore to checkpoint #3 (further back) -/rollback abc1234 # Restore by git commit hash -``` - -Example output: - -``` -📸 Checkpoints for /home/user/project: - - 1. abc1234 2026-03-10 14:22 before write_file - 2. def5678 2026-03-10 14:15 before patch - 3. ghi9012 2026-03-10 14:08 before write_file - -Use /rollback to restore, e.g. /rollback 1 -``` - -When you restore, Hermes automatically takes a **pre-rollback snapshot** first — so you can always undo your undo. 
- -## What Gets Checkpointed - -Checkpoints capture the entire working directory (the project root), excluding common large/sensitive patterns: - -- `node_modules/`, `dist/`, `build/` -- `.env`, `.env.*` -- `__pycache__/`, `*.pyc` -- `.venv/`, `venv/` -- `.git/` -- `.DS_Store`, `*.log` - -## Performance - -Checkpoints are designed to be lightweight: - -- **Once per turn** — only the first file operation triggers a snapshot, not every write -- **Skips large directories** — directories with >50,000 files are skipped automatically -- **Skips when nothing changed** — if no files were modified since the last checkpoint, no commit is created -- **Non-blocking** — if a checkpoint fails for any reason, the file operation proceeds normally - -## How It Determines the Project Root - -When you write to a file like `src/components/Button.tsx`, Hermes walks up the directory tree looking for project markers (`.git`, `pyproject.toml`, `package.json`, `Cargo.toml`, etc.) to find the project root. This ensures the entire project is checkpointed, not just the file's parent directory. - -## Platforms - -Checkpoints work on both: -- **CLI** — uses your current working directory -- **Gateway** (Telegram, Discord, etc.) — uses `MESSAGING_CWD` - -The `/rollback` command is available on all platforms. - -## FAQ - -**Does this conflict with my project's git?** -No. Checkpoints use a completely separate shadow git repository via `GIT_DIR` environment variables. Your project's `.git/` is never touched. - -**How much disk space do checkpoints use?** -Git is very efficient at storing diffs. For most projects, checkpoint data is negligible. Old checkpoints are pruned when `max_snapshots` is exceeded. - -**Can I checkpoint without git installed?** -No — git must be available on your PATH. If it's not installed, checkpoints silently disable. - -**Can I roll back across sessions?** -Yes! Checkpoints persist in `~/.hermes/checkpoints/` and survive across sessions. 
You can roll back to a checkpoint from yesterday. +For the full guide — how shadow repos work, diff previews, file-level restore, conversation undo, safety guards, and best practices — see **[Checkpoints and /rollback](../checkpoints-and-rollback.md)**. diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md new file mode 100644 index 000000000..9b86d5d16 --- /dev/null +++ b/website/docs/user-guide/features/plugins.md @@ -0,0 +1,62 @@ +--- +sidebar_position: 20 +--- + +# Plugins + +Hermes has a plugin system for adding custom tools, hooks, and integrations without modifying core code. + +**→ [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin)** — step-by-step guide with a complete working example. + +## Quick overview + +Drop a directory into `~/.hermes/plugins/` with a `plugin.yaml` and Python code: + +``` +~/.hermes/plugins/my-plugin/ +├── plugin.yaml # manifest +├── __init__.py # register() — wires schemas to handlers +├── schemas.py # tool schemas (what the LLM sees) +└── tools.py # tool handlers (what runs when called) +``` + +Start Hermes — your tools appear alongside built-in tools. The model can call them immediately. 
+ +## What plugins can do + +| Capability | How | +|-----------|-----| +| Add tools | `ctx.register_tool(name, schema, handler)` | +| Add hooks | `ctx.register_hook("post_tool_call", callback)` | +| Ship data files | `Path(__file__).parent / "data" / "file.yaml"` | +| Bundle skills | Copy `skill.md` to `~/.hermes/skills/` at load time | +| Gate on env vars | `requires_env: [API_KEY]` in plugin.yaml | +| Distribute via pip | `[project.entry-points."hermes_agent.plugins"]` | + +## Plugin discovery + +| Source | Path | Use case | +|--------|------|----------| +| User | `~/.hermes/plugins/` | Personal plugins | +| Project | `.hermes/plugins/` | Project-specific plugins | +| pip | `hermes_agent.plugins` entry_points | Distributed packages | + +## Available hooks + +| Hook | Fires when | +|------|-----------| +| `pre_tool_call` | Before any tool executes | +| `post_tool_call` | After any tool returns | +| `pre_llm_call` | Before LLM API request | +| `post_llm_call` | After LLM API response | +| `on_session_start` | Session begins | +| `on_session_end` | Session ends | + +## Managing plugins + +``` +/plugins # list loaded plugins in a session +hermes config set display.show_cost true # show cost in status bar +``` + +See the **[full guide](/docs/guides/build-a-hermes-plugin)** for handler contracts, schema format, hook behavior, error handling, and common mistakes. diff --git a/website/docs/user-guide/messaging/email.md b/website/docs/user-guide/messaging/email.md index 8f515e851..c302532b1 100644 --- a/website/docs/user-guide/messaging/email.md +++ b/website/docs/user-guide/messaging/email.md @@ -118,6 +118,18 @@ Replies are sent via SMTP with proper email threading: The agent can send file attachments in replies. Include `MEDIA:/path/to/file` in the response and the file is attached to the outgoing email. 
+### Skipping Attachments + +To ignore all incoming attachments (for malware protection or bandwidth savings), add to your `config.yaml`: + +```yaml +platforms: + email: + skip_attachments: true +``` + +When enabled, attachment and inline parts are skipped before payload decoding. The email body text is still processed normally. + --- ## Access Control diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index fea310d21..0c17e65e6 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -244,10 +244,10 @@ Background tasks on messaging platforms are fire-and-forget — you don't need t ```bash hermes gateway install # Install as user service -systemctl --user start hermes-gateway -systemctl --user stop hermes-gateway -systemctl --user status hermes-gateway -journalctl --user -u hermes-gateway -f +hermes gateway start # Start the service +hermes gateway stop # Stop the service +hermes gateway status # Check status +journalctl --user -u hermes-gateway -f # View logs # Enable lingering (keeps running after logout) sudo loginctl enable-linger $USER @@ -263,6 +263,10 @@ Use the user service on laptops and dev boxes. Use the system service on VPS or Avoid keeping both the user and system gateway units installed at once unless you really mean to. Hermes will warn if it detects both because start/stop/status behavior gets ambiguous. +:::info Multiple installations +If you run multiple Hermes installations on the same machine (with different `HERMES_HOME` directories), each gets its own systemd service name. The default `~/.hermes` uses `hermes-gateway`; other installations use `hermes-gateway-<name>`. The `hermes gateway` commands automatically target the correct service for your current `HERMES_HOME`. +::: + ### macOS (launchd) ```bash