From 57be18c0268941a51c9ad08681ddfdbace228869 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:20:11 -0700 Subject: [PATCH] feat: smart approvals + /stop command (inspired by OpenAI Codex) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. 
* feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. --- cli.py | 30 +++++++++- gateway/run.py | 6 ++ hermes_cli/commands.py | 1 + hermes_cli/config.py | 14 +++++ tests/hermes_cli/test_commands.py | 2 +- tools/approval.py | 93 ++++++++++++++++++++++++++++++- 6 files changed, 142 insertions(+), 4 deletions(-) diff --git a/cli.py b/cli.py index aa888fd6a..1088480f3 100755 --- a/cli.py +++ b/cli.py @@ -395,7 +395,13 @@ def load_cli_config() -> Dict[str, Any]: "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER", "model": "AUXILIARY_WEB_EXTRACT_MODEL", "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", - "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", + }, + "approval": { + "provider": "AUXILIARY_APPROVAL_PROVIDER", + "model": "AUXILIARY_APPROVAL_MODEL", + "base_url": "AUXILIARY_APPROVAL_BASE_URL", + "api_key": "AUXILIARY_APPROVAL_API_KEY", + }, } @@ -1987,6 +1993,26 @@ class HermesCLI: # Treat as a git hash return ref + def _handle_stop_command(self): + """Handle /stop — kill all running background processes. + + Inspired by OpenAI Codex's separation of interrupt (stop current turn) + from /stop (clean up background processes). See openai/codex#14602. 
+ """ + from tools.process_registry import get_registry + + registry = get_registry() + processes = registry.list_processes() + running = [p for p in processes if p.get("status") == "running"] + + if not running: + print(" No running background processes.") + return + + print(f" Stopping {len(running)} background process(es)...") + killed = registry.kill_all() + print(f" ✅ Stopped {killed} process(es).") + def _handle_paste_command(self): """Handle /paste — explicitly check clipboard for an image. @@ -3237,6 +3263,8 @@ class HermesCLI: self._reload_mcp() elif cmd_lower.startswith("/rollback"): self._handle_rollback_command(cmd_original) + elif cmd_lower == "/stop": + self._handle_stop_command() elif cmd_lower.startswith("/background"): self._handle_background_command(cmd_original) elif cmd_lower.startswith("/skin"): diff --git a/gateway/run.py b/gateway/run.py index f77821c5a..50e68eaad 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -157,6 +157,12 @@ if _config_path.exists(): "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL", "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY", }, + "approval": { + "provider": "AUXILIARY_APPROVAL_PROVIDER", + "model": "AUXILIARY_APPROVAL_MODEL", + "base_url": "AUXILIARY_APPROVAL_BASE_URL", + "api_key": "AUXILIARY_APPROVAL_API_KEY", + }, } for _task_key, _env_map in _aux_task_env.items(): _task_cfg = _auxiliary_cfg.get(_task_key, {}) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 3a9b5b712..ecfdaba05 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -28,6 +28,7 @@ COMMANDS_BY_CATEGORY = { "/title": "Set a title for the current session (usage: /title My Session Name)", "/compress": "Manually compress conversation context (flush memories + summarize)", "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])", + "/stop": "Kill all running background processes", "/background": "Run a prompt in the background (usage: /background )", }, "Configuration": { diff --git 
a/hermes_cli/config.py b/hermes_cli/config.py index 5c19ad676..d7f47c49a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -185,6 +185,12 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", }, + "approval": { + "provider": "auto", + "model": "", # fast/cheap model recommended (e.g. gemini-flash, haiku) + "base_url": "", + "api_key": "", + }, "mcp": { "provider": "auto", "model": "", @@ -296,6 +302,14 @@ DEFAULT_CONFIG = { "auto_thread": True, # Auto-create threads on @mention in channels (like Slack) }, + # Approval mode for dangerous commands: + # manual — always prompt the user (default) + # smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk + # off — skip all approval prompts (equivalent to --yolo) + "approvals": { + "mode": "manual", + }, + # Permanently allowed dangerous command patterns (added via "always" approval) "command_allowlist": [], # User-defined quick commands that bypass the agent loop (type: exec only) diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 218059434..cb5a863a3 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -12,7 +12,7 @@ EXPECTED_COMMANDS = { "/personality", "/clear", "/history", "/new", "/reset", "/retry", "/undo", "/save", "/config", "/cron", "/skills", "/platforms", "/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste", - "/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/quit", + "/reload-mcp", "/rollback", "/stop", "/background", "/skin", "/voice", "/quit", } diff --git a/tools/approval.py b/tools/approval.py index 92da71ca5..9f1b541ff 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -4,6 +4,7 @@ This module is the single source of truth for the dangerous command system: - Pattern detection (DANGEROUS_PATTERNS, detect_dangerous_command) - Per-session approval state (thread-safe, keyed by session_key) - Approval prompting (CLI interactive + gateway async) +- 
Smart approval via auxiliary LLM (auto-approve low-risk commands) - Permanent allowlist persistence (config.yaml) """ @@ -283,6 +284,68 @@ def prompt_dangerous_approval(command: str, description: str, sys.stdout.flush() +def _get_approval_mode() -> str: + """Read the approval mode from config. Returns 'manual', 'smart', or 'off'.""" + try: + from hermes_cli.config import load_config + config = load_config() + return config.get("approvals", {}).get("mode", "manual") + except Exception: + return "manual" + + +def _smart_approve(command: str, description: str) -> str: + """Use the auxiliary LLM to assess risk and decide approval. + + Returns 'approve' if the LLM determines the command is safe, + 'deny' if genuinely dangerous, or 'escalate' if uncertain. + + Inspired by OpenAI Codex's Smart Approvals guardian subagent + (openai/codex#13860). + """ + try: + from agent.auxiliary_client import get_text_auxiliary_client, auxiliary_max_tokens_param + + client, model = get_text_auxiliary_client(task="approval") + if not client or not model: + logger.debug("Smart approvals: no aux client available, escalating") + return "escalate" + + prompt = f"""You are a security reviewer for an AI coding agent. A terminal command was flagged by pattern matching as potentially dangerous. + +Command: {command} +Flagged reason: {description} + +Assess the ACTUAL risk of this command. Many flagged commands are false positives — for example, `python -c "print('hello')"` is flagged as "script execution via -c flag" but is completely harmless. + +Rules: +- APPROVE if the command is clearly safe (benign script execution, safe file operations, development tools, package installs, git operations, etc.) +- DENY if the command could genuinely damage the system (recursive delete of important paths, overwriting system files, fork bombs, wiping disks, dropping databases, etc.) 
+- ESCALATE if you're uncertain + +Respond with exactly one word: APPROVE, DENY, or ESCALATE""" + + response = client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + **auxiliary_max_tokens_param(16), + temperature=0, + ) + + answer = (response.choices[0].message.content or "").strip().upper() + + if "APPROVE" in answer: + return "approve" + elif "DENY" in answer: + return "deny" + else: + return "escalate" + + except Exception as e: + logger.debug("Smart approvals: LLM call failed (%s), escalating", e) + return "escalate" + + def check_dangerous_command(command: str, env_type: str, approval_callback=None) -> dict: """Check if a command is dangerous and handle approval. @@ -372,8 +435,9 @@ def check_all_command_guards(command: str, env_type: str, if env_type in ("docker", "singularity", "modal", "daytona"): return {"approved": True, "message": None} - # --yolo: bypass all approval prompts and pre-exec guard checks - if os.getenv("HERMES_YOLO_MODE"): + # --yolo or approvals.mode=off: bypass all approval prompts + approval_mode = _get_approval_mode() + if os.getenv("HERMES_YOLO_MODE") or approval_mode == "off": return {"approved": True, "message": None} is_cli = os.getenv("HERMES_INTERACTIVE") @@ -430,6 +494,31 @@ def check_all_command_guards(command: str, env_type: str, if not warnings: return {"approved": True, "message": None} + # --- Phase 2.5: Smart approval (auxiliary LLM risk assessment) --- + # When approvals.mode=smart, ask the aux LLM before prompting the user. + # Inspired by OpenAI Codex's Smart Approvals guardian subagent + # (openai/codex#13860). 
+ if approval_mode == "smart": + combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings) + verdict = _smart_approve(command, combined_desc_for_llm) + if verdict == "approve": + # Auto-approve and grant session-level approval for these patterns + for key, _, _ in warnings: + approve_session(session_key, key) + logger.debug("Smart approval: auto-approved '%s' (%s)", + command[:60], combined_desc_for_llm) + return {"approved": True, "message": None, + "smart_approved": True} + elif verdict == "deny": + combined_desc_for_llm = "; ".join(desc for _, desc, _ in warnings) + return { + "approved": False, + "message": f"BLOCKED by smart approval: {combined_desc_for_llm}. " + "The command was assessed as genuinely dangerous. Do NOT retry.", + "smart_denied": True, + } + # verdict == "escalate" → fall through to manual prompt + # --- Phase 3: Approval --- # Combine descriptions for a single approval prompt