diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index b8371f79..06d63632 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -71,15 +71,17 @@ DEFAULT_AGENT_IDENTITY = ( ) MEMORY_GUIDANCE = ( - "You have persistent memory across sessions. Proactively save important things " - "you learn (user preferences, environment details, useful approaches) and do " - "(like a diary!) using the memory tool -- don't wait to be asked." + "You have persistent memory across sessions. Save durable facts using the memory " + "tool: user preferences, environment details, tool quirks, and stable conventions. " + "Memory is injected into every turn, so keep it compact. Do NOT save task progress, " + "session outcomes, or completed-work logs to memory; use session_search to recall " + "those from past transcripts." ) SESSION_SEARCH_GUIDANCE = ( "When the user references something from a past conversation or you suspect " - "relevant prior context exists, use session_search to recall it before asking " - "them to repeat themselves." + "relevant cross-session context exists, use session_search to recall it before " + "asking them to repeat themselves." ) SKILLS_GUIDANCE = ( @@ -139,6 +141,13 @@ PLATFORM_HINTS = { "is preserved for threading. Do not include greetings or sign-offs unless " "contextually appropriate." ), + "cron": ( + "You are running as a scheduled cron job. Your final response is automatically " + "delivered to the job's configured destination, so do not use send_message to " + "send to that same target again. If you want the user to receive something in " + "the scheduled destination, put it directly in your final response. Use " + "send_message only for additional or different targets." + ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " "renderable inside a terminal." 
diff --git a/cli.py b/cli.py index 6dcf5e16..a13080af 100755 --- a/cli.py +++ b/cli.py @@ -3203,8 +3203,33 @@ class HermesCLI: else: self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]") else: - self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") - self.console.print("[dim #B8860B]Type /help for available commands[/]") + # Prefix matching: if input uniquely identifies one command, execute it. + # Matches against both built-in COMMANDS and installed skill commands so + # that execution-time resolution agrees with tab-completion. + from hermes_cli.commands import COMMANDS + typed_base = cmd_lower.split()[0] + all_known = set(COMMANDS) | set(_skill_commands) + matches = [c for c in all_known if c.startswith(typed_base)] + if len(matches) == 1: + # Expand the prefix to the full command name, preserving arguments. + # Guard against redispatching the same token to avoid infinite + # recursion when the expanded name still doesn't hit an exact branch + # (e.g. /config with extra args that are not yet handled above). + full_name = matches[0] + if full_name == typed_base: + # Already an exact token — no expansion possible; fall through + self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") + self.console.print("[dim #B8860B]Type /help for available commands[/]") + else: + remainder = cmd_original.strip()[len(typed_base):] + full_cmd = full_name + remainder + return self.process_command(full_cmd) + elif len(matches) > 1: + self.console.print(f"[bold yellow]Ambiguous command: {cmd_lower}[/]") + self.console.print(f"[dim]Did you mean: {', '.join(sorted(matches))}?[/]") + else: + self.console.print(f"[bold red]Unknown command: {cmd_lower}[/]") + self.console.print("[dim #B8860B]Type /help for available commands[/]") return True @@ -4174,6 +4199,8 @@ class HermesCLI: Called from the agent thread. Shows a selection UI similar to clarify with choices: once / session / always / deny. 
When allow_permanent is False (tirith warnings present), the 'always' option is hidden. + Long commands also get a 'view' option so the full command can be + expanded before deciding. Uses _approval_lock to serialize concurrent requests (e.g. from parallel delegation subtasks) so each prompt gets its own turn @@ -4184,12 +4211,11 @@ class HermesCLI: with self._approval_lock: timeout = 60 response_queue = queue.Queue() - choices = ["once", "session", "always", "deny"] if allow_permanent else ["once", "session", "deny"] self._approval_state = { "command": command, "description": description, - "choices": choices, + "choices": self._approval_choices(command, allow_permanent=allow_permanent), "selected": 0, "response_queue": response_queue, } @@ -4220,6 +4246,116 @@ class HermesCLI: _cprint(f"\n{_DIM} ⏱ Timeout — denying command{_RST}") return "deny" + def _approval_choices(self, command: str, *, allow_permanent: bool = True) -> list[str]: + """Return approval choices for a dangerous command prompt.""" + choices = ["once", "session", "always", "deny"] if allow_permanent else ["once", "session", "deny"] + if len(command) > 70: + choices.append("view") + return choices + + def _handle_approval_selection(self) -> None: + """Process the currently selected dangerous-command approval choice.""" + state = self._approval_state + if not state: + return + + selected = state.get("selected", 0) + choices = state.get("choices") or [] + if not (0 <= selected < len(choices)): + return + + chosen = choices[selected] + if chosen == "view": + state["show_full"] = True + state["choices"] = [choice for choice in choices if choice != "view"] + if state["selected"] >= len(state["choices"]): + state["selected"] = max(0, len(state["choices"]) - 1) + self._invalidate() + return + + state["response_queue"].put(chosen) + self._approval_state = None + self._invalidate() + + def _get_approval_display_fragments(self): + """Render the dangerous-command approval panel for the prompt_toolkit UI.""" + 
state = self._approval_state + if not state: + return [] + + def _panel_box_width(title_text: str, content_lines: list[str], min_width: int = 46, max_width: int = 76) -> int: + term_cols = shutil.get_terminal_size((100, 20)).columns + longest = max([len(title_text)] + [len(line) for line in content_lines] + [min_width - 4]) + inner = min(max(longest + 4, min_width - 2), max_width - 2, max(24, term_cols - 6)) + return inner + 2 + + def _wrap_panel_text(text: str, width: int, subsequent_indent: str = "") -> list[str]: + wrapped = textwrap.wrap( + text, + width=max(8, width), + replace_whitespace=False, + drop_whitespace=False, + subsequent_indent=subsequent_indent, + ) + return wrapped or [""] + + def _append_panel_line(lines, border_style: str, content_style: str, text: str, box_width: int) -> None: + inner_width = max(0, box_width - 2) + lines.append((border_style, "│ ")) + lines.append((content_style, text.ljust(inner_width))) + lines.append((border_style, " │\n")) + + def _append_blank_panel_line(lines, border_style: str, box_width: int) -> None: + lines.append((border_style, "│" + (" " * box_width) + "│\n")) + + command = state["command"] + description = state["description"] + choices = state["choices"] + selected = state.get("selected", 0) + show_full = state.get("show_full", False) + + title = "⚠️ Dangerous Command" + cmd_display = command if show_full or len(command) <= 70 else command[:70] + '...' 
+ choice_labels = { + "once": "Allow once", + "session": "Allow for this session", + "always": "Add to permanent allowlist", + "deny": "Deny", + "view": "Show full command", + } + + preview_lines = _wrap_panel_text(description, 60) + preview_lines.extend(_wrap_panel_text(cmd_display, 60)) + for i, choice in enumerate(choices): + prefix = '❯ ' if i == selected else ' ' + preview_lines.extend(_wrap_panel_text( + f"{prefix}{choice_labels.get(choice, choice)}", + 60, + subsequent_indent=" ", + )) + + box_width = _panel_box_width(title, preview_lines) + inner_text_width = max(8, box_width - 2) + + lines = [] + lines.append(('class:approval-border', '╭' + ('─' * box_width) + '╮\n')) + _append_panel_line(lines, 'class:approval-border', 'class:approval-title', title, box_width) + _append_blank_panel_line(lines, 'class:approval-border', box_width) + for wrapped in _wrap_panel_text(description, inner_text_width): + _append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width) + for wrapped in _wrap_panel_text(cmd_display, inner_text_width): + _append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width) + _append_blank_panel_line(lines, 'class:approval-border', box_width) + for i, choice in enumerate(choices): + label = choice_labels.get(choice, choice) + style = 'class:approval-selected' if i == selected else 'class:approval-choice' + prefix = '❯ ' if i == selected else ' ' + for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): + _append_panel_line(lines, 'class:approval-border', style, wrapped, box_width) + _append_blank_panel_line(lines, 'class:approval-border', box_width) + lines.append(('class:approval-border', '╰' + ('─' * box_width) + '╯\n')) + return lines + def _secret_capture_callback(self, var_name: str, prompt: str, metadata=None) -> dict: return prompt_for_secret(self, var_name, prompt, metadata) @@ -4811,22 +4947,7 @@ class HermesCLI: # --- Approval 
selection: confirm the highlighted choice --- if self._approval_state: - state = self._approval_state - selected = state["selected"] - choices = state["choices"] - if 0 <= selected < len(choices): - chosen = choices[selected] - if chosen == "view": - # Toggle full command display without closing the prompt - state["show_full"] = True - # Remove the "view" option since it's been used - state["choices"] = [c for c in choices if c != "view"] - if state["selected"] >= len(state["choices"]): - state["selected"] = len(state["choices"]) - 1 - event.app.invalidate() - return - state["response_queue"].put(chosen) - self._approval_state = None + self._handle_approval_selection() event.app.invalidate() return @@ -5512,53 +5633,7 @@ class HermesCLI: # --- Dangerous command approval: display widget --- def _get_approval_display(): - state = cli_ref._approval_state - if not state: - return [] - command = state["command"] - description = state["description"] - choices = state["choices"] - selected = state.get("selected", 0) - show_full = state.get("show_full", False) - - if show_full or len(command) <= 70: - cmd_display = command - else: - cmd_display = command[:70] + '...' 
- choice_labels = { - "once": "Allow once", - "session": "Allow for this session", - "always": "Add to permanent allowlist", - "deny": "Deny", - "view": "Show full command", - } - preview_lines = _wrap_panel_text(description, 60) - preview_lines.extend(_wrap_panel_text(cmd_display, 60)) - for i, choice in enumerate(choices): - prefix = '❯ ' if i == selected else ' ' - preview_lines.extend(_wrap_panel_text(f"{prefix}{choice_labels.get(choice, choice)}", 60, subsequent_indent=" ")) - box_width = _panel_box_width("⚠️ Dangerous Command", preview_lines) - inner_text_width = max(8, box_width - 2) - - lines = [] - lines.append(('class:approval-border', '╭─ ')) - lines.append(('class:approval-title', '⚠️ Dangerous Command')) - lines.append(('class:approval-border', ' ' + ('─' * max(0, box_width - len("⚠️ Dangerous Command") - 3)) + '╮\n')) - _append_blank_panel_line(lines, 'class:approval-border', box_width) - for wrapped in _wrap_panel_text(description, inner_text_width): - _append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width) - for wrapped in _wrap_panel_text(cmd_display, inner_text_width): - _append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width) - _append_blank_panel_line(lines, 'class:approval-border', box_width) - for i, choice in enumerate(choices): - label = choice_labels.get(choice, choice) - style = 'class:approval-selected' if i == selected else 'class:approval-choice' - prefix = '❯ ' if i == selected else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): - _append_panel_line(lines, 'class:approval-border', style, wrapped, box_width) - _append_blank_panel_line(lines, 'class:approval-border', box_width) - lines.append(('class:approval-border', '╰' + ('─' * box_width) + '╯\n')) - return lines + return cli_ref._get_approval_display_fragments() approval_widget = ConditionalContainer( Window( diff --git a/cron/scheduler.py b/cron/scheduler.py 
index 62b54fbb..2938a43e 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -57,6 +57,50 @@ def _resolve_origin(job: dict) -> Optional[dict]: return None +def _resolve_delivery_target(job: dict) -> Optional[dict]: + """Resolve the concrete auto-delivery target for a cron job, if any.""" + deliver = job.get("deliver", "local") + origin = _resolve_origin(job) + + if deliver == "local": + return None + + if deliver == "origin": + if not origin: + return None + return { + "platform": origin["platform"], + "chat_id": str(origin["chat_id"]), + "thread_id": origin.get("thread_id"), + } + + if ":" in deliver: + platform_name, chat_id = deliver.split(":", 1) + return { + "platform": platform_name, + "chat_id": chat_id, + "thread_id": None, + } + + platform_name = deliver + if origin and origin.get("platform") == platform_name: + return { + "platform": platform_name, + "chat_id": str(origin["chat_id"]), + "thread_id": origin.get("thread_id"), + } + + chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "") + if not chat_id: + return None + + return { + "platform": platform_name, + "chat_id": chat_id, + "thread_id": None, + } + + def _deliver_result(job: dict, content: str) -> None: """ Deliver job output to the configured target (origin chat, specific platform, etc.). @@ -64,36 +108,19 @@ def _deliver_result(job: dict, content: str) -> None: Uses the standalone platform send functions from send_message_tool so delivery works whether or not the gateway is running. 
""" - deliver = job.get("deliver", "local") - origin = _resolve_origin(job) - - if deliver == "local": + target = _resolve_delivery_target(job) + if not target: + if job.get("deliver", "local") != "local": + logger.warning( + "Job '%s' deliver=%s but no concrete delivery target could be resolved", + job["id"], + job.get("deliver", "local"), + ) return - thread_id = None - - # Resolve target platform + chat_id - if deliver == "origin": - if not origin: - logger.warning("Job '%s' deliver=origin but no origin stored, skipping delivery", job["id"]) - return - platform_name = origin["platform"] - chat_id = origin["chat_id"] - thread_id = origin.get("thread_id") - elif ":" in deliver: - platform_name, chat_id = deliver.split(":", 1) - else: - # Bare platform name like "telegram" — need to resolve to origin or home channel - platform_name = deliver - if origin and origin.get("platform") == platform_name: - chat_id = origin["chat_id"] - thread_id = origin.get("thread_id") - else: - # Fall back to home channel - chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "") - if not chat_id: - logger.warning("Job '%s' deliver=%s but no chat_id or home channel. 
Set via: hermes config set %s_HOME_CHANNEL ", job["id"], deliver, platform_name.upper()) - return + platform_name = target["platform"] + chat_id = target["chat_id"] + thread_id = target.get("thread_id") from tools.send_message_tool import _send_to_platform from gateway.config import load_gateway_config, Platform @@ -207,6 +234,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: job_name = job["name"] prompt = _build_job_prompt(job) origin = _resolve_origin(job) + delivery_target = _resolve_delivery_target(job) logger.info("Running job '%s' (ID: %s)", job_name, job_id) logger.info("Prompt: %s", prompt[:100]) @@ -217,6 +245,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"]) if origin.get("chat_name"): os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"] + if delivery_target: + os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"] + os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"]) + if delivery_target.get("thread_id") is not None: + os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"]) try: # Re-read .env and config.yaml fresh every run so provider/key @@ -363,7 +396,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: finally: # Clean up injected env vars so they don't leak to other jobs - for key in ("HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"): + for key in ( + "HERMES_SESSION_PLATFORM", + "HERMES_SESSION_CHAT_ID", + "HERMES_SESSION_CHAT_NAME", + "HERMES_CRON_AUTO_DELIVER_PLATFORM", + "HERMES_CRON_AUTO_DELIVER_CHAT_ID", + "HERMES_CRON_AUTO_DELIVER_THREAD_ID", + ): os.environ.pop(key, None) if _session_db: try: diff --git a/environments/agent_loop.py b/environments/agent_loop.py index ab8c0236..dec3bc4e 100644 --- a/environments/agent_loop.py +++ b/environments/agent_loop.py @@ -39,7 +39,9 @@ def 
resize_tool_pool(max_workers: int): Safe to call before any tasks are submitted. """ global _tool_executor + old_executor = _tool_executor _tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) + old_executor.shutdown(wait=False) logger.info("Tool thread pool resized to %d workers", max_workers) logger = logging.getLogger(__name__) diff --git a/gateway/delivery.py b/gateway/delivery.py index 7ceb90ab..28b7cf75 100644 --- a/gateway/delivery.py +++ b/gateway/delivery.py @@ -161,7 +161,7 @@ class DeliveryRouter: # Always include local if configured if self.config.always_log_local: - local_key = (Platform.LOCAL, None) + local_key = (Platform.LOCAL, None, None) if local_key not in seen_platforms: targets.append(DeliveryTarget(platform=Platform.LOCAL)) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 67a8323a..e523d939 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -346,6 +346,10 @@ class BasePlatformAdapter(ABC): self.platform = platform self._message_handler: Optional[MessageHandler] = None self._running = False + self._fatal_error_code: Optional[str] = None + self._fatal_error_message: Optional[str] = None + self._fatal_error_retryable = True + self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None # Track active message handlers per session for interrupt support # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt) @@ -353,6 +357,70 @@ class BasePlatformAdapter(ABC): self._pending_messages: Dict[str, MessageEvent] = {} # Chats where auto-TTS on voice input is disabled (set by /voice off) self._auto_tts_disabled_chats: set = set() + + @property + def has_fatal_error(self) -> bool: + return self._fatal_error_message is not None + + @property + def fatal_error_message(self) -> Optional[str]: + return self._fatal_error_message + + @property + def fatal_error_code(self) -> Optional[str]: + return self._fatal_error_code + + 
@property + def fatal_error_retryable(self) -> bool: + return self._fatal_error_retryable + + def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None: + self._fatal_error_handler = handler + + def _mark_connected(self) -> None: + self._running = True + self._fatal_error_code = None + self._fatal_error_message = None + self._fatal_error_retryable = True + try: + from gateway.status import write_runtime_status + write_runtime_status(platform=self.platform.value, platform_state="connected", error_code=None, error_message=None) + except Exception: + pass + + def _mark_disconnected(self) -> None: + self._running = False + if self.has_fatal_error: + return + try: + from gateway.status import write_runtime_status + write_runtime_status(platform=self.platform.value, platform_state="disconnected", error_code=None, error_message=None) + except Exception: + pass + + def _set_fatal_error(self, code: str, message: str, *, retryable: bool) -> None: + self._running = False + self._fatal_error_code = code + self._fatal_error_message = message + self._fatal_error_retryable = retryable + try: + from gateway.status import write_runtime_status + write_runtime_status( + platform=self.platform.value, + platform_state="fatal", + error_code=code, + error_message=message, + ) + except Exception: + pass + + async def _notify_fatal_error(self) -> None: + handler = self._fatal_error_handler + if not handler: + return + result = handler(self) + if asyncio.iscoroutine(result): + await result @property def name(self) -> str: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index df44733e..833c95c8 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -105,12 +105,43 @@ class TelegramAdapter(BasePlatformAdapter): # Telegram message limits MAX_MESSAGE_LENGTH = 4096 + MEDIA_GROUP_WAIT_SECONDS = 0.8 def __init__(self, config: PlatformConfig): super().__init__(config, Platform.TELEGRAM) 
self._app: Optional[Application] = None self._bot: Optional[Bot] = None - + self._media_group_events: Dict[str, MessageEvent] = {} + self._media_group_tasks: Dict[str, asyncio.Task] = {} + self._token_lock_identity: Optional[str] = None + self._polling_error_task: Optional[asyncio.Task] = None + + @staticmethod + def _looks_like_polling_conflict(error: Exception) -> bool: + text = str(error).lower() + return ( + error.__class__.__name__.lower() == "conflict" + or "terminated by other getupdates request" in text + or "another bot instance is running" in text + ) + + async def _handle_polling_conflict(self, error: Exception) -> None: + if self.has_fatal_error and self.fatal_error_code == "telegram_polling_conflict": + return + message = ( + "Another Telegram bot poller is already using this token. " + "Hermes stopped Telegram polling to avoid endless retry spam. " + "Make sure only one gateway instance is running for this bot token." + ) + logger.error("[%s] %s Original error: %s", self.name, message, error) + self._set_fatal_error("telegram_polling_conflict", message, retryable=False) + try: + if self._app and self._app.updater: + await self._app.updater.stop() + except Exception as stop_error: + logger.warning("[%s] Failed stopping Telegram polling after conflict: %s", self.name, stop_error, exc_info=True) + await self._notify_fatal_error() + async def connect(self) -> bool: """Connect to Telegram and start polling for updates.""" if not TELEGRAM_AVAILABLE: @@ -125,6 +156,25 @@ class TelegramAdapter(BasePlatformAdapter): return False try: + from gateway.status import acquire_scoped_lock + + self._token_lock_identity = self.config.token + acquired, existing = acquire_scoped_lock( + "telegram-bot-token", + self._token_lock_identity, + metadata={"platform": self.platform.value}, + ) + if not acquired: + owner_pid = existing.get("pid") if isinstance(existing, dict) else None + message = ( + "Another local Hermes gateway is already using this Telegram bot token" + + (f" 
(PID {owner_pid})." if owner_pid else ".") + + " Stop the other gateway before starting a second Telegram poller." + ) + logger.error("[%s] %s", self.name, message) + self._set_fatal_error("telegram_token_lock", message, retryable=False) + return False + # Build the application self._app = Application.builder().token(self.config.token).build() self._bot = self._app.bot @@ -150,9 +200,20 @@ class TelegramAdapter(BasePlatformAdapter): # Start polling in background await self._app.initialize() await self._app.start() + loop = asyncio.get_running_loop() + + def _polling_error_callback(error: Exception) -> None: + if not self._looks_like_polling_conflict(error): + logger.error("[%s] Telegram polling error: %s", self.name, error, exc_info=True) + return + if self._polling_error_task and not self._polling_error_task.done(): + return + self._polling_error_task = loop.create_task(self._handle_polling_conflict(error)) + await self._app.updater.start_polling( allowed_updates=Update.ALL_TYPES, drop_pending_updates=True, + error_callback=_polling_error_callback, ) # Register bot commands so Telegram shows a hint menu when users type / @@ -188,16 +249,30 @@ class TelegramAdapter(BasePlatformAdapter): exc_info=True, ) - self._running = True + self._mark_connected() logger.info("[%s] Connected and polling for Telegram updates", self.name) return True except Exception as e: + if self._token_lock_identity: + try: + from gateway.status import release_scoped_lock + release_scoped_lock("telegram-bot-token", self._token_lock_identity) + except Exception: + pass logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True) return False async def disconnect(self) -> None: - """Stop polling and disconnect.""" + """Stop polling, cancel pending album flushes, and disconnect.""" + pending_media_group_tasks = list(self._media_group_tasks.values()) + for task in pending_media_group_tasks: + task.cancel() + if pending_media_group_tasks: + await 
asyncio.gather(*pending_media_group_tasks, return_exceptions=True) + self._media_group_tasks.clear() + self._media_group_events.clear() + if self._app: try: await self._app.updater.stop() @@ -205,10 +280,17 @@ class TelegramAdapter(BasePlatformAdapter): await self._app.shutdown() except Exception as e: logger.warning("[%s] Error during Telegram disconnect: %s", self.name, e, exc_info=True) + if self._token_lock_identity: + try: + from gateway.status import release_scoped_lock + release_scoped_lock("telegram-bot-token", self._token_lock_identity) + except Exception as e: + logger.warning("[%s] Error releasing Telegram token lock: %s", self.name, e, exc_info=True) - self._running = False + self._mark_disconnected() self._app = None self._bot = None + self._token_lock_identity = None logger.info("[%s] Disconnected from Telegram", self.name) async def send( @@ -872,8 +954,53 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as e: logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True) + media_group_id = getattr(msg, "media_group_id", None) + if media_group_id: + await self._queue_media_group_event(str(media_group_id), event) + return + await self.handle_message(event) + async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None: + """Buffer Telegram media-group items so albums arrive as one logical event. + + Telegram delivers albums as multiple updates with a shared media_group_id. + If we forward each item immediately, the gateway thinks the second image is a + new user message and interrupts the first. We debounce briefly and merge the + attachments into a single MessageEvent. 
+ """ + existing = self._media_group_events.get(media_group_id) + if existing is None: + self._media_group_events[media_group_id] = event + else: + existing.media_urls.extend(event.media_urls) + existing.media_types.extend(event.media_types) + if event.text: + if existing.text: + if event.text not in existing.text.split("\n\n"): + existing.text = f"{existing.text}\n\n{event.text}" + else: + existing.text = event.text + + prior_task = self._media_group_tasks.get(media_group_id) + if prior_task: + prior_task.cancel() + + self._media_group_tasks[media_group_id] = asyncio.create_task( + self._flush_media_group_event(media_group_id) + ) + + async def _flush_media_group_event(self, media_group_id: str) -> None: + try: + await asyncio.sleep(self.MEDIA_GROUP_WAIT_SECONDS) + event = self._media_group_events.pop(media_group_id, None) + if event is not None: + await self.handle_message(event) + except asyncio.CancelledError: + return + finally: + self._media_group_tasks.pop(media_group_id, None) + async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None: """ Describe a Telegram sticker via vision analysis, with caching. diff --git a/gateway/run.py b/gateway/run.py index 5b889501..e973852b 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -215,6 +215,33 @@ def _resolve_gateway_model() -> str: return model +def _resolve_hermes_bin() -> Optional[list[str]]: + """Resolve the Hermes update command as argv parts. + + Tries in order: + 1. ``shutil.which("hermes")`` — standard PATH lookup + 2. ``sys.executable -m hermes_cli.main`` — fallback when Hermes is running + from a venv/module invocation and the ``hermes`` shim is not on PATH + + Returns argv parts ready for quoting/joining, or ``None`` if neither works. 
+ """ + import shutil + + hermes_bin = shutil.which("hermes") + if hermes_bin: + return [hermes_bin] + + try: + import importlib.util + + if importlib.util.find_spec("hermes_cli") is not None: + return [sys.executable, "-m", "hermes_cli.main"] + except Exception: + pass + + return None + + class GatewayRunner: """ Main gateway controller. @@ -245,6 +272,8 @@ class GatewayRunner: self.delivery_router = DeliveryRouter(self.config) self._running = False self._shutdown_event = asyncio.Event() + self._exit_cleanly = False + self._exit_reason: Optional[str] = None # Track running agents per session for interrupt support # Key: session_key, Value: AIAgent instance @@ -463,6 +492,41 @@ class GatewayRunner: """Run the sync memory flush in a thread pool so it won't block the event loop.""" loop = asyncio.get_event_loop() await loop.run_in_executor(None, self._flush_memories_for_session, old_session_id) + + @property + def should_exit_cleanly(self) -> bool: + return self._exit_cleanly + + @property + def exit_reason(self) -> Optional[str]: + return self._exit_reason + + async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None: + """React to a non-retryable adapter failure after startup.""" + logger.error( + "Fatal %s adapter error (%s): %s", + adapter.platform.value, + adapter.fatal_error_code or "unknown", + adapter.fatal_error_message or "unknown error", + ) + + existing = self.adapters.get(adapter.platform) + if existing is adapter: + try: + await adapter.disconnect() + finally: + self.adapters.pop(adapter.platform, None) + self.delivery_router.adapters = self.adapters + + if not self.adapters: + self._exit_reason = adapter.fatal_error_message or "All messaging adapters disconnected" + logger.error("No connected messaging platforms remain. 
Shutting down gateway cleanly.") + await self.stop() + + def _request_clean_exit(self, reason: str) -> None: + self._exit_cleanly = True + self._exit_reason = reason + self._shutdown_event.set() @staticmethod def _load_prefill_messages() -> List[Dict[str, Any]]: @@ -647,6 +711,11 @@ class GatewayRunner: """ logger.info("Starting Hermes Gateway...") logger.info("Session storage: %s", self.config.sessions_dir) + try: + from gateway.status import write_runtime_status + write_runtime_status(gateway_state="starting", exit_reason=None) + except Exception: + pass # Warn if no user allowlists are configured and open access is not opted in _any_allowlist = any( @@ -676,6 +745,7 @@ class GatewayRunner: logger.warning("Process checkpoint recovery: %s", e) connected_count = 0 + startup_nonretryable_errors: list[str] = [] # Initialize and connect each configured platform for platform, platform_config in self.config.platforms.items(): @@ -687,8 +757,9 @@ class GatewayRunner: logger.warning("No adapter available for %s", platform.value) continue - # Set up message handler + # Set up message + fatal error handlers adapter.set_message_handler(self._handle_message) + adapter.set_fatal_error_handler(self._handle_adapter_fatal_error) # Try to connect logger.info("Connecting to %s...", platform.value) @@ -701,10 +772,24 @@ class GatewayRunner: logger.info("✓ %s connected", platform.value) else: logger.warning("✗ %s failed to connect", platform.value) + if adapter.has_fatal_error and not adapter.fatal_error_retryable: + startup_nonretryable_errors.append( + f"{platform.value}: {adapter.fatal_error_message}" + ) except Exception as e: logger.error("✗ %s error: %s", platform.value, e) if connected_count == 0: + if startup_nonretryable_errors: + reason = "; ".join(startup_nonretryable_errors) + logger.error("Gateway hit a non-retryable startup conflict: %s", reason) + try: + from gateway.status import write_runtime_status + write_runtime_status(gateway_state="startup_failed", 
exit_reason=reason) + except Exception: + pass + self._request_clean_exit(reason) + return True logger.warning("No messaging platforms connected.") logger.info("Gateway will continue running for cron job execution.") @@ -712,6 +797,11 @@ class GatewayRunner: self.delivery_router.adapters = self.adapters self._running = True + try: + from gateway.status import write_runtime_status + write_runtime_status(gateway_state="running", exit_reason=None) + except Exception: + pass # Emit gateway:startup hook hook_count = len(self.hooks.loaded_hooks) @@ -806,8 +896,12 @@ class GatewayRunner: self._shutdown_all_gateway_honcho() self._shutdown_event.set() - from gateway.status import remove_pid_file + from gateway.status import remove_pid_file, write_runtime_status remove_pid_file() + try: + write_runtime_status(gateway_state="stopped", exit_reason=self._exit_reason) + except Exception: + pass logger.info("Gateway stopped") @@ -3155,9 +3249,14 @@ class GatewayRunner: if not git_dir.exists(): return "✗ Not a git repository — cannot update." - hermes_bin = shutil.which("hermes") - if not hermes_bin: - return "✗ `hermes` command not found on PATH." + hermes_cmd = _resolve_hermes_bin() + if not hermes_cmd: + return ( + "✗ Could not locate the `hermes` command. " + "Hermes is running, but the update command could not find the " + "executable on PATH or via the current Python interpreter. " + "Try running `hermes update` manually in your terminal." + ) pending_path = _hermes_home / ".update_pending.json" output_path = _hermes_home / ".update_output.txt" @@ -3173,8 +3272,9 @@ class GatewayRunner: # Spawn `hermes update` in a separate cgroup so it survives gateway # restart. systemd-run --user --scope creates a transient scope unit. 
+ hermes_cmd_str = " ".join(shlex.quote(part) for part in hermes_cmd) update_cmd = ( - f"{shlex.quote(hermes_bin)} update > {shlex.quote(str(output_path))} 2>&1; " + f"{hermes_cmd_str} update > {shlex.quote(str(output_path))} 2>&1; " f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}" ) try: @@ -4338,6 +4438,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = success = await runner.start() if not success: return False + if runner.should_exit_cleanly: + if runner.exit_reason: + logger.error("Gateway exiting cleanly: %s", runner.exit_reason) + return True # Write PID file so CLI can detect gateway is running import atexit diff --git a/gateway/status.py b/gateway/status.py index db72f1fe..3362a778 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -11,13 +11,17 @@ that will be useful when we add named profiles (multiple agents running concurrently under distinct configurations). """ +import hashlib import json import os import sys +from datetime import datetime, timezone from pathlib import Path -from typing import Optional +from typing import Any, Optional _GATEWAY_KIND = "hermes-gateway" +_RUNTIME_STATUS_FILE = "gateway_state.json" +_LOCKS_DIRNAME = "gateway-locks" def _get_pid_path() -> Path: @@ -26,6 +30,32 @@ def _get_pid_path() -> Path: return home / "gateway.pid" +def _get_runtime_status_path() -> Path: + """Return the persisted runtime health/status file path.""" + return _get_pid_path().with_name(_RUNTIME_STATUS_FILE) + + +def _get_lock_dir() -> Path: + """Return the machine-local directory for token-scoped gateway locks.""" + override = os.getenv("HERMES_GATEWAY_LOCK_DIR") + if override: + return Path(override) + state_home = Path(os.getenv("XDG_STATE_HOME", Path.home() / ".local" / "state")) + return state_home / "hermes" / _LOCKS_DIRNAME + + +def _utc_now_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _scope_hash(identity: str) -> str: + return 
hashlib.sha256(identity.encode("utf-8")).hexdigest()[:16] + + +def _get_scope_lock_path(scope: str, identity: str) -> Path: + return _get_lock_dir() / f"{scope}-{_scope_hash(identity)}.lock" + + def _get_process_start_time(pid: int) -> Optional[int]: """Return the kernel start time for a process when available.""" stat_path = Path(f"/proc/{pid}/stat") @@ -73,6 +103,38 @@ def _build_pid_record() -> dict: } +def _build_runtime_status_record() -> dict[str, Any]: + payload = _build_pid_record() + payload.update({ + "gateway_state": "starting", + "exit_reason": None, + "platforms": {}, + "updated_at": _utc_now_iso(), + }) + return payload + + +def _read_json_file(path: Path) -> Optional[dict[str, Any]]: + if not path.exists(): + return None + try: + raw = path.read_text().strip() + except OSError: + return None + if not raw: + return None + try: + payload = json.loads(raw) + except json.JSONDecodeError: + return None + return payload if isinstance(payload, dict) else None + + +def _write_json_file(path: Path, payload: dict[str, Any]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload)) + + def _read_pid_record() -> Optional[dict]: pid_path = _get_pid_path() if not pid_path.exists(): @@ -99,9 +161,49 @@ def _read_pid_record() -> Optional[dict]: def write_pid_file() -> None: """Write the current process PID and metadata to the gateway PID file.""" - pid_path = _get_pid_path() - pid_path.parent.mkdir(parents=True, exist_ok=True) - pid_path.write_text(json.dumps(_build_pid_record())) + _write_json_file(_get_pid_path(), _build_pid_record()) + + +def write_runtime_status( + *, + gateway_state: Optional[str] = None, + exit_reason: Optional[str] = None, + platform: Optional[str] = None, + platform_state: Optional[str] = None, + error_code: Optional[str] = None, + error_message: Optional[str] = None, +) -> None: + """Persist gateway runtime health information for diagnostics/status.""" + path = _get_runtime_status_path() + payload = 
_read_json_file(path) or _build_runtime_status_record() + payload.setdefault("platforms", {}) + payload.setdefault("kind", _GATEWAY_KIND) + payload.setdefault("pid", os.getpid()) + payload.setdefault("start_time", _get_process_start_time(os.getpid())) + payload["updated_at"] = _utc_now_iso() + + if gateway_state is not None: + payload["gateway_state"] = gateway_state + if exit_reason is not None: + payload["exit_reason"] = exit_reason + + if platform is not None: + platform_payload = payload["platforms"].get(platform, {}) + if platform_state is not None: + platform_payload["state"] = platform_state + if error_code is not None: + platform_payload["error_code"] = error_code + if error_message is not None: + platform_payload["error_message"] = error_message + platform_payload["updated_at"] = _utc_now_iso() + payload["platforms"][platform] = platform_payload + + _write_json_file(path, payload) + + +def read_runtime_status() -> Optional[dict[str, Any]]: + """Read the persisted gateway runtime health/status information.""" + return _read_json_file(_get_runtime_status_path()) def remove_pid_file() -> None: @@ -112,6 +214,87 @@ def remove_pid_file() -> None: pass +def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str, Any]] = None) -> tuple[bool, Optional[dict[str, Any]]]: + """Acquire a machine-local lock keyed by scope + identity. + + Used to prevent multiple local gateways from using the same external identity + at once (e.g. the same Telegram bot token across different HERMES_HOME dirs). 
+ """ + lock_path = _get_scope_lock_path(scope, identity) + lock_path.parent.mkdir(parents=True, exist_ok=True) + record = { + **_build_pid_record(), + "scope": scope, + "identity_hash": _scope_hash(identity), + "metadata": metadata or {}, + "updated_at": _utc_now_iso(), + } + + existing = _read_json_file(lock_path) + if existing: + try: + existing_pid = int(existing["pid"]) + except (KeyError, TypeError, ValueError): + existing_pid = None + + if existing_pid == os.getpid() and existing.get("start_time") == record.get("start_time"): + _write_json_file(lock_path, record) + return True, existing + + stale = existing_pid is None + if not stale: + try: + os.kill(existing_pid, 0) + except (ProcessLookupError, PermissionError): + stale = True + else: + current_start = _get_process_start_time(existing_pid) + if ( + existing.get("start_time") is not None + and current_start is not None + and current_start != existing.get("start_time") + ): + stale = True + if stale: + try: + lock_path.unlink(missing_ok=True) + except OSError: + pass + else: + return False, existing + + try: + fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) + except FileExistsError: + return False, _read_json_file(lock_path) + try: + with os.fdopen(fd, "w", encoding="utf-8") as handle: + json.dump(record, handle) + except Exception: + try: + lock_path.unlink(missing_ok=True) + except OSError: + pass + raise + return True, None + + +def release_scoped_lock(scope: str, identity: str) -> None: + """Release a previously-acquired scope lock when owned by this process.""" + lock_path = _get_scope_lock_path(scope, identity) + existing = _read_json_file(lock_path) + if not existing: + return + if existing.get("pid") != os.getpid(): + return + if existing.get("start_time") != _get_process_start_time(os.getpid()): + return + try: + lock_path.unlink(missing_ok=True) + except OSError: + pass + + def get_running_pid() -> Optional[int]: """Return the PID of a running gateway instance, or ``None``. 
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 492d00ae..7a932d9e 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -821,7 +821,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                 print(f"   ✓ Saved {name}")
             print()
         else:
-            print("   Set later with: hermes config set KEY VALUE")
+            print("   Set later with: hermes config set <key> <value>")
 
     # Check for missing config fields
     missing_config = get_missing_config_fields()
@@ -1265,7 +1265,7 @@ def show_config():
     print()
     print(color("─" * 60, Colors.DIM))
     print(color("  hermes config edit          # Edit config file", Colors.DIM))
-    print(color("  hermes config set KEY VALUE", Colors.DIM))
+    print(color("  hermes config set <key> <value>", Colors.DIM))
     print(color("  hermes setup                # Run setup wizard", Colors.DIM))
     print()
@@ -1391,7 +1391,7 @@ def config_command(args):
         key = getattr(args, 'key', None)
         value = getattr(args, 'value', None)
         if not key or not value:
-            print("Usage: hermes config set KEY VALUE")
+            print("Usage: hermes config set <key> <value>")
             print()
             print("Examples:")
             print("  hermes config set model anthropic/claude-sonnet-4")
@@ -1506,7 +1506,7 @@
     print("Available commands:")
     print("  hermes config            Show current configuration")
     print("  hermes config edit       Open config in editor")
-    print("  hermes config set K V    Set a config value")
+    print("  hermes config set <key> <value>    Set a config value")
     print("  hermes config check      Check for missing/outdated config")
     print("  hermes config migrate    Update config with new options")
     print("  hermes config path       Show config file path")
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 4d3ed884..3ecc77e0 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -251,7 +251,6 @@ StandardError=journal
 WantedBy=default.target
 """
 
-
 def _normalize_service_definition(text: str) -> str:
     return "\n".join(line.rstrip() for line in text.strip().splitlines())
@@ -279,6 +278,65 @@ def refresh_systemd_unit_if_needed() -> bool:
     return True
 
+
+def 
_print_linger_enable_warning(username: str, detail: str | None = None) -> None: + print() + print("⚠ Linger not enabled — gateway may stop when you close this terminal.") + if detail: + print(f" Auto-enable failed: {detail}") + print() + print(" On headless servers (VPS, cloud instances) run:") + print(f" sudo loginctl enable-linger {username}") + print() + print(" Then restart the gateway:") + print(f" systemctl --user restart {SERVICE_NAME}.service") + print() + + + +def _ensure_linger_enabled() -> None: + """Enable linger when possible so the user gateway survives logout.""" + if not is_linux(): + return + + import getpass + import shutil + + username = getpass.getuser() + linger_file = Path(f"/var/lib/systemd/linger/{username}") + if linger_file.exists(): + print("✓ Systemd linger is enabled (service survives logout)") + return + + linger_enabled, linger_detail = get_systemd_linger_status() + if linger_enabled is True: + print("✓ Systemd linger is enabled (service survives logout)") + return + + if not shutil.which("loginctl"): + _print_linger_enable_warning(username, linger_detail or "loginctl not found") + return + + print("Enabling linger so the gateway survives SSH logout...") + try: + result = subprocess.run( + ["loginctl", "enable-linger", username], + capture_output=True, + text=True, + check=False, + ) + except Exception as e: + _print_linger_enable_warning(username, str(e)) + return + + if result.returncode == 0: + print("✓ Linger enabled — gateway will persist after logout") + return + + detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip() + _print_linger_enable_warning(username, detail or linger_detail) + + def systemd_install(force: bool = False): unit_path = get_systemd_unit_path() @@ -302,7 +360,7 @@ def systemd_install(force: bool = False): print(f" hermes gateway status # Check status") print(f" journalctl --user -u {SERVICE_NAME} -f # View logs") print() - print_systemd_linger_guidance() + _ensure_linger_enabled() 
def systemd_uninstall(): subprocess.run(["systemctl", "--user", "stop", SERVICE_NAME], check=False) @@ -367,6 +425,13 @@ def systemd_status(deep: bool = False): print("✗ Gateway service is stopped") print(" Run: hermes gateway start") + runtime_lines = _runtime_health_lines() + if runtime_lines: + print() + print("Recent gateway health:") + for line in runtime_lines: + print(f" {line}") + if deep: print_systemd_linger_guidance() else: @@ -693,6 +758,35 @@ def _platform_status(platform: dict) -> str: return "not configured" +def _runtime_health_lines() -> list[str]: + """Summarize the latest persisted gateway runtime health state.""" + try: + from gateway.status import read_runtime_status + except Exception: + return [] + + state = read_runtime_status() + if not state: + return [] + + lines: list[str] = [] + gateway_state = state.get("gateway_state") + exit_reason = state.get("exit_reason") + platforms = state.get("platforms", {}) or {} + + for platform, pdata in platforms.items(): + if pdata.get("state") == "fatal": + message = pdata.get("error_message") or "unknown error" + lines.append(f"⚠ {platform}: {message}") + + if gateway_state == "startup_failed" and exit_reason: + lines.append(f"⚠ Last startup issue: {exit_reason}") + elif gateway_state == "stopped" and exit_reason: + lines.append(f"⚠ Last shutdown reason: {exit_reason}") + + return lines + + def _setup_standard_platform(platform: dict): """Interactive setup for Telegram, Discord, or Slack.""" emoji = platform["emoji"] @@ -1186,11 +1280,23 @@ def gateway_command(args): if pids: print(f"✓ Gateway is running (PID: {', '.join(map(str, pids))})") print(" (Running manually, not as a system service)") + runtime_lines = _runtime_health_lines() + if runtime_lines: + print() + print("Recent gateway health:") + for line in runtime_lines: + print(f" {line}") print() print("To install as a service:") print(" hermes gateway install") else: print("✗ Gateway is not running") + runtime_lines = _runtime_health_lines() + 
if runtime_lines: + print() + print("Recent gateway health:") + for line in runtime_lines: + print(f" {line}") print() print("To start:") print(" hermes gateway # Run in foreground") diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 6276d77d..9f388df9 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2056,7 +2056,15 @@ def cmd_update(args): check=True ) branch = result.stdout.strip() - + + # Fall back to main if the current branch doesn't exist on the remote + verify = subprocess.run( + git_cmd + ["rev-parse", "--verify", f"origin/{branch}"], + cwd=PROJECT_ROOT, capture_output=True, text=True, + ) + if verify.returncode != 0: + branch = "main" + # Check if there are updates result = subprocess.run( git_cmd + ["rev-list", f"HEAD..origin/{branch}", "--count"], @@ -2736,7 +2744,7 @@ For more help on a command: skills_install = skills_subparsers.add_parser("install", help="Install a skill") skills_install.add_argument("identifier", help="Skill identifier (e.g. openai/skills/skill-creator)") skills_install.add_argument("--category", default="", help="Category folder to install into") - skills_install.add_argument("--force", action="store_true", help="Install despite caution verdict") + skills_install.add_argument("--force", "--yes", "-y", dest="force", action="store_true", help="Install despite blocked scan verdict") skills_inspect = skills_subparsers.add_parser("inspect", help="Preview a skill without installing") skills_inspect.add_argument("identifier", help="Skill identifier") diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 6cd57f95..fead6800 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -5,6 +5,7 @@ from __future__ import annotations import os from typing import Any, Dict, Optional +from hermes_cli import auth as auth_mod from hermes_cli.auth import ( AuthError, PROVIDER_REGISTRY, @@ -18,6 +19,10 @@ from hermes_cli.config import load_config from hermes_constants import 
OPENROUTER_BASE_URL +def _normalize_custom_provider_name(value: str) -> str: + return value.strip().lower().replace(" ", "-") + + def _get_model_config() -> Dict[str, Any]: config = load_config() model_cfg = config.get("model") @@ -47,6 +52,82 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str: return "auto" +def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]: + requested_norm = _normalize_custom_provider_name(requested_provider or "") + if not requested_norm or requested_norm == "custom": + return None + + # Raw names should only map to custom providers when they are not already + # valid built-in providers or aliases. Explicit menu keys like + # ``custom:local`` always target the saved custom provider. + if requested_norm == "auto": + return None + if not requested_norm.startswith("custom:"): + try: + auth_mod.resolve_provider(requested_norm) + except AuthError: + pass + else: + return None + + config = load_config() + custom_providers = config.get("custom_providers") + if not isinstance(custom_providers, list): + return None + + for entry in custom_providers: + if not isinstance(entry, dict): + continue + name = entry.get("name") + base_url = entry.get("base_url") + if not isinstance(name, str) or not isinstance(base_url, str): + continue + name_norm = _normalize_custom_provider_name(name) + menu_key = f"custom:{name_norm}" + if requested_norm not in {name_norm, menu_key}: + continue + return { + "name": name.strip(), + "base_url": base_url.strip(), + "api_key": str(entry.get("api_key", "") or "").strip(), + } + + return None + + +def _resolve_named_custom_runtime( + *, + requested_provider: str, + explicit_api_key: Optional[str] = None, + explicit_base_url: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + custom_provider = _get_named_custom_provider(requested_provider) + if not custom_provider: + return None + + base_url = ( + (explicit_base_url or "").strip() + or custom_provider.get("base_url", 
"") + ).rstrip("/") + if not base_url: + return None + + api_key = ( + (explicit_api_key or "").strip() + or custom_provider.get("api_key", "") + or os.getenv("OPENAI_API_KEY", "").strip() + or os.getenv("OPENROUTER_API_KEY", "").strip() + ) + + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": base_url, + "api_key": api_key, + "source": f"custom_provider:{custom_provider.get('name', requested_provider)}", + } + + def _resolve_openrouter_runtime( *, requested_provider: str, @@ -122,6 +203,15 @@ def resolve_runtime_provider( """Resolve runtime provider credentials for agent execution.""" requested_provider = resolve_requested_provider(requested) + custom_runtime = _resolve_named_custom_runtime( + requested_provider=requested_provider, + explicit_api_key=explicit_api_key, + explicit_base_url=explicit_base_url, + ) + if custom_runtime: + custom_runtime["requested_provider"] = requested_provider + return custom_runtime + provider = resolve_provider( requested_provider, explicit_api_key=explicit_api_key, diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 4a27339c..0fea9a92 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -460,12 +460,41 @@ def _print_setup_summary(config: dict, hermes_home): tool_status = [] - # OpenRouter (required for vision, moa) + # Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint + _has_vision = False if get_env_value("OPENROUTER_API_KEY"): + _has_vision = True + else: + try: + _vauth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json" + if _vauth_path.is_file(): + import json as _vjson + + _vauth = _vjson.loads(_vauth_path.read_text()) + if _vauth.get("active_provider") == "nous": + _np = _vauth.get("providers", {}).get("nous", {}) + if _np.get("agent_key") or _np.get("access_token"): + _has_vision = True + elif _vauth.get("active_provider") == "openai-codex": + _cp = _vauth.get("providers", {}).get("openai-codex", {}) + if 
_cp.get("tokens", {}).get("access_token"): + _has_vision = True + except Exception: + pass + if not _has_vision: + _oai_base = get_env_value("OPENAI_BASE_URL") or "" + if get_env_value("OPENAI_API_KEY") and "api.openai.com" in _oai_base.lower(): + _has_vision = True + + if _has_vision: tool_status.append(("Vision (image analysis)", True, None)) + else: + tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure")) + + # Mixture of Agents — requires OpenRouter specifically (calls multiple models) + if get_env_value("OPENROUTER_API_KEY"): tool_status.append(("Mixture of Agents", True, None)) else: - tool_status.append(("Vision (image analysis)", False, "OPENROUTER_API_KEY")) tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY")) # Firecrawl (web tools) @@ -602,7 +631,7 @@ def _print_setup_summary(config: dict, hermes_home): print( f" {color('hermes config edit', Colors.GREEN)} Open config in your editor" ) - print(f" {color('hermes config set KEY VALUE', Colors.GREEN)}") + print(f" {color('hermes config set ', Colors.GREEN)}") print(f" Set a specific value") print() print(f" Or edit the files directly:") @@ -1246,35 +1275,112 @@ def setup_model_provider(config: dict): elif existing_or: selected_provider = "openrouter" - # ── OpenRouter API Key for tools (if not already set) ── - # Tools (vision, web, MoA) use OpenRouter independently of the main provider. - # Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen. 
- if selected_provider in ( - "nous", - "openai-codex", - "custom", - "zai", - "kimi-coding", - "minimax", - "minimax-cn", - "anthropic", - ) and not get_env_value("OPENROUTER_API_KEY"): - print() - print_header("OpenRouter API Key (for tools)") - print_info("Tools like vision analysis, web search, and MoA use OpenRouter") - print_info("independently of your main inference provider.") - print_info("Get your API key at: https://openrouter.ai/keys") + # ── Vision & Image Analysis Setup ── + # Vision requires a multimodal-capable provider. Check whether the user's + # chosen provider already covers it — if so, skip the prompt entirely. + _vision_needs_setup = True - api_key = prompt( - " OpenRouter API key (optional, press Enter to skip)", password=True - ) - if api_key: - save_env_value("OPENROUTER_API_KEY", api_key) - print_success("OpenRouter API key saved (for tools)") - else: - print_info( - "Skipped - some tools (vision, web scraping) won't work without this" + if selected_provider == "openrouter": + # OpenRouter → Gemini for vision, already configured + _vision_needs_setup = False + elif selected_provider == "nous": + # Nous Portal OAuth → Gemini via Nous, already configured + _vision_needs_setup = False + elif selected_provider == "openai-codex": + # Codex OAuth → gpt-5.3-codex supports vision + _vision_needs_setup = False + elif selected_provider == "custom": + _custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower() + if "api.openai.com" in _custom_base: + # Direct OpenAI endpoint — show vision model picker + print() + print_header("Vision Model") + print_info("Your OpenAI endpoint supports vision. 
Pick a model for image analysis:") + _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"] + _vm_choices = _oai_vision_models + ["Keep default (gpt-4o-mini)"] + _vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1) + _selected_vision_model = ( + _oai_vision_models[_vm_idx] + if _vm_idx < len(_oai_vision_models) + else "gpt-4o-mini" ) + save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model) + print_success(f"Vision model set to {_selected_vision_model}") + _vision_needs_setup = False + + # Even for providers without native vision, check if existing credentials + # from a previous setup already cover it (e.g. user had OpenRouter before + # switching to z.ai) + if _vision_needs_setup: + if get_env_value("OPENROUTER_API_KEY"): + _vision_needs_setup = False + else: + # Check for Nous Portal OAuth in auth.json + try: + _auth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json" + if _auth_path.is_file(): + import json as _json + + _auth_data = _json.loads(_auth_path.read_text()) + if _auth_data.get("active_provider") == "nous": + _nous_p = _auth_data.get("providers", {}).get("nous", {}) + if _nous_p.get("agent_key") or _nous_p.get("access_token"): + _vision_needs_setup = False + except Exception: + pass + + if _vision_needs_setup: + _prov_names = { + "nous-api": "Nous Portal API key", + "zai": "Z.AI / GLM", + "kimi-coding": "Kimi / Moonshot", + "minimax": "MiniMax", + "minimax-cn": "MiniMax CN", + "anthropic": "Anthropic", + "custom": "your custom endpoint", + } + _prov_display = _prov_names.get(selected_provider, selected_provider or "your provider") + + print() + print_header("Vision & Image Analysis (optional)") + print_info(f"Vision requires a multimodal-capable provider. {_prov_display}") + print_info("doesn't natively support it. 
Choose how to enable vision,") + print_info("or skip to configure later.") + print() + + _vision_choices = [ + "OpenRouter — uses Gemini (free tier at openrouter.ai/keys)", + "OpenAI — enter API key & choose a vision model", + "Skip for now", + ] + _vision_idx = prompt_choice("Configure vision:", _vision_choices, 2) + + if _vision_idx == 0: # OpenRouter + _or_key = prompt(" OpenRouter API key", password=True) + if _or_key: + save_env_value("OPENROUTER_API_KEY", _or_key) + print_success("OpenRouter key saved — vision will use Gemini") + else: + print_info("Skipped — vision won't be available") + elif _vision_idx == 1: # OpenAI + _oai_key = prompt(" OpenAI API key", password=True) + if _oai_key: + save_env_value("OPENAI_API_KEY", _oai_key) + save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1") + _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"] + _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"] + _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0) + _selected_vision_model = ( + _oai_vision_models[_vm_idx] + if _vm_idx < len(_oai_vision_models) + else "gpt-4o-mini" + ) + save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model) + print_success(f"Vision configured with OpenAI ({_selected_vision_model})") + else: + print_info("Skipped — vision won't be available") + else: + print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'") # ── Model Selection (adapts based on provider) ── if selected_provider != "custom": # Custom already prompted for model name diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index 60cfaf6b..e2d17557 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -1050,11 +1050,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None: elif action == "install": if not args: - c.print("[bold red]Usage:[/] /skills install [--category ] [--force]\n") + c.print("[bold red]Usage:[/] /skills 
install [--category ] [--force|--yes]\n") return identifier = args[0] category = "" - force = "--force" in args + force = any(flag in args for flag in ("--force", "--yes", "-y")) for i, a in enumerate(args): if a == "--category" and i + 1 < len(args): category = args[i + 1] diff --git a/skills/mlops/training/axolotl/references/api.md b/skills/mlops/training/axolotl/references/api.md index f00b6eb6..2f94b539 100644 --- a/skills/mlops/training/axolotl/references/api.md +++ b/skills/mlops/training/axolotl/references/api.md @@ -3240,7 +3240,7 @@ Prompt Strategy for finetuning Llama2 chat models see also https://github.com/fa This implementation is based on the Vicuna PR and the fastchat repo, see also: https://github.com/lm-sys/FastChat/blob/cdd7730686cb1bf9ae2b768ee171bdf7d1ff04f3/fastchat/conversation.py#L847 -Use dataset type: “llama2_chat” in conig.yml to use this prompt style. +Use dataset type: “llama2_chat” in config.yml to use this prompt style. E.g. in the config.yml: @@ -4991,7 +4991,7 @@ prompt_strategies.orcamini Prompt Strategy for finetuning Orca Mini (v2) models see also https://huggingface.co/psmathur/orca_mini_v2_7b for more information -Use dataset type: orcamini in conig.yml to use this prompt style. +Use dataset type: orcamini in config.yml to use this prompt style. Compared to the alpaca_w_system.open_orca dataset type, this one specifies the system prompt with “### System:”. diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/skills/mlops/training/pytorch-fsdp/references/other.md index d5b6cae6..2b544dc9 100644 --- a/skills/mlops/training/pytorch-fsdp/references/other.md +++ b/skills/mlops/training/pytorch-fsdp/references/other.md @@ -2290,7 +2290,7 @@ This call gives the AsyncStager the opportunity to ‘stage’ the state_dict. T for serializing the state_dict and writing it to storage. -the serialization thread starts and before returning from dcp.async_save. 
If this is set to False, the assumption is the user has defined a custom synchronization point for the the purpose of further optimizing save latency in the training loop (for example, by overlapping staging with the forward/backward pass), and it is the respondsibility of the user to call AsyncStager.synchronize_staging at the appropriate time.
+the serialization thread starts and before returning from dcp.async_save. If this is set to False, the assumption is the user has defined a custom synchronization point for the purpose of further optimizing save latency in the training loop (for example, by overlapping staging with the forward/backward pass), and it is the responsibility of the user to call AsyncStager.synchronize_staging at the appropriate time.
 
 Clean up all resources used by the stager.
 
@@ -2430,7 +2430,7 @@ Read the checkpoint metadata.
 
 The metadata object associated with the checkpoint being loaded.
 
-Calls to indicates a brand new checkpoint read is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint read. The meaning of the checkpiont_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage.
+Calls to indicates a brand new checkpoint read is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint read. The meaning of the checkpoint_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage.
 
 checkpoint_id (Union[str, os.PathLike, None]) – The ID of this checkpoint instance. The meaning of the checkpoint_id depends on the storage. It can be a path to a folder or to a file. It can also be a key if the storage is more like a key-value store. (Default: None)
 
@@ -2488,7 +2488,7 @@ plan (SavePlan) – The local plan from the SavePlanner in use.
 
 A transformed SavePlan after storage local planning
 
-Calls to indicates a brand new checkpoint write is going to happen. 
A checkpoint_id may be present if users set the checkpoint_id for this checkpoint write. The meaning of the checkpiont_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage. +Calls to indicates a brand new checkpoint write is going to happen. A checkpoint_id may be present if users set the checkpoint_id for this checkpoint write. The meaning of the checkpoint_id is storage-dependent. It can be a path to a folder/file or a key for a key-value storage. checkpoint_id (Union[str, os.PathLike, None]) – The ID of this checkpoint instance. The meaning of the checkpoint_id depends on the storage. It can be a path to a folder or to a file. It can also be a key if the storage is a key-value store. (Default: None) @@ -2498,7 +2498,19 @@ is_coordinator (bool) – Whether this instance is responsible for coordinating Return the storage-specific metadata. This is used to store additional information in a checkpoint that can be useful for providing request-level observability. StorageMeta is passed to the SavePlanner during save calls. Returns None by default. -TODO: provide an example +Example: + +```python +from torch.distributed.checkpoint.storage import StorageMeta + +class CustomStorageBackend: + def get_storage_metadata(self): + # Return storage-specific metadata that will be stored with the checkpoint + return StorageMeta() +``` + +This example shows how a storage backend can return `StorageMeta` +to attach additional metadata to a checkpoint. Optional[StorageMeta] @@ -3441,7 +3453,7 @@ The target module does not have to be an FSDP module. A StateDictSettings containing the state_dict_type and state_dict / optim_state_dict configs that are currently set. -AssertionError` if the StateDictSettings for differen – +AssertionError` if the StateDictSettings for different – FSDP submodules differ. – @@ -3766,7 +3778,7 @@ The sharing is done as described by ZeRO. 
The local optimizer instance in each rank is only responsible for updating approximately 1 / world_size parameters and hence only needs to keep 1 / world_size optimizer states. After parameters are updated locally, each rank will broadcast its parameters to all other peers to keep all model replicas in the same state. ZeroRedundancyOptimizer can be used in conjunction with torch.nn.parallel.DistributedDataParallel to reduce per-rank peak memory consumption. -ZeroRedundancyOptimizer uses a sorted-greedy algorithm to pack a number of parameters at each rank. Each parameter belongs to a single rank and is not divided among ranks. The partition is arbitrary and might not match the the parameter registration or usage order. +ZeroRedundancyOptimizer uses a sorted-greedy algorithm to pack a number of parameters at each rank. Each parameter belongs to a single rank and is not divided among ranks. The partition is arbitrary and might not match the parameter registration or usage order. params (Iterable) – an Iterable of torch.Tensor s or dict s giving all parameters, which will be sharded across ranks. diff --git a/skills/mlops/training/unsloth/references/llms-full.md b/skills/mlops/training/unsloth/references/llms-full.md index b0b6b24d..df3d2eeb 100644 --- a/skills/mlops/training/unsloth/references/llms-full.md +++ b/skills/mlops/training/unsloth/references/llms-full.md @@ -6348,7 +6348,7 @@ Our chat templates for the GGUF, our BnB and BF16 uploads and all versions are f ### :1234: Precision issues -We found multiple precision issues in Tesla T4 and float16 machines primarily since the model was trained using BF16, and so outliers and overflows existed. MXFP4 is not actually supported on Ampere and older GPUs, so Triton provides `tl.dot_scaled` for MXFP4 matrix multiplication. It upcasts the matrices to BF16 internaly on the fly. 
+We found multiple precision issues in Tesla T4 and float16 machines primarily since the model was trained using BF16, and so outliers and overflows existed. MXFP4 is not actually supported on Ampere and older GPUs, so Triton provides `tl.dot_scaled` for MXFP4 matrix multiplication. It upcasts the matrices to BF16 internally on the fly. We made a [MXFP4 inference notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/GPT_OSS_MXFP4_\(20B\)-Inference.ipynb) as well in Tesla T4 Colab! @@ -14877,7 +14877,7 @@ curl -X POST http://localhost:8000/v1/unload_lora_adapter \ # Text-to-Speech (TTS) Fine-tuning -Learn how to to fine-tune TTS & STT voice models with Unsloth. +Learn how to fine-tune TTS & STT voice models with Unsloth. Fine-tuning TTS models allows them to adapt to your specific dataset, use case, or desired style and tone. The goal is to customize these models to clone voices, adapt speaking styles and tones, support new languages, handle specific tasks and more. We also support **Speech-to-Text (STT)** models like OpenAI's Whisper. @@ -15306,7 +15306,7 @@ snapshot_download( ) ``` -And and let's do inference! +And let's do inference! {% code overflow="wrap" %} @@ -16036,7 +16036,7 @@ Then train the model as usual via `trainer.train() .` Tips to solve issues, and frequently asked questions. -If you're still encountering any issues with versions or depencies, please use our [Docker image](https://docs.unsloth.ai/get-started/install-and-update/docker) which will have everything pre-installed. +If you're still encountering any issues with versions or dependencies, please use our [Docker image](https://docs.unsloth.ai/get-started/install-and-update/docker) which will have everything pre-installed. 
{% hint style="success" %} **Try always to update Unsloth if you find any issues.** diff --git a/skills/mlops/training/unsloth/references/llms-txt.md b/skills/mlops/training/unsloth/references/llms-txt.md index c5895c7c..22f651e4 100644 --- a/skills/mlops/training/unsloth/references/llms-txt.md +++ b/skills/mlops/training/unsloth/references/llms-txt.md @@ -40,7 +40,7 @@ Read more on running Llama 4 here: 80%" not in MEMORY_GUIDANCE + + def test_session_search_guidance_is_simple_cross_session_recall(self): + assert "relevant cross-session context exists" in SESSION_SEARCH_GUIDANCE + assert "recent turns of the current session" not in SESSION_SEARCH_GUIDANCE + + # ========================================================================= # Context injection scanning # ========================================================================= @@ -435,6 +455,7 @@ class TestPromptBuilderConstants: assert "whatsapp" in PLATFORM_HINTS assert "telegram" in PLATFORM_HINTS assert "discord" in PLATFORM_HINTS + assert "cron" in PLATFORM_HINTS assert "cli" in PLATFORM_HINTS diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 3dbae4b4..a053028c 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -6,7 +6,7 @@ from unittest.mock import patch, MagicMock import pytest -from cron.scheduler import _resolve_origin, _deliver_result, run_job +from cron.scheduler import _resolve_origin, _resolve_delivery_target, _deliver_result, run_job class TestResolveOrigin: @@ -44,6 +44,56 @@ class TestResolveOrigin: assert _resolve_origin(job) is None +class TestResolveDeliveryTarget: + def test_origin_delivery_preserves_thread_id(self): + job = { + "deliver": "origin", + "origin": { + "platform": "telegram", + "chat_id": "-1001", + "thread_id": "17585", + }, + } + + assert _resolve_delivery_target(job) == { + "platform": "telegram", + "chat_id": "-1001", + "thread_id": "17585", + } + + def test_bare_platform_uses_matching_origin_chat(self): + job 
= { + "deliver": "telegram", + "origin": { + "platform": "telegram", + "chat_id": "-1001", + "thread_id": "17585", + }, + } + + assert _resolve_delivery_target(job) == { + "platform": "telegram", + "chat_id": "-1001", + "thread_id": "17585", + } + + def test_bare_platform_falls_back_to_home_channel(self, monkeypatch): + monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "-2002") + job = { + "deliver": "telegram", + "origin": { + "platform": "discord", + "chat_id": "abc", + }, + } + + assert _resolve_delivery_target(job) == { + "platform": "telegram", + "chat_id": "-2002", + "thread_id": None, + } + + class TestDeliverResultMirrorLogging: """Verify that mirror_to_session failures are logged, not silently swallowed.""" diff --git a/tests/gateway/test_delivery.py b/tests/gateway/test_delivery.py index 42eba781..3894897f 100644 --- a/tests/gateway/test_delivery.py +++ b/tests/gateway/test_delivery.py @@ -1,7 +1,7 @@ """Tests for the delivery routing module.""" from gateway.config import Platform, GatewayConfig, PlatformConfig, HomeChannel -from gateway.delivery import DeliveryTarget, parse_deliver_spec +from gateway.delivery import DeliveryRouter, DeliveryTarget, parse_deliver_spec from gateway.session import SessionSource @@ -85,3 +85,12 @@ class TestTargetToStringRoundtrip: reparsed = DeliveryTarget.parse(s) assert reparsed.platform == Platform.TELEGRAM assert reparsed.chat_id == "999" + + +class TestDeliveryRouter: + def test_resolve_targets_does_not_duplicate_local_when_explicit(self): + router = DeliveryRouter(GatewayConfig(always_log_local=True)) + + targets = router.resolve_targets(["local"]) + + assert [target.platform for target in targets] == [Platform.LOCAL] diff --git a/tests/gateway/test_runner_fatal_adapter.py b/tests/gateway/test_runner_fatal_adapter.py new file mode 100644 index 00000000..aa414d72 --- /dev/null +++ b/tests/gateway/test_runner_fatal_adapter.py @@ -0,0 +1,46 @@ +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig 
+from gateway.platforms.base import BasePlatformAdapter +from gateway.run import GatewayRunner + + +class _FatalAdapter(BasePlatformAdapter): + def __init__(self): + super().__init__(PlatformConfig(enabled=True, token="token"), Platform.TELEGRAM) + + async def connect(self) -> bool: + self._set_fatal_error( + "telegram_token_lock", + "Another local Hermes gateway is already using this Telegram bot token.", + retryable=False, + ) + return False + + async def disconnect(self) -> None: + self._mark_disconnected() + + async def send(self, chat_id, content, reply_to=None, metadata=None): + raise NotImplementedError + + async def get_chat_info(self, chat_id): + return {"id": chat_id} + + +@pytest.mark.asyncio +async def test_runner_requests_clean_exit_for_nonretryable_startup_conflict(monkeypatch, tmp_path): + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="token") + }, + sessions_dir=tmp_path / "sessions", + ) + runner = GatewayRunner(config) + + monkeypatch.setattr(runner, "_create_adapter", lambda platform, platform_config: _FatalAdapter()) + + ok = await runner.start() + + assert ok is True + assert runner.should_exit_cleanly is True + assert "already using this Telegram bot token" in runner.exit_reason diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py index 025708a5..fdf1b57c 100644 --- a/tests/gateway/test_status.py +++ b/tests/gateway/test_status.py @@ -25,3 +25,77 @@ class TestGatewayPidState: assert status.get_running_pid() is None assert not pid_path.exists() + + +class TestGatewayRuntimeStatus: + def test_write_runtime_status_records_platform_failure(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + status.write_runtime_status( + gateway_state="startup_failed", + exit_reason="telegram conflict", + platform="telegram", + platform_state="fatal", + error_code="telegram_polling_conflict", + error_message="another poller is active", + ) + + payload = 
status.read_runtime_status() + assert payload["gateway_state"] == "startup_failed" + assert payload["exit_reason"] == "telegram conflict" + assert payload["platforms"]["telegram"]["state"] == "fatal" + assert payload["platforms"]["telegram"]["error_code"] == "telegram_polling_conflict" + assert payload["platforms"]["telegram"]["error_message"] == "another poller is active" + + +class TestScopedLocks: + def test_acquire_scoped_lock_rejects_live_other_process(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks")) + lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock" + lock_path.parent.mkdir(parents=True, exist_ok=True) + lock_path.write_text(json.dumps({ + "pid": 99999, + "start_time": 123, + "kind": "hermes-gateway", + })) + + monkeypatch.setattr(status.os, "kill", lambda pid, sig: None) + monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123) + + acquired, existing = status.acquire_scoped_lock("telegram-bot-token", "secret", metadata={"platform": "telegram"}) + + assert acquired is False + assert existing["pid"] == 99999 + + def test_acquire_scoped_lock_replaces_stale_record(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks")) + lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock" + lock_path.parent.mkdir(parents=True, exist_ok=True) + lock_path.write_text(json.dumps({ + "pid": 99999, + "start_time": 123, + "kind": "hermes-gateway", + })) + + def fake_kill(pid, sig): + raise ProcessLookupError + + monkeypatch.setattr(status.os, "kill", fake_kill) + + acquired, existing = status.acquire_scoped_lock("telegram-bot-token", "secret", metadata={"platform": "telegram"}) + + assert acquired is True + payload = json.loads(lock_path.read_text()) + assert payload["pid"] == os.getpid() + assert payload["metadata"]["platform"] == "telegram" + + def test_release_scoped_lock_only_removes_current_owner(self, tmp_path, 
monkeypatch): + monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks")) + + acquired, _ = status.acquire_scoped_lock("telegram-bot-token", "secret", metadata={"platform": "telegram"}) + assert acquired is True + lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock" + assert lock_path.exists() + + status.release_scoped_lock("telegram-bot-token", "secret") + assert not lock_path.exists() diff --git a/tests/gateway/test_telegram_conflict.py b/tests/gateway/test_telegram_conflict.py new file mode 100644 index 00000000..f2e21281 --- /dev/null +++ b/tests/gateway/test_telegram_conflict.py @@ -0,0 +1,100 @@ +import asyncio +import sys +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from gateway.config import PlatformConfig + + +def _ensure_telegram_mock(): + if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"): + return + + telegram_mod = MagicMock() + telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None) + telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2" + telegram_mod.constants.ChatType.GROUP = "group" + telegram_mod.constants.ChatType.SUPERGROUP = "supergroup" + telegram_mod.constants.ChatType.CHANNEL = "channel" + telegram_mod.constants.ChatType.PRIVATE = "private" + + for name in ("telegram", "telegram.ext", "telegram.constants"): + sys.modules.setdefault(name, telegram_mod) + + +_ensure_telegram_mock() + +from gateway.platforms.telegram import TelegramAdapter # noqa: E402 + + +@pytest.mark.asyncio +async def test_connect_rejects_same_host_token_lock(monkeypatch): + adapter = TelegramAdapter(PlatformConfig(enabled=True, token="secret-token")) + + monkeypatch.setattr( + "gateway.status.acquire_scoped_lock", + lambda scope, identity, metadata=None: (False, {"pid": 4242}), + ) + + ok = await adapter.connect() + + assert ok is False + assert adapter.fatal_error_code == "telegram_token_lock" + assert adapter.has_fatal_error is True + assert 
"already using this Telegram bot token" in adapter.fatal_error_message + + +@pytest.mark.asyncio +async def test_polling_conflict_stops_polling_and_notifies_handler(monkeypatch): + adapter = TelegramAdapter(PlatformConfig(enabled=True, token="secret-token")) + fatal_handler = AsyncMock() + adapter.set_fatal_error_handler(fatal_handler) + + monkeypatch.setattr( + "gateway.status.acquire_scoped_lock", + lambda scope, identity, metadata=None: (True, None), + ) + monkeypatch.setattr( + "gateway.status.release_scoped_lock", + lambda scope, identity: None, + ) + + captured = {} + + async def fake_start_polling(**kwargs): + captured["error_callback"] = kwargs["error_callback"] + + updater = SimpleNamespace( + start_polling=AsyncMock(side_effect=fake_start_polling), + stop=AsyncMock(), + ) + bot = SimpleNamespace(set_my_commands=AsyncMock()) + app = SimpleNamespace( + bot=bot, + updater=updater, + add_handler=MagicMock(), + initialize=AsyncMock(), + start=AsyncMock(), + ) + builder = MagicMock() + builder.token.return_value = builder + builder.build.return_value = app + monkeypatch.setattr("gateway.platforms.telegram.Application", SimpleNamespace(builder=MagicMock(return_value=builder))) + + ok = await adapter.connect() + + assert ok is True + assert callable(captured["error_callback"]) + + conflict = type("Conflict", (Exception,), {}) + captured["error_callback"](conflict("Conflict: terminated by other getUpdates request; make sure that only one bot instance is running")) + + await asyncio.sleep(0) + await asyncio.sleep(0) + + assert adapter.fatal_error_code == "telegram_polling_conflict" + assert adapter.has_fatal_error is True + updater.stop.assert_awaited() + fatal_handler.assert_awaited_once() diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py index 7a76625f..5e3e6f94 100644 --- a/tests/gateway/test_telegram_documents.py +++ b/tests/gateway/test_telegram_documents.py @@ -81,20 +81,21 @@ def _make_document( return doc -def 
_make_message(document=None, caption=None): - """Build a mock Telegram Message with the given document.""" +def _make_message(document=None, caption=None, media_group_id=None, photo=None): + """Build a mock Telegram Message with the given document/photo.""" msg = MagicMock() msg.message_id = 42 msg.text = caption or "" msg.caption = caption msg.date = None - # Media flags — all None except document - msg.photo = None + # Media flags — all None except explicit payload + msg.photo = photo msg.video = None msg.audio = None msg.voice = None msg.sticker = None msg.document = document + msg.media_group_id = media_group_id # Chat / user msg.chat = MagicMock() msg.chat.id = 100 @@ -165,6 +166,12 @@ class TestDocumentTypeDetection: # TestDocumentDownloadBlock # --------------------------------------------------------------------------- +def _make_photo(file_obj=None): + photo = MagicMock() + photo.get_file = AsyncMock(return_value=file_obj or _make_file_obj(b"photo-bytes")) + return photo + + class TestDocumentDownloadBlock: @pytest.mark.asyncio async def test_supported_pdf_is_cached(self, adapter): @@ -339,6 +346,50 @@ class TestDocumentDownloadBlock: adapter.handle_message.assert_called_once() +# --------------------------------------------------------------------------- +# TestMediaGroups — media group (album) buffering +# --------------------------------------------------------------------------- + +class TestMediaGroups: + @pytest.mark.asyncio + async def test_photo_album_is_buffered_and_combined(self, adapter): + first_photo = _make_photo(_make_file_obj(b"first")) + second_photo = _make_photo(_make_file_obj(b"second")) + + msg1 = _make_message(caption="two images", media_group_id="album-1", photo=[first_photo]) + msg2 = _make_message(media_group_id="album-1", photo=[second_photo]) + + with patch("gateway.platforms.telegram.cache_image_from_bytes", side_effect=["/tmp/one.jpg", "/tmp/two.jpg"]): + await adapter._handle_media_message(_make_update(msg1), MagicMock()) + 
await adapter._handle_media_message(_make_update(msg2), MagicMock()) + assert adapter.handle_message.await_count == 0 + await asyncio.sleep(adapter.MEDIA_GROUP_WAIT_SECONDS + 0.05) + + adapter.handle_message.assert_awaited_once() + event = adapter.handle_message.call_args[0][0] + assert event.text == "two images" + assert event.media_urls == ["/tmp/one.jpg", "/tmp/two.jpg"] + assert len(event.media_types) == 2 + + @pytest.mark.asyncio + async def test_disconnect_cancels_pending_media_group_flush(self, adapter): + first_photo = _make_photo(_make_file_obj(b"first")) + msg = _make_message(caption="two images", media_group_id="album-2", photo=[first_photo]) + + with patch("gateway.platforms.telegram.cache_image_from_bytes", return_value="/tmp/one.jpg"): + await adapter._handle_media_message(_make_update(msg), MagicMock()) + + assert "album-2" in adapter._media_group_events + assert "album-2" in adapter._media_group_tasks + + await adapter.disconnect() + await asyncio.sleep(adapter.MEDIA_GROUP_WAIT_SECONDS + 0.05) + + assert adapter._media_group_events == {} + assert adapter._media_group_tasks == {} + adapter.handle_message.assert_not_awaited() + + # --------------------------------------------------------------------------- # TestSendDocument — outbound file attachment delivery # --------------------------------------------------------------------------- diff --git a/tests/gateway/test_update_command.py b/tests/gateway/test_update_command.py index 12474563..ac9beac1 100644 --- a/tests/gateway/test_update_command.py +++ b/tests/gateway/test_update_command.py @@ -88,7 +88,7 @@ class TestHandleUpdateCommand: @pytest.mark.asyncio async def test_no_hermes_binary(self, tmp_path): - """Returns error when hermes is not on PATH.""" + """Returns error when hermes is not on PATH and hermes_cli is not importable.""" runner = _make_runner() event = _make_event() @@ -102,10 +102,77 @@ class TestHandleUpdateCommand: with patch("gateway.run._hermes_home", tmp_path), \ 
patch("gateway.run.__file__", fake_file), \ - patch("shutil.which", return_value=None): + patch("shutil.which", return_value=None), \ + patch("importlib.util.find_spec", return_value=None): result = await runner._handle_update_command(event) - assert "not found on PATH" in result + assert "Could not locate" in result + assert "hermes update" in result + + @pytest.mark.asyncio + async def test_fallback_to_sys_executable(self, tmp_path): + """Falls back to sys.executable -m hermes_cli.main when hermes not on PATH.""" + runner = _make_runner() + event = _make_event() + + fake_root = tmp_path / "project" + fake_root.mkdir() + (fake_root / ".git").mkdir() + (fake_root / "gateway").mkdir() + (fake_root / "gateway" / "run.py").touch() + fake_file = str(fake_root / "gateway" / "run.py") + hermes_home = tmp_path / "hermes" + hermes_home.mkdir() + + mock_popen = MagicMock() + fake_spec = MagicMock() + + with patch("gateway.run._hermes_home", hermes_home), \ + patch("gateway.run.__file__", fake_file), \ + patch("shutil.which", return_value=None), \ + patch("importlib.util.find_spec", return_value=fake_spec), \ + patch("subprocess.Popen", mock_popen): + result = await runner._handle_update_command(event) + + assert "Starting Hermes update" in result + call_args = mock_popen.call_args[0][0] + # The update_cmd uses sys.executable -m hermes_cli.main + joined = " ".join(call_args) if isinstance(call_args, list) else call_args + assert "hermes_cli.main" in joined or "bash" in call_args[0] + + @pytest.mark.asyncio + async def test_resolve_hermes_bin_prefers_which(self, tmp_path): + """_resolve_hermes_bin returns argv parts from shutil.which when available.""" + from gateway.run import _resolve_hermes_bin + + with patch("shutil.which", return_value="/custom/path/hermes"): + result = _resolve_hermes_bin() + + assert result == ["/custom/path/hermes"] + + @pytest.mark.asyncio + async def test_resolve_hermes_bin_fallback(self): + """_resolve_hermes_bin falls back to sys.executable argv 
when which fails.""" + import sys + from gateway.run import _resolve_hermes_bin + + fake_spec = MagicMock() + with patch("shutil.which", return_value=None), \ + patch("importlib.util.find_spec", return_value=fake_spec): + result = _resolve_hermes_bin() + + assert result == [sys.executable, "-m", "hermes_cli.main"] + + @pytest.mark.asyncio + async def test_resolve_hermes_bin_returns_none_when_both_fail(self): + """_resolve_hermes_bin returns None when both strategies fail.""" + from gateway.run import _resolve_hermes_bin + + with patch("shutil.which", return_value=None), \ + patch("importlib.util.find_spec", return_value=None): + result = _resolve_hermes_bin() + + assert result is None @pytest.mark.asyncio async def test_writes_pending_marker(self, tmp_path): diff --git a/tests/hermes_cli/test_cmd_update.py b/tests/hermes_cli/test_cmd_update.py new file mode 100644 index 00000000..0ccb7af8 --- /dev/null +++ b/tests/hermes_cli/test_cmd_update.py @@ -0,0 +1,107 @@ +"""Tests for cmd_update — branch fallback when remote branch doesn't exist.""" + +import subprocess +from types import SimpleNamespace +from unittest.mock import patch + +import pytest + +from hermes_cli.main import cmd_update, PROJECT_ROOT + + +def _make_run_side_effect(branch="main", verify_ok=True, commit_count="0"): + """Build a side_effect function for subprocess.run that simulates git commands.""" + + def side_effect(cmd, **kwargs): + joined = " ".join(str(c) for c in cmd) + + # git rev-parse --abbrev-ref HEAD (get current branch) + if "rev-parse" in joined and "--abbrev-ref" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="") + + # git rev-parse --verify origin/{branch} (check remote branch exists) + if "rev-parse" in joined and "--verify" in joined: + rc = 0 if verify_ok else 128 + return subprocess.CompletedProcess(cmd, rc, stdout="", stderr="") + + # git rev-list HEAD..origin/{branch} --count + if "rev-list" in joined: + return subprocess.CompletedProcess(cmd, 
0, stdout=f"{commit_count}\n", stderr="") + + # Fallback: return a successful CompletedProcess with empty stdout + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + return side_effect + + +@pytest.fixture +def mock_args(): + return SimpleNamespace() + + +class TestCmdUpdateBranchFallback: + """cmd_update falls back to main when current branch has no remote counterpart.""" + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_falls_back_to_main_when_branch_not_on_remote( + self, mock_run, _mock_which, mock_args, capsys + ): + mock_run.side_effect = _make_run_side_effect( + branch="fix/stoicneko", verify_ok=False, commit_count="3" + ) + + cmd_update(mock_args) + + commands = [" ".join(str(a) for a in c.args[0]) for c in mock_run.call_args_list] + + # rev-list should use origin/main, not origin/fix/stoicneko + rev_list_cmds = [c for c in commands if "rev-list" in c] + assert len(rev_list_cmds) == 1 + assert "origin/main" in rev_list_cmds[0] + assert "origin/fix/stoicneko" not in rev_list_cmds[0] + + # pull should use main, not fix/stoicneko + pull_cmds = [c for c in commands if "pull" in c] + assert len(pull_cmds) == 1 + assert "main" in pull_cmds[0] + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_uses_current_branch_when_on_remote( + self, mock_run, _mock_which, mock_args, capsys + ): + mock_run.side_effect = _make_run_side_effect( + branch="main", verify_ok=True, commit_count="2" + ) + + cmd_update(mock_args) + + commands = [" ".join(str(a) for a in c.args[0]) for c in mock_run.call_args_list] + + rev_list_cmds = [c for c in commands if "rev-list" in c] + assert len(rev_list_cmds) == 1 + assert "origin/main" in rev_list_cmds[0] + + pull_cmds = [c for c in commands if "pull" in c] + assert len(pull_cmds) == 1 + assert "main" in pull_cmds[0] + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_already_up_to_date( + self, mock_run, 
_mock_which, mock_args, capsys + ): + mock_run.side_effect = _make_run_side_effect( + branch="main", verify_ok=True, commit_count="0" + ) + + cmd_update(mock_args) + + captured = capsys.readouterr() + assert "Already up to date!" in captured.out + + # Should NOT have called pull + commands = [" ".join(str(a) for a in c.args[0]) for c in mock_run.call_args_list] + pull_cmds = [c for c in commands if "pull" in c] + assert len(pull_cmds) == 0 diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py index a39b0c64..ad987d57 100644 --- a/tests/hermes_cli/test_gateway.py +++ b/tests/hermes_cli/test_gateway.py @@ -59,15 +59,16 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys): unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service" monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda: unit_path) - monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, "")) calls = [] + helper_calls = [] def fake_run(cmd, check=False, **kwargs): calls.append((cmd, check)) return SimpleNamespace(returncode=0, stdout="", stderr="") monkeypatch.setattr(gateway.subprocess, "run", fake_run) + monkeypatch.setattr(gateway, "_ensure_linger_enabled", lambda: helper_calls.append(True)) gateway.systemd_install(force=False) @@ -77,6 +78,5 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys): ["systemctl", "--user", "daemon-reload"], ["systemctl", "--user", "enable", gateway.SERVICE_NAME], ] + assert helper_calls == [True] assert "Service installed and enabled" in out - assert "Systemd linger is disabled" in out - assert "loginctl enable-linger" in out diff --git a/tests/hermes_cli/test_gateway_linger.py b/tests/hermes_cli/test_gateway_linger.py new file mode 100644 index 00000000..f1341d06 --- /dev/null +++ b/tests/hermes_cli/test_gateway_linger.py @@ -0,0 +1,120 @@ +"""Tests for gateway linger auto-enable behavior on headless Linux installs.""" + +from types import SimpleNamespace 
+ +import hermes_cli.gateway as gateway + + +class TestEnsureLingerEnabled: + def test_linger_already_enabled_via_file(self, monkeypatch, capsys): + monkeypatch.setattr(gateway, "is_linux", lambda: True) + monkeypatch.setattr("getpass.getuser", lambda: "testuser") + monkeypatch.setattr(gateway, "Path", lambda _path: SimpleNamespace(exists=lambda: True)) + + calls = [] + monkeypatch.setattr(gateway.subprocess, "run", lambda *args, **kwargs: calls.append((args, kwargs))) + + gateway._ensure_linger_enabled() + + out = capsys.readouterr().out + assert "Systemd linger is enabled" in out + assert calls == [] + + def test_status_enabled_skips_enable(self, monkeypatch, capsys): + monkeypatch.setattr(gateway, "is_linux", lambda: True) + monkeypatch.setattr("getpass.getuser", lambda: "testuser") + monkeypatch.setattr(gateway, "Path", lambda _path: SimpleNamespace(exists=lambda: False)) + monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (True, "")) + + calls = [] + monkeypatch.setattr(gateway.subprocess, "run", lambda *args, **kwargs: calls.append((args, kwargs))) + + gateway._ensure_linger_enabled() + + out = capsys.readouterr().out + assert "Systemd linger is enabled" in out + assert calls == [] + + def test_loginctl_success_enables_linger(self, monkeypatch, capsys): + monkeypatch.setattr(gateway, "is_linux", lambda: True) + monkeypatch.setattr("getpass.getuser", lambda: "testuser") + monkeypatch.setattr(gateway, "Path", lambda _path: SimpleNamespace(exists=lambda: False)) + monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, "")) + monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/loginctl") + + run_calls = [] + + def fake_run(cmd, capture_output=False, text=False, check=False): + run_calls.append((cmd, capture_output, text, check)) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway.subprocess, "run", fake_run) + + gateway._ensure_linger_enabled() + + out = capsys.readouterr().out + 
assert "Enabling linger" in out + assert "Linger enabled" in out + assert run_calls == [(["loginctl", "enable-linger", "testuser"], True, True, False)] + + def test_missing_loginctl_shows_manual_guidance(self, monkeypatch, capsys): + monkeypatch.setattr(gateway, "is_linux", lambda: True) + monkeypatch.setattr("getpass.getuser", lambda: "testuser") + monkeypatch.setattr(gateway, "Path", lambda _path: SimpleNamespace(exists=lambda: False)) + monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (None, "loginctl not found")) + monkeypatch.setattr("shutil.which", lambda name: None) + + calls = [] + monkeypatch.setattr(gateway.subprocess, "run", lambda *args, **kwargs: calls.append((args, kwargs))) + + gateway._ensure_linger_enabled() + + out = capsys.readouterr().out + assert "sudo loginctl enable-linger testuser" in out + assert "loginctl not found" in out + assert calls == [] + + def test_loginctl_failure_shows_manual_guidance(self, monkeypatch, capsys): + monkeypatch.setattr(gateway, "is_linux", lambda: True) + monkeypatch.setattr("getpass.getuser", lambda: "testuser") + monkeypatch.setattr(gateway, "Path", lambda _path: SimpleNamespace(exists=lambda: False)) + monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, "")) + monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/loginctl") + monkeypatch.setattr( + gateway.subprocess, + "run", + lambda *args, **kwargs: SimpleNamespace(returncode=1, stdout="", stderr="Permission denied"), + ) + + gateway._ensure_linger_enabled() + + out = capsys.readouterr().out + assert "sudo loginctl enable-linger testuser" in out + assert "Permission denied" in out + + +def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys): + unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service" + + monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda: unit_path) + + calls = [] + + def fake_run(cmd, check=False, **kwargs): + calls.append((cmd, check)) + return 
SimpleNamespace(returncode=0, stdout="", stderr="") + + helper_calls = [] + monkeypatch.setattr(gateway.subprocess, "run", fake_run) + monkeypatch.setattr(gateway, "_ensure_linger_enabled", lambda: helper_calls.append(True)) + + gateway.systemd_install(force=False) + + out = capsys.readouterr().out + assert unit_path.exists() + assert [cmd for cmd, _ in calls] == [ + ["systemctl", "--user", "daemon-reload"], + ["systemctl", "--user", "enable", gateway.SERVICE_NAME], + ] + assert helper_calls == [True] + assert "Service installed and enabled" in out diff --git a/tests/hermes_cli/test_gateway_runtime_health.py b/tests/hermes_cli/test_gateway_runtime_health.py new file mode 100644 index 00000000..15c0705c --- /dev/null +++ b/tests/hermes_cli/test_gateway_runtime_health.py @@ -0,0 +1,22 @@ +from hermes_cli.gateway import _runtime_health_lines + + +def test_runtime_health_lines_include_fatal_platform_and_startup_reason(monkeypatch): + monkeypatch.setattr( + "gateway.status.read_runtime_status", + lambda: { + "gateway_state": "startup_failed", + "exit_reason": "telegram conflict", + "platforms": { + "telegram": { + "state": "fatal", + "error_message": "another poller is active", + } + }, + }, + ) + + lines = _runtime_health_lines() + + assert "⚠ telegram: another poller is active" in lines + assert "⚠ Last startup issue: telegram conflict" in lines diff --git a/tests/hermes_cli/test_placeholder_usage.py b/tests/hermes_cli/test_placeholder_usage.py new file mode 100644 index 00000000..3479d8f5 --- /dev/null +++ b/tests/hermes_cli/test_placeholder_usage.py @@ -0,0 +1,48 @@ +"""Tests for CLI placeholder text in config/setup output.""" + +import os +from argparse import Namespace +from unittest.mock import patch + +import pytest + +from hermes_cli.config import config_command, show_config +from hermes_cli.setup import _print_setup_summary + + +def test_config_set_usage_marks_placeholders(capsys): + args = Namespace(config_command="set", key=None, value=None) + + with 
pytest.raises(SystemExit) as exc: + config_command(args) + + assert exc.value.code == 1 + out = capsys.readouterr().out + assert "Usage: hermes config set " in out + + +def test_config_unknown_command_help_marks_placeholders(capsys): + args = Namespace(config_command="wat") + + with pytest.raises(SystemExit) as exc: + config_command(args) + + assert exc.value.code == 1 + out = capsys.readouterr().out + assert "hermes config set Set a config value" in out + + +def test_show_config_marks_placeholders(tmp_path, capsys): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + show_config() + + out = capsys.readouterr().out + assert "hermes config set " in out + + +def test_setup_summary_marks_placeholders(tmp_path, capsys): + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + _print_setup_summary({"tts": {"provider": "edge"}}, tmp_path) + + out = capsys.readouterr().out + assert "hermes config set " in out diff --git a/tests/hermes_cli/test_setup_model_provider.py b/tests/hermes_cli/test_setup_model_provider.py index f7c3ce38..ee5d7d26 100644 --- a/tests/hermes_cli/test_setup_model_provider.py +++ b/tests/hermes_cli/test_setup_model_provider.py @@ -3,7 +3,7 @@ from __future__ import annotations from hermes_cli.config import load_config, save_config, save_env_value -from hermes_cli.setup import setup_model_provider +from hermes_cli.setup import _print_setup_summary, setup_model_provider def _read_env(home): @@ -50,11 +50,15 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m calls = {"count": 0} - def fake_prompt_choice(_question, choices, default=0): + def fake_prompt_choice(question, choices, default=0): calls["count"] += 1 if calls["count"] == 1: assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)" return len(choices) - 1 + if calls["count"] == 2: + assert question == "Configure vision:" + assert choices[-1] == "Skip for now" + return len(choices) - 1 raise AssertionError("Model menu should not 
appear for keep-current custom") monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice) @@ -70,7 +74,7 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m assert reloaded["model"]["provider"] == "custom" assert reloaded["model"]["default"] == "custom/model" assert reloaded["model"]["base_url"] == "https://example.invalid/v1" - assert calls["count"] == 1 + assert calls["count"] == 2 def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch): @@ -88,13 +92,17 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm captured = {"provider_choices": None, "model_choices": None} calls = {"count": 0} - def fake_prompt_choice(_question, choices, default=0): + def fake_prompt_choice(question, choices, default=0): calls["count"] += 1 if calls["count"] == 1: captured["provider_choices"] = list(choices) assert choices[-1] == "Keep current (Anthropic)" return len(choices) - 1 if calls["count"] == 2: + assert question == "Configure vision:" + assert choices[-1] == "Skip for now" + return len(choices) - 1 + if calls["count"] == 3: captured["model_choices"] = list(choices) return len(choices) - 1 # keep current model raise AssertionError("Unexpected extra prompt_choice call") @@ -113,7 +121,43 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm assert captured["model_choices"] is not None assert captured["model_choices"][0] == "claude-opus-4-6" assert "anthropic/claude-opus-4.6 (recommended)" not in captured["model_choices"] - assert calls["count"] == 2 + assert calls["count"] == 3 + + +def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + config = load_config() + config["model"] = { + "default": "claude-opus-4-6", + "provider": "anthropic", + } + save_config(config) + + picks = iter([ + 9, # keep 
current provider + 1, # configure vision with OpenAI + 5, # use default gpt-4o-mini vision model + 4, # keep current Anthropic model + ]) + + monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: next(picks)) + monkeypatch.setattr( + "hermes_cli.setup.prompt", + lambda message, *args, **kwargs: "sk-openai" if "OpenAI API key" in message else "", + ) + monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False) + monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) + monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) + monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: []) + + setup_model_provider(config) + env = _read_env(tmp_path) + + assert env.get("OPENAI_API_KEY") == "sk-openai" + assert env.get("OPENAI_BASE_URL") == "https://api.openai.com/v1" + assert env.get("AUXILIARY_VISION_MODEL") == "gpt-4o-mini" def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(tmp_path, monkeypatch): @@ -144,7 +188,7 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config( "hermes_cli.auth.resolve_codex_runtime_credentials", lambda *args, **kwargs: { "base_url": "https://chatgpt.com/backend-api/codex", - "api_key": "codex-access-token", + "api_key": "codex-...oken", }, ) monkeypatch.setattr( @@ -163,3 +207,22 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config( assert reloaded["model"]["provider"] == "openai-codex" assert reloaded["model"]["default"] == "openai/gpt-5.3-codex" assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex" + + +def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatch, capsys): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + (tmp_path / "auth.json").write_text( + '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"tok"}}}}' + ) 
+ + monkeypatch.setattr("shutil.which", lambda _name: None) + + _print_setup_summary(load_config(), tmp_path) + output = capsys.readouterr().out + + assert "Vision (image analysis)" in output + assert "missing run 'hermes setup' to configure" not in output + assert "Mixture of Agents" in output + assert "missing OPENROUTER_API_KEY" in output diff --git a/tests/hermes_cli/test_skills_hub.py b/tests/hermes_cli/test_skills_hub.py index 4e3af6c7..d1169120 100644 --- a/tests/hermes_cli/test_skills_hub.py +++ b/tests/hermes_cli/test_skills_hub.py @@ -3,7 +3,7 @@ from io import StringIO import pytest from rich.console import Console -from hermes_cli.skills_hub import do_check, do_list, do_update +from hermes_cli.skills_hub import do_check, do_list, do_update, handle_skills_slash class _DummyLockFile: diff --git a/tests/hermes_cli/test_skills_install_flags.py b/tests/hermes_cli/test_skills_install_flags.py new file mode 100644 index 00000000..bca0404d --- /dev/null +++ b/tests/hermes_cli/test_skills_install_flags.py @@ -0,0 +1,26 @@ +import sys +from types import SimpleNamespace + + +def test_cli_skills_install_accepts_yes_alias(monkeypatch): + from hermes_cli.main import main + + captured = {} + + def fake_skills_command(args): + captured["identifier"] = args.identifier + captured["force"] = args.force + + monkeypatch.setattr("hermes_cli.skills_hub.skills_command", fake_skills_command) + monkeypatch.setattr( + sys, + "argv", + ["hermes", "skills", "install", "official/email/agentmail", "--yes"], + ) + + main() + + assert captured == { + "identifier": "official/email/agentmail", + "force": True, + } diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py index bb0ccd06..b95ff780 100644 --- a/tests/test_agent_loop.py +++ b/tests/test_agent_loop.py @@ -484,3 +484,22 @@ class TestResizeToolPool: """resize_tool_pool should not raise.""" resize_tool_pool(16) # Small pool for testing resize_tool_pool(128) # Restore default + + def 
test_resize_shuts_down_previous_executor(self, monkeypatch): + """Replacing the global tool executor should shut down the old pool.""" + import environments.agent_loop as agent_loop_module + + old_executor = MagicMock() + new_executor = MagicMock() + + monkeypatch.setattr(agent_loop_module, "_tool_executor", old_executor) + monkeypatch.setattr( + agent_loop_module.concurrent.futures, + "ThreadPoolExecutor", + MagicMock(return_value=new_executor), + ) + + resize_tool_pool(16) + + old_executor.shutdown.assert_called_once_with(wait=False) + assert agent_loop_module._tool_executor is new_executor diff --git a/tests/test_cli_approval_ui.py b/tests/test_cli_approval_ui.py new file mode 100644 index 00000000..9b2e0bbb --- /dev/null +++ b/tests/test_cli_approval_ui.py @@ -0,0 +1,100 @@ +import queue +import threading +import time +from types import SimpleNamespace +from unittest.mock import MagicMock + +from cli import HermesCLI + + +def _make_cli_stub(): + cli = HermesCLI.__new__(HermesCLI) + cli._approval_state = None + cli._approval_deadline = 0 + cli._approval_lock = threading.Lock() + cli._invalidate = MagicMock() + cli._app = SimpleNamespace(invalidate=MagicMock()) + return cli + + +class TestCliApprovalUi: + def test_approval_callback_includes_view_for_long_commands(self): + cli = _make_cli_stub() + command = "sudo dd if=/tmp/githubcli-keyring.gpg of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress" + result = {} + + def _run_callback(): + result["value"] = cli._approval_callback(command, "disk copy") + + thread = threading.Thread(target=_run_callback, daemon=True) + thread.start() + + deadline = time.time() + 2 + while cli._approval_state is None and time.time() < deadline: + time.sleep(0.01) + + assert cli._approval_state is not None + assert "view" in cli._approval_state["choices"] + + cli._approval_state["response_queue"].put("deny") + thread.join(timeout=2) + assert result["value"] == "deny" + + def 
test_handle_approval_selection_view_expands_in_place(self): + cli = _make_cli_stub() + cli._approval_state = { + "command": "sudo dd if=/tmp/in of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress", + "description": "disk copy", + "choices": ["once", "session", "always", "deny", "view"], + "selected": 4, + "response_queue": queue.Queue(), + } + + cli._handle_approval_selection() + + assert cli._approval_state is not None + assert cli._approval_state["show_full"] is True + assert "view" not in cli._approval_state["choices"] + assert cli._approval_state["selected"] == 3 + assert cli._approval_state["response_queue"].empty() + + def test_approval_display_places_title_inside_box_not_border(self): + cli = _make_cli_stub() + cli._approval_state = { + "command": "sudo dd if=/tmp/in of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress", + "description": "disk copy", + "choices": ["once", "session", "always", "deny", "view"], + "selected": 0, + "response_queue": queue.Queue(), + } + + fragments = cli._get_approval_display_fragments() + rendered = "".join(text for _style, text in fragments) + lines = rendered.splitlines() + + assert lines[0].startswith("╭") + assert "Dangerous Command" not in lines[0] + assert any("Dangerous Command" in line for line in lines[1:3]) + assert "Show full command" in rendered + assert "githubcli-archive-keyring.gpg" not in rendered + + def test_approval_display_shows_full_command_after_view(self): + cli = _make_cli_stub() + full_command = "sudo dd if=/tmp/in of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress" + cli._approval_state = { + "command": full_command, + "description": "disk copy", + "choices": ["once", "session", "always", "deny"], + "selected": 0, + "show_full": True, + "response_queue": queue.Queue(), + } + + fragments = cli._get_approval_display_fragments() + rendered = "".join(text for _style, text in fragments) + + assert "..." 
not in rendered + assert "githubcli-" in rendered + assert "archive-" in rendered + assert "keyring.gpg" in rendered + assert "status=progress" in rendered diff --git a/tests/test_cli_prefix_matching.py b/tests/test_cli_prefix_matching.py new file mode 100644 index 00000000..ffec9195 --- /dev/null +++ b/tests/test_cli_prefix_matching.py @@ -0,0 +1,117 @@ +"""Tests for slash command prefix matching in HermesCLI.process_command.""" +from unittest.mock import MagicMock, patch +from cli import HermesCLI + + +def _make_cli(): + cli_obj = HermesCLI.__new__(HermesCLI) + cli_obj.config = {} + cli_obj.console = MagicMock() + cli_obj.agent = None + cli_obj.conversation_history = [] + cli_obj.session_id = None + cli_obj._pending_input = MagicMock() + return cli_obj + + +class TestSlashCommandPrefixMatching: + def test_unique_prefix_dispatches_command(self): + """/con should dispatch to /config when it uniquely matches.""" + cli_obj = _make_cli() + with patch.object(cli_obj, 'show_config') as mock_config: + cli_obj.process_command("/con") + mock_config.assert_called_once() + + def test_unique_prefix_with_args_does_not_recurse(self): + """/con set key value should expand to /config set key value without infinite recursion.""" + cli_obj = _make_cli() + dispatched = [] + + original = cli_obj.process_command.__func__ + + def counting_process_command(self_inner, cmd): + dispatched.append(cmd) + if len(dispatched) > 5: + raise RecursionError("process_command called too many times") + return original(self_inner, cmd) + + with patch.object(type(cli_obj), 'process_command', counting_process_command): + try: + cli_obj.process_command("/con set key value") + except RecursionError: + assert False, "process_command recursed infinitely" + + # Should have been called at most twice: once for /con set..., once for /config set... 
+ assert len(dispatched) <= 2 + + def test_exact_command_with_args_does_not_recurse(self): + """/config set key value hits exact branch and does not loop back to prefix.""" + cli_obj = _make_cli() + call_count = [0] + + original_pc = HermesCLI.process_command + + def guarded(self_inner, cmd): + call_count[0] += 1 + if call_count[0] > 10: + raise RecursionError("Infinite recursion detected") + return original_pc(self_inner, cmd) + + with patch.object(HermesCLI, 'process_command', guarded): + try: + cli_obj.process_command("/config set key value") + except RecursionError: + assert False, "Recursed infinitely on /config set key value" + + assert call_count[0] <= 3 + + def test_ambiguous_prefix_shows_suggestions(self): + """/re matches multiple commands — should show ambiguous message.""" + cli_obj = _make_cli() + cli_obj.process_command("/re") + printed = " ".join(str(c) for c in cli_obj.console.print.call_args_list) + assert "Ambiguous" in printed or "Did you mean" in printed + + def test_unknown_command_shows_error(self): + """/xyz should show unknown command error.""" + cli_obj = _make_cli() + cli_obj.process_command("/xyz") + printed = " ".join(str(c) for c in cli_obj.console.print.call_args_list) + assert "Unknown command" in printed + + def test_exact_command_still_works(self): + """/help should still work as exact match.""" + cli_obj = _make_cli() + with patch.object(cli_obj, 'show_help') as mock_help: + cli_obj.process_command("/help") + mock_help.assert_called_once() + + def test_skill_command_prefix_matches(self): + """A prefix that uniquely matches a skill command should dispatch it.""" + cli_obj = _make_cli() + fake_skill = {"/test-skill-xyz": {"name": "Test Skill", "description": "test"}} + printed = [] + cli_obj.console.print = lambda *a, **kw: printed.append(str(a)) + + import cli as cli_mod + with patch.object(cli_mod, '_skill_commands', fake_skill): + cli_obj.process_command("/test-skill-xy") + + # Should NOT show "Unknown command" — should have 
dispatched or attempted skill + unknown = any("Unknown command" in p for p in printed) + assert not unknown, f"Expected skill prefix to match, got: {printed}" + + def test_ambiguous_between_builtin_and_skill(self): + """Ambiguous prefix spanning builtin + skill commands shows suggestions.""" + cli_obj = _make_cli() + # /help-extra is a fake skill that shares /hel prefix with /help + fake_skill = {"/help-extra": {"name": "Help Extra", "description": "test"}} + + import cli as cli_mod + with patch.object(cli_mod, '_skill_commands', fake_skill), patch.object(cli_obj, 'show_help') as mock_help: + cli_obj.process_command("/help") + + # /help is an exact match so should work normally, not show ambiguous + mock_help.assert_called_once() + printed = " ".join(str(c) for c in cli_obj.console.print.call_args_list) + assert "Ambiguous" not in printed diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 8c22dd7a..ffc5752f 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -186,6 +186,11 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch): monkeypatch.delenv("LLM_MODEL", raising=False) monkeypatch.delenv("OPENAI_MODEL", raising=False) + # Ensure local user config does not leak a model into the test + monkeypatch.setitem(cli.CLI_CONFIG, "model", { + "default": "", + "base_url": "https://openrouter.ai/api/v1", + }) def _runtime_resolve(**kwargs): return { @@ -240,6 +245,11 @@ def test_codex_provider_uses_config_model(monkeypatch): monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + # Prevent live API call from overriding the config model + monkeypatch.setattr( + "hermes_cli.codex_models.get_codex_model_ids", + lambda access_token=None: ["gpt-5.2-codex"], + ) shell = cli.HermesCLI(compact=True, max_turns=1) diff --git 
a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index 520205df..a53c716a 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -150,7 +150,7 @@ def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch): monkeypatch.setenv("OPENAI_BASE_URL", "https://my-vllm-server.example.com/v1") monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) monkeypatch.setenv("OPENAI_API_KEY", "sk-vllm-key") - monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-should-not-leak") + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-...leak") resolved = rp.resolve_runtime_provider(requested="auto") @@ -158,6 +158,107 @@ def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch): assert resolved["api_key"] == "sk-vllm-key" +def test_named_custom_provider_uses_saved_credentials(monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setattr( + rp, + "load_config", + lambda: { + "custom_providers": [ + { + "name": "Local", + "base_url": "http://1.2.3.4:1234/v1", + "api_key": "local-provider-key", + } + ] + }, + ) + monkeypatch.setattr( + rp, + "resolve_provider", + lambda *a, **k: (_ for _ in ()).throw( + AssertionError( + "resolve_provider should not be called for named custom providers" + ) + ), + ) + + resolved = rp.resolve_runtime_provider(requested="local") + + assert resolved["provider"] == "openrouter" + assert resolved["api_mode"] == "chat_completions" + assert resolved["base_url"] == "http://1.2.3.4:1234/v1" + assert resolved["api_key"] == "local-provider-key" + assert resolved["requested_provider"] == "local" + assert resolved["source"] == "custom_provider:Local" + + +def test_named_custom_provider_falls_back_to_openai_api_key(monkeypatch): + monkeypatch.setenv("OPENAI_API_KEY", "env-openai-key") + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setattr( + rp, + 
"load_config", + lambda: { + "custom_providers": [ + { + "name": "Local LLM", + "base_url": "http://localhost:1234/v1", + } + ] + }, + ) + monkeypatch.setattr( + rp, + "resolve_provider", + lambda *a, **k: (_ for _ in ()).throw( + AssertionError( + "resolve_provider should not be called for named custom providers" + ) + ), + ) + + resolved = rp.resolve_runtime_provider(requested="custom:local-llm") + + assert resolved["base_url"] == "http://localhost:1234/v1" + assert resolved["api_key"] == "env-openai-key" + assert resolved["requested_provider"] == "custom:local-llm" + + +def test_named_custom_provider_does_not_shadow_builtin_provider(monkeypatch): + monkeypatch.setattr( + rp, + "load_config", + lambda: { + "custom_providers": [ + { + "name": "nous", + "base_url": "http://localhost:1234/v1", + "api_key": "shadow-key", + } + ] + }, + ) + monkeypatch.setattr( + rp, + "resolve_nous_runtime_credentials", + lambda **kwargs: { + "base_url": "https://inference-api.nousresearch.com/v1", + "api_key": "nous-runtime-key", + "source": "portal", + "expires_at": None, + }, + ) + + resolved = rp.resolve_runtime_provider(requested="nous") + + assert resolved["provider"] == "nous" + assert resolved["base_url"] == "https://inference-api.nousresearch.com/v1" + assert resolved["api_key"] == "nous-runtime-key" + assert resolved["requested_provider"] == "nous" + + def test_explicit_openrouter_skips_openai_base_url(monkeypatch): """When the user explicitly requests openrouter, OPENAI_BASE_URL (which may point to a custom endpoint) must not override the diff --git a/tests/test_timezone.py b/tests/test_timezone.py index 9902817d..728d47dd 100644 --- a/tests/test_timezone.py +++ b/tests/test_timezone.py @@ -328,6 +328,34 @@ class TestCronTimezone: "Overdue job was skipped — _ensure_aware likely shifted absolute time" ) + def test_get_due_jobs_naive_cross_timezone(self, tmp_path, monkeypatch): + """Naive past timestamps must be detected as due even when Hermes tz + is behind system local tz 
— the scenario that triggered #806.""" + import cron.jobs as jobs_module + monkeypatch.setattr(jobs_module, "CRON_DIR", tmp_path / "cron") + monkeypatch.setattr(jobs_module, "JOBS_FILE", tmp_path / "cron" / "jobs.json") + monkeypatch.setattr(jobs_module, "OUTPUT_DIR", tmp_path / "cron" / "output") + + # Use a Hermes timezone far behind UTC so that the numeric wall time + # of the naive timestamp exceeds _hermes_now's wall time — this would + # have caused a false "not due" with the old replace(tzinfo=...) approach. + os.environ["HERMES_TIMEZONE"] = "Pacific/Midway" # UTC-11 + hermes_time.reset_cache() + + from cron.jobs import create_job, load_jobs, save_jobs, get_due_jobs + create_job(prompt="Cross-tz job", schedule="every 1h") + jobs = load_jobs() + + # Force a naive past timestamp (system-local wall time, 10 min ago) + naive_past = (datetime.now() - timedelta(minutes=10)).isoformat() + jobs[0]["next_run_at"] = naive_past + save_jobs(jobs) + + due = get_due_jobs() + assert len(due) == 1, ( + "Naive past timestamp should be due regardless of Hermes timezone" + ) + def test_create_job_stores_tz_aware_timestamps(self, tmp_path, monkeypatch): """New jobs store timezone-aware created_at and next_run_at.""" import cron.jobs as jobs_module diff --git a/tests/test_trajectory_compressor.py b/tests/test_trajectory_compressor.py index 75fbd5a2..c95a3af9 100644 --- a/tests/test_trajectory_compressor.py +++ b/tests/test_trajectory_compressor.py @@ -1,7 +1,10 @@ """Tests for trajectory_compressor.py — config, metrics, and compression logic.""" import json -from unittest.mock import patch, MagicMock +from types import SimpleNamespace +from unittest.mock import AsyncMock, patch, MagicMock + +import pytest from trajectory_compressor import ( CompressionConfig, @@ -384,3 +387,32 @@ class TestTokenCounting: tc.tokenizer.encode = MagicMock(side_effect=Exception("fail")) # Should fallback to len(text) // 4 assert tc.count_tokens("12345678") == 2 + + +class TestGenerateSummary: + def 
test_generate_summary_handles_none_content(self): + tc = _make_compressor() + tc.client = MagicMock() + tc.client.chat.completions.create.return_value = SimpleNamespace( + choices=[SimpleNamespace(message=SimpleNamespace(content=None))] + ) + metrics = TrajectoryMetrics() + + summary = tc._generate_summary("Turn content", metrics) + + assert summary == "[CONTEXT SUMMARY]:" + + @pytest.mark.asyncio + async def test_generate_summary_async_handles_none_content(self): + tc = _make_compressor() + tc.async_client = MagicMock() + tc.async_client.chat.completions.create = AsyncMock( + return_value=SimpleNamespace( + choices=[SimpleNamespace(message=SimpleNamespace(content=None))] + ) + ) + metrics = TrajectoryMetrics() + + summary = await tc._generate_summary_async("Turn content", metrics) + + assert summary == "[CONTEXT SUMMARY]:" diff --git a/tests/tools/test_browser_cleanup.py b/tests/tools/test_browser_cleanup.py new file mode 100644 index 00000000..9dfabe64 --- /dev/null +++ b/tests/tools/test_browser_cleanup.py @@ -0,0 +1,96 @@ +"""Regression tests for browser session cleanup and screenshot recovery.""" + +from unittest.mock import patch + + +class TestScreenshotPathRecovery: + def test_extracts_standard_absolute_path(self): + from tools.browser_tool import _extract_screenshot_path_from_text + + assert ( + _extract_screenshot_path_from_text("Screenshot saved to /tmp/foo.png") + == "/tmp/foo.png" + ) + + def test_extracts_quoted_absolute_path(self): + from tools.browser_tool import _extract_screenshot_path_from_text + + assert ( + _extract_screenshot_path_from_text( + "Screenshot saved to '/Users/david/.hermes/browser_screenshots/shot.png'" + ) + == "/Users/david/.hermes/browser_screenshots/shot.png" + ) + + +class TestBrowserCleanup: + def setup_method(self): + from tools import browser_tool + + self.browser_tool = browser_tool + self.orig_active_sessions = browser_tool._active_sessions.copy() + self.orig_session_last_activity = 
browser_tool._session_last_activity.copy() + self.orig_recording_sessions = browser_tool._recording_sessions.copy() + self.orig_cleanup_done = browser_tool._cleanup_done + + def teardown_method(self): + self.browser_tool._active_sessions.clear() + self.browser_tool._active_sessions.update(self.orig_active_sessions) + self.browser_tool._session_last_activity.clear() + self.browser_tool._session_last_activity.update(self.orig_session_last_activity) + self.browser_tool._recording_sessions.clear() + self.browser_tool._recording_sessions.update(self.orig_recording_sessions) + self.browser_tool._cleanup_done = self.orig_cleanup_done + + def test_cleanup_browser_clears_tracking_state(self): + browser_tool = self.browser_tool + browser_tool._active_sessions["task-1"] = { + "session_name": "sess-1", + "bb_session_id": None, + } + browser_tool._session_last_activity["task-1"] = 123.0 + + with ( + patch("tools.browser_tool._maybe_stop_recording") as mock_stop, + patch( + "tools.browser_tool._run_browser_command", + return_value={"success": True}, + ) as mock_run, + patch("tools.browser_tool.os.path.exists", return_value=False), + ): + browser_tool.cleanup_browser("task-1") + + assert "task-1" not in browser_tool._active_sessions + assert "task-1" not in browser_tool._session_last_activity + mock_stop.assert_called_once_with("task-1") + mock_run.assert_called_once_with("task-1", "close", [], timeout=10) + + def test_browser_close_delegates_to_cleanup_browser(self): + import json + + browser_tool = self.browser_tool + browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"} + + with patch("tools.browser_tool.cleanup_browser") as mock_cleanup: + result = json.loads(browser_tool.browser_close("task-2")) + + assert result == {"success": True, "closed": True} + mock_cleanup.assert_called_once_with("task-2") + + def test_emergency_cleanup_clears_all_tracking_state(self): + browser_tool = self.browser_tool + browser_tool._cleanup_done = False + 
browser_tool._active_sessions["task-1"] = {"session_name": "sess-1"} + browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"} + browser_tool._session_last_activity["task-1"] = 1.0 + browser_tool._session_last_activity["task-2"] = 2.0 + browser_tool._recording_sessions.update({"task-1", "task-2"}) + + with patch("tools.browser_tool.cleanup_all_browsers") as mock_cleanup_all: + browser_tool._emergency_cleanup_all_sessions() + + mock_cleanup_all.assert_called_once_with() + assert browser_tool._active_sessions == {} + assert browser_tool._session_last_activity == {} + assert browser_tool._recording_sessions == set() + assert browser_tool._cleanup_done is True diff --git a/tests/tools/test_force_dangerous_override.py b/tests/tools/test_force_dangerous_override.py index ab9600f2..3a727bf1 100644 --- a/tests/tools/test_force_dangerous_override.py +++ b/tests/tools/test_force_dangerous_override.py @@ -1,11 +1,8 @@ -"""Tests for the --force flag dangerous verdict bypass fix in skills_guard.py. +"""Regression tests for skills guard policy precedence. -Regression test: the old code had `if result.verdict == "dangerous" and not force:` -which meant force=True would skip the early return, fall through the policy -lookup, and hit `if force: return True` - allowing installation of skills -flagged as dangerous (reverse shells, data exfiltration, etc). - -The docstring explicitly states: "never overrides dangerous". +Official/builtin skills should follow the INSTALL_POLICY table even when their +scan verdict is dangerous, and --force should override blocked verdicts for +non-builtin sources. 
""" @@ -44,10 +41,6 @@ def _new_should_allow(verdict, trust_level, force): } VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2} - # Fixed: no `and not force` - dangerous is always blocked - if verdict == "dangerous": - return False - policy = INSTALL_POLICY.get(trust_level, INSTALL_POLICY["community"]) vi = VERDICT_INDEX.get(verdict, 2) decision = policy[vi] @@ -61,35 +54,28 @@ def _new_should_allow(verdict, trust_level, force): return False -class TestForceNeverOverridesDangerous: - """The core bug: --force bypassed the dangerous verdict block.""" +class TestPolicyPrecedenceForDangerousVerdicts: + def test_builtin_dangerous_is_allowed_by_policy(self): + assert _new_should_allow("dangerous", "builtin", force=False) is True - def test_old_code_allows_dangerous_with_force(self): - """Old code: force=True lets dangerous skills through.""" - assert _old_should_allow("dangerous", "community", force=True) is True + def test_trusted_dangerous_is_blocked_without_force(self): + assert _new_should_allow("dangerous", "trusted", force=False) is False - def test_new_code_blocks_dangerous_with_force(self): - """Fixed code: force=True still blocks dangerous skills.""" - assert _new_should_allow("dangerous", "community", force=True) is False + def test_force_overrides_dangerous_for_community(self): + assert _new_should_allow("dangerous", "community", force=True) is True - def test_new_code_blocks_dangerous_trusted_with_force(self): - """Fixed code: even trusted + force cannot install dangerous.""" - assert _new_should_allow("dangerous", "trusted", force=True) is False + def test_force_overrides_dangerous_for_trusted(self): + assert _new_should_allow("dangerous", "trusted", force=True) is True def test_force_still_overrides_caution(self): - """force=True should still work for caution verdicts.""" assert _new_should_allow("caution", "community", force=True) is True def test_caution_community_blocked_without_force(self): - """Caution + community is blocked without force 
(unchanged).""" assert _new_should_allow("caution", "community", force=False) is False def test_safe_always_allowed(self): - """Safe verdict is always allowed regardless of force.""" assert _new_should_allow("safe", "community", force=False) is True assert _new_should_allow("safe", "community", force=True) is True - def test_dangerous_blocked_without_force(self): - """Dangerous is blocked without force (both old and new agree).""" - assert _old_should_allow("dangerous", "community", force=False) is False - assert _new_should_allow("dangerous", "community", force=False) is False + def test_old_code_happened_to_allow_forced_dangerous_community(self): + assert _old_should_allow("dangerous", "community", force=True) is True diff --git a/tests/tools/test_memory_tool.py b/tests/tools/test_memory_tool.py index 0ed3b12e..48cb6a83 100644 --- a/tests/tools/test_memory_tool.py +++ b/tests/tools/test_memory_tool.py @@ -9,9 +9,24 @@ from tools.memory_tool import ( memory_tool, _scan_memory_content, ENTRY_DELIMITER, + MEMORY_SCHEMA, ) +# ========================================================================= +# Tool schema guidance +# ========================================================================= + +class TestMemorySchema: + def test_discourages_diary_style_task_logs(self): + description = MEMORY_SCHEMA["description"] + assert "Do NOT save task progress" in description + assert "session_search" in description + assert "like a diary" not in description + assert "temporary task state" in description + assert ">80%" not in description + + # ========================================================================= # Security scanning # ========================================================================= diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py index 3ad44f0f..d5599894 100644 --- a/tests/tools/test_send_message_tool.py +++ b/tests/tools/test_send_message_tool.py @@ -2,6 +2,7 @@ import asyncio import json +import os 
import sys from pathlib import Path from types import SimpleNamespace @@ -29,6 +30,118 @@ def _install_telegram_mock(monkeypatch, bot): class TestSendMessageTool: + def test_cron_duplicate_target_is_skipped_and_explained(self): + home = SimpleNamespace(chat_id="-1001") + config, _telegram_cfg = _make_config() + config.get_home_channel = lambda _platform: home + + with patch.dict( + os.environ, + { + "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram", + "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001", + }, + clear=False, + ), \ + patch("gateway.config.load_gateway_config", return_value=config), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch("model_tools._run_async", side_effect=_run_async_immediately), \ + patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \ + patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock: + result = json.loads( + send_message_tool( + { + "action": "send", + "target": "telegram", + "message": "hello", + } + ) + ) + + assert result["success"] is True + assert result["skipped"] is True + assert result["reason"] == "cron_auto_delivery_duplicate_target" + assert "final response" in result["note"] + send_mock.assert_not_awaited() + mirror_mock.assert_not_called() + + def test_cron_different_target_still_sends(self): + config, telegram_cfg = _make_config() + + with patch.dict( + os.environ, + { + "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram", + "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001", + }, + clear=False, + ), \ + patch("gateway.config.load_gateway_config", return_value=config), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch("model_tools._run_async", side_effect=_run_async_immediately), \ + patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \ + patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock: + result = json.loads( + 
send_message_tool( + { + "action": "send", + "target": "telegram:-1002", + "message": "hello", + } + ) + ) + + assert result["success"] is True + assert result.get("skipped") is not True + send_mock.assert_awaited_once_with( + Platform.TELEGRAM, + telegram_cfg, + "-1002", + "hello", + thread_id=None, + media_files=[], + ) + mirror_mock.assert_called_once_with("telegram", "-1002", "hello", source_label="cli", thread_id=None) + + def test_cron_same_chat_different_thread_still_sends(self): + config, telegram_cfg = _make_config() + + with patch.dict( + os.environ, + { + "HERMES_CRON_AUTO_DELIVER_PLATFORM": "telegram", + "HERMES_CRON_AUTO_DELIVER_CHAT_ID": "-1001", + "HERMES_CRON_AUTO_DELIVER_THREAD_ID": "17585", + }, + clear=False, + ), \ + patch("gateway.config.load_gateway_config", return_value=config), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch("model_tools._run_async", side_effect=_run_async_immediately), \ + patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \ + patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock: + result = json.loads( + send_message_tool( + { + "action": "send", + "target": "telegram:-1001:99999", + "message": "hello", + } + ) + ) + + assert result["success"] is True + assert result.get("skipped") is not True + send_mock.assert_awaited_once_with( + Platform.TELEGRAM, + telegram_cfg, + "-1001", + "hello", + thread_id="99999", + media_files=[], + ) + mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="99999") + def test_sends_to_explicit_telegram_topic_target(self): config, telegram_cfg = _make_config() diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index c3624714..0d741476 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -9,9 +9,21 @@ from tools.session_search_tool import ( _format_conversation, 
_truncate_around_matches, MAX_SESSION_CHARS, + SESSION_SEARCH_SCHEMA, ) +# ========================================================================= +# Tool schema guidance +# ========================================================================= + +class TestSessionSearchSchema: + def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self): + description = SESSION_SEARCH_SCHEMA["description"] + assert "past conversations" in description + assert "recent turns of the current session" not in description + + # ========================================================================= # _format_timestamp # ========================================================================= diff --git a/tests/tools/test_skills_guard.py b/tests/tools/test_skills_guard.py index 70eb9fc6..7bcf55e8 100644 --- a/tests/tools/test_skills_guard.py +++ b/tests/tools/test_skills_guard.py @@ -46,9 +46,9 @@ from tools.skills_guard import ( class TestResolveTrustLevel: - def test_builtin_not_exposed(self): - # builtin is only used internally, not resolved from source string - assert _resolve_trust_level("openai/skills") == "trusted" + def test_official_sources_resolve_to_builtin(self): + assert _resolve_trust_level("official") == "builtin" + assert _resolve_trust_level("official/email/agentmail") == "builtin" def test_trusted_repos(self): assert _resolve_trust_level("openai/skills") == "trusted" @@ -116,11 +116,17 @@ class TestShouldAllowInstall: allowed, _ = should_allow_install(self._result("trusted", "caution", f)) assert allowed is True - def test_dangerous_blocked_even_trusted(self): + def test_trusted_dangerous_blocked_without_force(self): f = [Finding("x", "critical", "c", "f", 1, "m", "d")] allowed, _ = should_allow_install(self._result("trusted", "dangerous", f)) assert allowed is False + def test_builtin_dangerous_allowed_without_force(self): + f = [Finding("x", "critical", "c", "f", 1, "m", "d")] + allowed, reason = 
should_allow_install(self._result("builtin", "dangerous", f)) + assert allowed is True + assert "builtin source" in reason + def test_force_overrides_caution(self): f = [Finding("x", "high", "c", "f", 1, "m", "d")] allowed, reason = should_allow_install(self._result("community", "caution", f), force=True) @@ -132,22 +138,21 @@ class TestShouldAllowInstall: allowed, _ = should_allow_install(self._result("community", "dangerous", f), force=False) assert allowed is False - def test_force_never_overrides_dangerous(self): - """--force must not bypass dangerous verdict (regression test).""" + def test_force_overrides_dangerous_for_community(self): f = [Finding("x", "critical", "c", "f", 1, "m", "d")] allowed, reason = should_allow_install( self._result("community", "dangerous", f), force=True ) - assert allowed is False - assert "DANGEROUS" in reason + assert allowed is True + assert "Force-installed" in reason - def test_force_never_overrides_dangerous_trusted(self): - """--force must not bypass dangerous even for trusted sources.""" + def test_force_overrides_dangerous_for_trusted(self): f = [Finding("x", "critical", "c", "f", 1, "m", "d")] - allowed, _ = should_allow_install( + allowed, reason = should_allow_install( self._result("trusted", "dangerous", f), force=True ) - assert allowed is False + assert allowed is True + assert "Force-installed" in reason # --------------------------------------------------------------------------- diff --git a/tools/browser_tool.py b/tools/browser_tool.py index b3516c4f..ecdff753 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -53,6 +53,7 @@ import atexit import json import logging import os +import re import signal import subprocess import shutil @@ -165,63 +166,18 @@ def _emergency_cleanup_all_sessions(): if not _active_sessions: return - logger.info("Emergency cleanup: closing %s active session(s)...", len(_active_sessions)) - + logger.info("Emergency cleanup: closing %s active session(s)...", + 
len(_active_sessions)) + try: - if _is_local_mode(): - # Local mode: just close agent-browser sessions via CLI - for task_id, session_info in list(_active_sessions.items()): - session_name = session_info.get("session_name") - if session_name: - try: - browser_cmd = _find_agent_browser() - task_socket_dir = os.path.join( - _socket_safe_tmpdir(), - f"agent-browser-{session_name}" - ) - env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir} - subprocess.run( - browser_cmd.split() + ["--session", session_name, "--json", "close"], - capture_output=True, timeout=5, env=env, - ) - logger.info("Closed local session %s", session_name) - except Exception as e: - logger.debug("Error closing local session %s: %s", session_name, e) - else: - # Cloud mode: release Browserbase sessions via API - api_key = os.environ.get("BROWSERBASE_API_KEY") - project_id = os.environ.get("BROWSERBASE_PROJECT_ID") - - if not api_key or not project_id: - logger.warning("Cannot cleanup - missing BROWSERBASE credentials") - return - - for task_id, session_info in list(_active_sessions.items()): - bb_session_id = session_info.get("bb_session_id") - if bb_session_id: - try: - response = requests.post( - f"https://api.browserbase.com/v1/sessions/{bb_session_id}", - headers={ - "X-BB-API-Key": api_key, - "Content-Type": "application/json" - }, - json={ - "projectId": project_id, - "status": "REQUEST_RELEASE" - }, - timeout=5 # Short timeout for cleanup - ) - if response.status_code in (200, 201, 204): - logger.info("Closed session %s", bb_session_id) - else: - logger.warning("Failed to close session %s: HTTP %s", bb_session_id, response.status_code) - except Exception as e: - logger.error("Error closing session %s: %s", bb_session_id, e) - - _active_sessions.clear() + cleanup_all_browsers() except Exception as e: logger.error("Emergency cleanup error: %s", e) + finally: + with _cleanup_lock: + _active_sessions.clear() + _session_last_activity.clear() + _recording_sessions.clear() # Register 
cleanup via atexit only. Previous versions installed SIGINT/SIGTERM @@ -640,18 +596,14 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]: def _create_local_session(task_id: str) -> Dict[str, str]: - """Create a lightweight local browser session (no cloud API call). - - Returns the same dict shape as ``_create_browserbase_session`` so the rest - of the code can treat both modes uniformly. - """ import uuid - session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" - logger.info("Created local browser session %s", session_name) + session_name = f"h_{uuid.uuid4().hex[:10]}" + logger.info("Created local browser session %s for task %s", + session_name, task_id) return { "session_name": session_name, - "bb_session_id": None, # Not applicable in local mode - "cdp_url": None, # Not applicable in local mode + "bb_session_id": None, + "cdp_url": None, "features": {"local": True}, } @@ -772,6 +724,27 @@ def _find_agent_browser() -> str: ) +def _extract_screenshot_path_from_text(text: str) -> Optional[str]: + """Extract a screenshot file path from agent-browser human-readable output.""" + if not text: + return None + + patterns = [ + r"Screenshot saved to ['\"](?P/[^'\"]+?\.png)['\"]", + r"Screenshot saved to (?P/\S+?\.png)(?:\s|$)", + r"(?P/\S+?\.png)(?:\s|$)", + ] + + for pattern in patterns: + match = re.search(pattern, text) + if match: + path = match.group("path").strip().strip("'\"") + if path: + return path + + return None + + def _run_browser_command( task_id: str, command: str, @@ -841,9 +814,20 @@ def _run_browser_command( command, task_id, task_socket_dir, len(task_socket_dir)) browser_env = {**os.environ} - # Ensure PATH includes standard dirs (systemd services may have minimal PATH) - if "/usr/bin" not in browser_env.get("PATH", "").split(":"): - browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}" + + # Ensure PATH includes Hermes-managed Node first, then standard system dirs. 
+ hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + hermes_node_bin = str(hermes_home / "node" / "bin") + + existing_path = browser_env.get("PATH", "") + path_parts = [p for p in existing_path.split(":") if p] + candidate_dirs = [hermes_node_bin] + [p for p in _SANE_PATH.split(":") if p] + + for part in reversed(candidate_dirs): + if os.path.isdir(part) and part not in path_parts: + path_parts.insert(0, part) + + browser_env["PATH"] = ":".join(path_parts) browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir result = subprocess.run( @@ -866,10 +850,11 @@ def _run_browser_command( command, " ".join(cmd_parts[:4]) + "...", (result.stderr or "")[:200]) - # Parse JSON output - if result.stdout.strip(): + stdout_text = result.stdout.strip() + + if stdout_text: try: - parsed = json.loads(result.stdout.strip()) + parsed = json.loads(stdout_text) # Warn if snapshot came back empty (common sign of daemon/CDP issues) if command == "snapshot" and parsed.get("success"): snap_data = parsed.get("data", {}) @@ -879,13 +864,33 @@ def _run_browser_command( "returncode=%s", result.returncode) return parsed except json.JSONDecodeError: - # Non-JSON output indicates agent-browser crash or version mismatch - raw = result.stdout.strip()[:500] + raw = stdout_text[:2000] logger.warning("browser '%s' returned non-JSON output (rc=%s): %s", - command, result.returncode, raw[:200]) + command, result.returncode, raw[:500]) + + if command == "screenshot": + stderr_text = (result.stderr or "").strip() + combined_text = "\n".join( + part for part in [stdout_text, stderr_text] if part + ) + recovered_path = _extract_screenshot_path_from_text(combined_text) + + if recovered_path and Path(recovered_path).exists(): + logger.info( + "browser 'screenshot' recovered file from non-JSON output: %s", + recovered_path, + ) + return { + "success": True, + "data": { + "path": recovered_path, + "raw": raw, + }, + } + return { - "success": True, - "data": {"raw": raw} + "success": 
False, + "error": f"Non-JSON output from agent-browser for '{command}': {raw}" } # Check for errors @@ -1250,46 +1255,26 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str: def browser_close(task_id: Optional[str] = None) -> str: """ Close the browser session. - + Args: task_id: Task identifier for session isolation - + Returns: JSON string with close result """ effective_task_id = task_id or "default" - - # Stop auto-recording before closing - _maybe_stop_recording(effective_task_id) - - result = _run_browser_command(effective_task_id, "close", []) - - # Close the backend session (Browserbase API in cloud mode, nothing extra in local mode) - session_key = task_id if task_id and task_id in _active_sessions else "default" - if session_key in _active_sessions: - session_info = _active_sessions[session_key] - bb_session_id = session_info.get("bb_session_id") - if bb_session_id: - # Cloud mode: release the Browserbase session via API - try: - config = _get_browserbase_config() - _close_browserbase_session(bb_session_id, config["api_key"], config["project_id"]) - except Exception as e: - logger.warning("Could not close BrowserBase session: %s", e) - del _active_sessions[session_key] - - if result.get("success"): - return json.dumps({ - "success": True, - "closed": True - }, ensure_ascii=False) - else: - # Even if close fails, session was released - return json.dumps({ - "success": True, - "closed": True, - "warning": result.get("error", "Session may not have been active") - }, ensure_ascii=False) + with _cleanup_lock: + had_session = effective_task_id in _active_sessions + + cleanup_browser(effective_task_id) + + response = { + "success": True, + "closed": True, + } + if not had_session: + response["warning"] = "Session may not have been active" + return json.dumps(response, ensure_ascii=False) def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str: @@ -1481,9 +1466,11 @@ def browser_vision(question: str, annotate: bool = False, 
task_id: Optional[str] _cleanup_old_screenshots(screenshots_dir, max_age_hours=24) # Take screenshot using agent-browser - screenshot_args = [str(screenshot_path)] + screenshot_args = [] if annotate: - screenshot_args.insert(0, "--annotate") + screenshot_args.append("--annotate") + screenshot_args.append("--full") + screenshot_args.append(str(screenshot_path)) result = _run_browser_command( effective_task_id, "screenshot", @@ -1498,7 +1485,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] "success": False, "error": f"Failed to take screenshot ({mode} mode): {error_detail}" }, ensure_ascii=False) - + + actual_screenshot_path = result.get("data", {}).get("path") + if actual_screenshot_path: + screenshot_path = Path(actual_screenshot_path) + # Check if screenshot file was created if not screenshot_path.exists(): mode = "local" if _is_local_mode() else "cloud" diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 219cf6f9..124223c7 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -304,6 +304,12 @@ Jobs run in a fresh session with no current-chat context, so prompts must be sel If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction. On update, passing skills=[] clears attached skills. +NOTE: The agent's final response is auto-delivered to the target — do NOT use +send_message in the prompt for that same destination. Same-target send_message +calls are skipped to avoid duplicate cron deliveries. Put the primary +user-facing content in the final response, and use send_message only for +additional or different targets. 
+ Important safety rule: cron-run sessions should not recursively schedule more cron jobs.""", "parameters": { "type": "object", diff --git a/tools/memory_tool.py b/tools/memory_tool.py index 2ce76312..b921a84f 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -435,24 +435,25 @@ def check_memory_requirements() -> bool: MEMORY_SCHEMA = { "name": "memory", "description": ( - "Save important information to persistent memory that survives across sessions. " - "Your memory appears in your system prompt at session start -- it's how you " - "remember things about the user and your environment between conversations.\n\n" + "Save durable information to persistent memory that survives across sessions. " + "Memory is injected into future turns, so keep it compact and focused on facts " + "that will still matter later.\n\n" "WHEN TO SAVE (do this proactively, don't wait to be asked):\n" "- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n" "- You discover something about the environment (OS, installed tools, project structure)\n" "- User corrects you or says 'remember this' / 'don't do that again'\n" "- You learn a convention, API quirk, or workflow specific to this user's setup\n" - "- You completed something - log it like a diary entry\n" - "- After completing a complex task, save a brief note about what was done\n\n" - "- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n" + "- You identify a stable fact that will be useful again in future sessions\n\n" + "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " + "state to memory; use session_search to recall those from past transcripts.\n" + "If you've discovered a new way to do something, solved a problem that could be " + "necessary later, save it as a skill with the skill tool.\n\n" "TWO TARGETS:\n" "- 'user': who the user is -- name, role, 
preferences, communication style, pet peeves\n" "- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n" "ACTIONS: add (new entry), replace (update existing -- old_text identifies it), " - "remove (delete -- old_text identifies it).\n" - "Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n" - "SKIP: trivial/obvious info, things easily re-discovered, raw data dumps." + "remove (delete -- old_text identifies it).\n\n" + "SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state." ), "parameters": { "type": "object", diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 537f6335..6a7260fd 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -153,6 +153,10 @@ def _handle_send(args): f"or set a home channel via: hermes config set {platform_name.upper()}_HOME_CHANNEL " }) + duplicate_skip = _maybe_skip_cron_duplicate_send(platform_name, chat_id, thread_id) + if duplicate_skip: + return json.dumps(duplicate_skip) + try: from model_tools import _run_async result = _run_async( @@ -213,6 +217,51 @@ def _describe_media_for_mirror(media_files): return f"[Sent {len(media_files)} media attachments]" +def _get_cron_auto_delivery_target(): + """Return the cron scheduler's auto-delivery target for the current run, if any.""" + platform = os.getenv("HERMES_CRON_AUTO_DELIVER_PLATFORM", "").strip().lower() + chat_id = os.getenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID", "").strip() + if not platform or not chat_id: + return None + thread_id = os.getenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID", "").strip() or None + return { + "platform": platform, + "chat_id": chat_id, + "thread_id": thread_id, + } + + +def _maybe_skip_cron_duplicate_send(platform_name: str, chat_id: str, thread_id: str | None): + """Skip redundant cron send_message calls when the scheduler will auto-deliver there.""" + auto_target = 
_get_cron_auto_delivery_target() + if not auto_target: + return None + + same_target = ( + auto_target["platform"] == platform_name + and str(auto_target["chat_id"]) == str(chat_id) + and auto_target.get("thread_id") == thread_id + ) + if not same_target: + return None + + target_label = f"{platform_name}:{chat_id}" + if thread_id is not None: + target_label += f":{thread_id}" + + return { + "success": True, + "skipped": True, + "reason": "cron_auto_delivery_duplicate_target", + "target": target_label, + "note": ( + f"Skipped send_message to {target_label}. This cron job will already auto-deliver " + "its final response to that same target. Put the intended user-facing content in " + "your final response instead, or use a different target if you want an additional message." + ), + } + + async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None): """Route a message to the appropriate platform sender.""" from gateway.config import Platform diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index cd1b98fd..f4143fa1 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = { "- The user references a project, person, or concept that seems familiar but isn't in memory\n" "- You want to check if you've solved a similar problem before\n" "- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n" - "Don't hesitate to search -- it's fast and cheap. Better to search and confirm " - "than to guess or ask the user to repeat themselves.\n\n" + "Don't hesitate to search when it is actually cross-session -- it's fast and cheap. " + "Better to search and confirm than to guess or ask the user to repeat themselves.\n\n" "Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), " "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). 
" "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses " diff --git a/tools/skills_guard.py b/tools/skills_guard.py index c354d654..df62edbe 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -645,14 +645,11 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool, Args: result: Scan result from scan_skill() - force: If True, override blocks for caution verdicts (never overrides dangerous) + force: If True, override blocked policy decisions for this scan result Returns: (allowed, reason) tuple """ - if result.verdict == "dangerous": - return False, f"Scan verdict is DANGEROUS ({len(result.findings)} findings). Blocked." - policy = INSTALL_POLICY.get(result.trust_level, INSTALL_POLICY["community"]) vi = VERDICT_INDEX.get(result.verdict, 2) decision = policy[vi] @@ -661,7 +658,10 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool, return True, f"Allowed ({result.trust_level} source, {result.verdict} verdict)" if force: - return True, f"Force-installed despite {result.verdict} verdict ({len(result.findings)} findings)" + return True, ( + f"Force-installed despite blocked {result.verdict} verdict " + f"({len(result.findings)} findings)" + ) return False, ( f"Blocked ({result.trust_level} source + {result.verdict} verdict, " diff --git a/tools/vision_tools.py b/tools/vision_tools.py index c1b09a22..264532e9 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -354,6 +354,7 @@ async def vision_analyze_tool( # Prepare error response result = { "success": False, + "error": error_msg, "analysis": analysis, } diff --git a/trajectory_compressor.py b/trajectory_compressor.py index ef81d6e2..1bfed6bf 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -495,6 +495,21 @@ class TrajectoryCompressor: parts.append(f"[Turn {i} - {role.upper()}]:\n{value}") return "\n\n".join(parts) + + @staticmethod + def _coerce_summary_content(content: Any) 
-> str: + """Normalize summary-model output to a safe string.""" + if not isinstance(content, str): + content = str(content) if content else "" + return content.strip() + + @staticmethod + def _ensure_summary_prefix(summary: str) -> str: + """Normalize summary text to include the expected prefix exactly once.""" + text = (summary or "").strip() + if text.startswith("[CONTEXT SUMMARY]:"): + return text + return "[CONTEXT SUMMARY]:" if not text else f"[CONTEXT SUMMARY]: {text}" def _generate_summary(self, content: str, metrics: TrajectoryMetrics) -> str: """ @@ -545,13 +560,8 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" max_tokens=self.config.summary_target_tokens * 2, ) - summary = response.choices[0].message.content.strip() - - # Ensure it starts with the prefix - if not summary.startswith("[CONTEXT SUMMARY]:"): - summary = "[CONTEXT SUMMARY]: " + summary - - return summary + summary = self._coerce_summary_content(response.choices[0].message.content) + return self._ensure_summary_prefix(summary) except Exception as e: metrics.summarization_errors += 1 @@ -612,13 +622,8 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" max_tokens=self.config.summary_target_tokens * 2, ) - summary = response.choices[0].message.content.strip() - - # Ensure it starts with the prefix - if not summary.startswith("[CONTEXT SUMMARY]:"): - summary = "[CONTEXT SUMMARY]: " + summary - - return summary + summary = self._coerce_summary_content(response.choices[0].message.content) + return self._ensure_summary_prefix(summary) except Exception as e: metrics.summarization_errors += 1 diff --git a/website/docs/developer-guide/adding-providers.md b/website/docs/developer-guide/adding-providers.md new file mode 100644 index 00000000..7b4695dc --- /dev/null +++ b/website/docs/developer-guide/adding-providers.md @@ -0,0 +1,424 @@ +--- +sidebar_position: 5 +title: "Adding Providers" +description: "How to add a new inference provider to Hermes Agent — 
auth, runtime resolution, CLI flows, adapters, tests, and docs" +--- + +# Adding Providers + +Hermes can already talk to any OpenAI-compatible endpoint through the custom provider path. Do not add a built-in provider unless you want first-class UX for that service: + +- provider-specific auth or token refresh +- a curated model catalog +- setup / `hermes model` menu entries +- provider aliases for `provider:model` syntax +- a non-OpenAI API shape that needs an adapter + +If the provider is just "another OpenAI-compatible base URL and API key", a named custom provider may be enough. + +## The mental model + +A built-in provider has to line up across a few layers: + +1. `hermes_cli/auth.py` decides how credentials are found. +2. `hermes_cli/runtime_provider.py` turns that into runtime data: + - `provider` + - `api_mode` + - `base_url` + - `api_key` + - `source` +3. `run_agent.py` uses `api_mode` to decide how requests are built and sent. +4. `hermes_cli/models.py`, `hermes_cli/main.py`, and `hermes_cli/setup.py` make the provider show up in the CLI. +5. `agent/auxiliary_client.py` and `agent/model_metadata.py` keep side tasks and token budgeting working. + +The important abstraction is `api_mode`. + +- Most providers use `chat_completions`. +- Codex uses `codex_responses`. +- Anthropic uses `anthropic_messages`. +- A new non-OpenAI protocol usually means adding a new adapter and a new `api_mode` branch. + +## Choose the implementation path first + +### Path A — OpenAI-compatible provider + +Use this when the provider accepts standard chat-completions style requests. + +Typical work: + +- add auth metadata +- add model catalog / aliases +- add runtime resolution +- add CLI menu wiring +- add aux-model defaults +- add tests and user docs + +You usually do not need a new adapter or a new `api_mode`. + +### Path B — Native provider + +Use this when the provider does not behave like OpenAI chat completions. 
+ +Examples in-tree today: + +- `codex_responses` +- `anthropic_messages` + +This path includes everything from Path A plus: + +- a provider adapter in `agent/` +- `run_agent.py` branches for request building, dispatch, usage extraction, interrupt handling, and response normalization +- adapter tests + +## File checklist + +### Required for every built-in provider + +1. `hermes_cli/auth.py` +2. `hermes_cli/models.py` +3. `hermes_cli/runtime_provider.py` +4. `hermes_cli/main.py` +5. `hermes_cli/setup.py` +6. `agent/auxiliary_client.py` +7. `agent/model_metadata.py` +8. tests +9. user-facing docs under `website/docs/` + +### Additional for native / non-OpenAI providers + +10. `agent/_adapter.py` +11. `run_agent.py` +12. `pyproject.toml` if a provider SDK is required + +## Step 1: Pick one canonical provider id + +Choose a single provider id and use it everywhere. + +Examples from the repo: + +- `openai-codex` +- `kimi-coding` +- `minimax-cn` + +That same id should appear in: + +- `PROVIDER_REGISTRY` in `hermes_cli/auth.py` +- `_PROVIDER_LABELS` in `hermes_cli/models.py` +- `_PROVIDER_ALIASES` in both `hermes_cli/auth.py` and `hermes_cli/models.py` +- CLI `--provider` choices in `hermes_cli/main.py` +- setup / model selection branches +- auxiliary-model defaults +- tests + +If the id differs between those files, the provider will feel half-wired: auth may work while `/model`, setup, or runtime resolution silently misses it. + +## Step 2: Add auth metadata in `hermes_cli/auth.py` + +For API-key providers, add a `ProviderConfig` entry to `PROVIDER_REGISTRY` with: + +- `id` +- `name` +- `auth_type="api_key"` +- `inference_base_url` +- `api_key_env_vars` +- optional `base_url_env_var` + +Also add aliases to `_PROVIDER_ALIASES`. 
+ +Use the existing providers as templates: + +- simple API-key path: Z.AI, MiniMax +- API-key path with endpoint detection: Kimi, Z.AI +- native token resolution: Anthropic +- OAuth / auth-store path: Nous, OpenAI Codex + +Questions to answer here: + +- What env vars should Hermes check, and in what priority order? +- Does the provider need base-URL overrides? +- Does it need endpoint probing or token refresh? +- What should the auth error say when credentials are missing? + +If the provider needs something more than "look up an API key", add a dedicated credential resolver instead of shoving logic into unrelated branches. + +## Step 3: Add model catalog and aliases in `hermes_cli/models.py` + +Update the provider catalog so the provider works in menus and in `provider:model` syntax. + +Typical edits: + +- `_PROVIDER_MODELS` +- `_PROVIDER_LABELS` +- `_PROVIDER_ALIASES` +- provider display order inside `list_available_providers()` +- `provider_model_ids()` if the provider supports a live `/models` fetch + +If the provider exposes a live model list, prefer that first and keep `_PROVIDER_MODELS` as the static fallback. + +This file is also what makes inputs like these work: + +```text +anthropic:claude-sonnet-4-6 +kimi:model-name +``` + +If aliases are missing here, the provider may authenticate correctly but still fail in `/model` parsing. + +## Step 4: Resolve runtime data in `hermes_cli/runtime_provider.py` + +`resolve_runtime_provider()` is the shared path used by CLI, gateway, cron, ACP, and helper clients. + +Add a branch that returns a dict with at least: + +```python +{ + "provider": "your-provider", + "api_mode": "chat_completions", # or your native mode + "base_url": "https://...", + "api_key": "...", + "source": "env|portal|auth-store|explicit", + "requested_provider": requested_provider, +} +``` + +If the provider is OpenAI-compatible, `api_mode` should usually stay `chat_completions`. + +Be careful with API-key precedence. 
Hermes already contains logic to avoid leaking an OpenRouter key to unrelated endpoints. A new provider should be equally explicit about which key goes to which base URL.
+
+## Step 5: Wire the CLI in `hermes_cli/main.py` and `hermes_cli/setup.py`
+
+A provider is not discoverable until it shows up in the interactive flows.
+
+Update:
+
+### `hermes_cli/main.py`
+
+- `provider_labels`
+- provider dispatch inside the `model` command
+- `--provider` argument choices
+- login/logout choices if the provider supports those flows
+- a `_model_flow_<provider>()` function, or reuse `_model_flow_api_key_provider()` if it fits
+
+### `hermes_cli/setup.py`
+
+- `provider_choices`
+- auth branch for the provider
+- model-selection branch
+- any provider-specific explanatory text
+- any place where a provider should be excluded from OpenRouter-only prompts or routing settings
+
+If you only update one of these files, `hermes model` and `hermes setup` will drift.
+
+## Step 6: Keep auxiliary calls working
+
+Two files matter here:
+
+### `agent/auxiliary_client.py`
+
+Add a cheap / fast default aux model to `_API_KEY_PROVIDER_AUX_MODELS` if this is a direct API-key provider.
+
+Auxiliary tasks include things like:
+
+- vision summarization
+- web extraction summarization
+- context compression summaries
+- session-search summaries
+- memory flushes
+
+If the provider has no sensible aux default, side tasks may fall back badly or use an expensive main model unexpectedly.
+
+### `agent/model_metadata.py`
+
+Add context lengths for the provider's models so token budgeting, compression thresholds, and limits stay sane.
+
+## Step 7: If the provider is native, add an adapter and `run_agent.py` support
+
+If the provider is not plain chat completions, isolate the provider-specific logic in `agent/<provider>_adapter.py`.
+
+Keep `run_agent.py` focused on orchestration. It should call adapter helpers, not hand-build provider payloads inline all over the file.
+ +A native provider usually needs work in these places: + +### New adapter file + +Typical responsibilities: + +- build the SDK / HTTP client +- resolve tokens +- convert OpenAI-style conversation messages to the provider's request format +- convert tool schemas if needed +- normalize provider responses back into what `run_agent.py` expects +- extract usage and finish-reason data + +### `run_agent.py` + +Search for `api_mode` and audit every switch point. At minimum, verify: + +- `__init__` chooses the new `api_mode` +- client construction works for the provider +- `_build_api_kwargs()` knows how to format requests +- `_api_call_with_interrupt()` dispatches to the right client call +- interrupt / client rebuild paths work +- response validation accepts the provider's shape +- finish-reason extraction is correct +- token-usage extraction is correct +- fallback-model activation can switch into the new provider cleanly +- summary-generation and memory-flush paths still work + +Also search `run_agent.py` for `self.client.`. Any code path that assumes the standard OpenAI client exists can break when a native provider uses a different client object or `self.client = None`. + +### Prompt caching and provider-specific request fields + +Prompt caching and provider-specific knobs are easy to regress. + +Examples already in-tree: + +- Anthropic has a native prompt-caching path +- OpenRouter gets provider-routing fields +- not every provider should receive every request-side option + +When you add a native provider, double-check that Hermes is only sending fields that provider actually understands. + +## Step 8: Tests + +At minimum, touch the tests that guard provider wiring. 
+
+Common places:
+
+- `tests/test_runtime_provider_resolution.py`
+- `tests/test_cli_provider_resolution.py`
+- `tests/test_cli_model_command.py`
+- `tests/test_setup_model_selection.py`
+- `tests/test_provider_parity.py`
+- `tests/test_run_agent.py`
+- `tests/test_<provider>_adapter.py` for a native provider
+
+For docs-only examples, the exact file set may differ. The point is to cover:
+
+- auth resolution
+- CLI menu / provider selection
+- runtime provider resolution
+- agent execution path
+- provider:model parsing
+- any adapter-specific message conversion
+
+Run tests with xdist disabled:
+
+```bash
+source .venv/bin/activate
+python -m pytest tests/test_runtime_provider_resolution.py tests/test_cli_provider_resolution.py tests/test_cli_model_command.py tests/test_setup_model_selection.py -n0 -q
+```
+
+For deeper changes, run the full suite before pushing:
+
+```bash
+source .venv/bin/activate
+python -m pytest tests/ -n0 -q
+```
+
+## Step 9: Live verification
+
+After tests, run a real smoke test.
+
+```bash
+source .venv/bin/activate
+python -m hermes_cli.main chat -q "Say hello" --provider your-provider --model your-model
+```
+
+Also test the interactive flows if you changed menus:
+
+```bash
+source .venv/bin/activate
+python -m hermes_cli.main model
+python -m hermes_cli.main setup
+```
+
+For native providers, verify at least one tool call too, not just a plain text response.
+
+## Step 10: Update user-facing docs
+
+If the provider is meant to ship as a first-class option, update the user docs too:
+
+- `website/docs/getting-started/quickstart.md`
+- `website/docs/user-guide/configuration.md`
+- `website/docs/reference/environment-variables.md`
+
+A developer can wire the provider perfectly and still leave users unable to discover the required env vars or setup flow.
+
+## OpenAI-compatible provider checklist
+
+Use this if the provider is standard chat completions.
+
+- [ ] `ProviderConfig` added in `hermes_cli/auth.py`
+- [ ] aliases added in `hermes_cli/auth.py` and `hermes_cli/models.py`
+- [ ] model catalog added in `hermes_cli/models.py`
+- [ ] runtime branch added in `hermes_cli/runtime_provider.py`
+- [ ] CLI wiring added in `hermes_cli/main.py`
+- [ ] setup wiring added in `hermes_cli/setup.py`
+- [ ] aux model added in `agent/auxiliary_client.py`
+- [ ] context lengths added in `agent/model_metadata.py`
+- [ ] runtime / CLI tests updated
+- [ ] user docs updated
+
+## Native provider checklist
+
+Use this when the provider needs a new protocol path.
+
+- [ ] everything in the OpenAI-compatible checklist
+- [ ] adapter added in `agent/<provider>_adapter.py`
+- [ ] new `api_mode` supported in `run_agent.py`
+- [ ] interrupt / rebuild path works
+- [ ] usage and finish-reason extraction works
+- [ ] fallback path works
+- [ ] adapter tests added
+- [ ] live smoke test passes
+
+## Common pitfalls
+
+### 1. Adding the provider to auth but not to model parsing
+
+That makes credentials resolve correctly while `/model` and `provider:model` inputs fail.
+
+### 2. Forgetting that `config["model"]` can be a string or a dict
+
+A lot of provider-selection code has to normalize both forms.
+
+### 3. Assuming a built-in provider is required
+
+If the service is just OpenAI-compatible, a custom provider may already solve the user problem with less maintenance.
+
+### 4. Forgetting auxiliary paths
+
+The main chat path can work while summarization, memory flushes, or vision helpers fail because aux routing was never updated.
+
+### 5. Native-provider branches hiding in `run_agent.py`
+
+Search for `api_mode` and `self.client.`. Do not assume the obvious request path is the only one.
+
+### 6. Sending OpenRouter-only knobs to other providers
+
+Fields like provider routing belong only on the providers that support them.
+
+### 7. Updating `hermes model` but not `hermes setup`
+
+Both flows need to know about the provider.
+ +## Good search targets while implementing + +If you are hunting for all the places a provider touches, search these symbols: + +- `PROVIDER_REGISTRY` +- `_PROVIDER_ALIASES` +- `_PROVIDER_MODELS` +- `resolve_runtime_provider` +- `_model_flow_` +- `provider_choices` +- `api_mode` +- `_API_KEY_PROVIDER_AUX_MODELS` +- `self.client.` + +## Related docs + +- [Provider Runtime Resolution](./provider-runtime.md) +- [Architecture](./architecture.md) +- [Contributing](./contributing.md) diff --git a/website/docs/developer-guide/architecture.md b/website/docs/developer-guide/architecture.md index 2ff14817..1fb9ff41 100644 --- a/website/docs/developer-guide/architecture.md +++ b/website/docs/developer-guide/architecture.md @@ -41,12 +41,13 @@ If you are new to the codebase, read in this order: 2. [Agent Loop Internals](./agent-loop.md) 3. [Prompt Assembly](./prompt-assembly.md) 4. [Provider Runtime Resolution](./provider-runtime.md) -5. [Tools Runtime](./tools-runtime.md) -6. [Session Storage](./session-storage.md) -7. [Gateway Internals](./gateway-internals.md) -8. [Context Compression & Prompt Caching](./context-compression-and-caching.md) -9. [ACP Internals](./acp-internals.md) -10. [Environments, Benchmarks & Data Generation](./environments.md) +5. [Adding Providers](./adding-providers.md) +6. [Tools Runtime](./tools-runtime.md) +7. [Session Storage](./session-storage.md) +8. [Gateway Internals](./gateway-internals.md) +9. [Context Compression & Prompt Caching](./context-compression-and-caching.md) +10. [ACP Internals](./acp-internals.md) +11. [Environments, Benchmarks & Data Generation](./environments.md) ## Major subsystems diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md index f14ab9b4..5f653eae 100644 --- a/website/docs/developer-guide/contributing.md +++ b/website/docs/developer-guide/contributing.md @@ -20,6 +20,12 @@ We value contributions in this order: 6. 
**New tools** — rarely needed; most capabilities should be skills 7. **Documentation** — fixes, clarifications, new examples +## Common contribution paths + +- Building a new tool? Start with [Adding Tools](./adding-tools.md) +- Building a new skill? Start with [Creating Skills](./creating-skills.md) +- Building a new inference provider? Start with [Adding Providers](./adding-providers.md) + ## Development Setup ### Prerequisites diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md index 9bfd48c2..68fe537c 100644 --- a/website/docs/developer-guide/provider-runtime.md +++ b/website/docs/developer-guide/provider-runtime.md @@ -20,6 +20,8 @@ Primary implementation: - `hermes_cli/auth.py` - `agent/auxiliary_client.py` +If you are trying to add a new first-class inference provider, read [Adding Providers](./adding-providers.md) alongside this page. + ## Resolution precedence At a high level, provider resolution uses: diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md index e273f6da..a43d7370 100644 --- a/website/docs/getting-started/installation.md +++ b/website/docs/getting-started/installation.md @@ -119,6 +119,7 @@ uv pip install -e "." 
| `cli` | Terminal menu UI for setup wizard | `uv pip install -e ".[cli]"` | | `modal` | Modal cloud execution backend | `uv pip install -e ".[modal]"` | | `tts-premium` | ElevenLabs premium voices | `uv pip install -e ".[tts-premium]"` | +| `voice` | CLI microphone input + audio playback | `uv pip install -e ".[voice]"` | | `pty` | PTY terminal support | `uv pip install -e ".[pty]"` | | `honcho` | AI-native memory (Honcho integration) | `uv pip install -e ".[honcho]"` | | `mcp` | Model Context Protocol support | `uv pip install -e ".[mcp]"` | diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md index 2c08f077..bcdbb44d 100644 --- a/website/docs/getting-started/learning-path.md +++ b/website/docs/getting-started/learning-path.md @@ -54,7 +54,9 @@ Deploy Hermes Agent as a bot on your favorite messaging platform. 3. [Messaging Overview](/docs/user-guide/messaging) 4. [Telegram Setup](/docs/user-guide/messaging/telegram) 5. [Discord Setup](/docs/user-guide/messaging/discord) -6. [Security](/docs/user-guide/security) +6. [Voice Mode](/docs/user-guide/features/voice-mode) +7. [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes) +8. [Security](/docs/user-guide/security) For full project examples, see: - [Daily Briefing Bot](/docs/guides/daily-briefing-bot) diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index e743baf6..7fed47a2 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -129,6 +129,25 @@ Chat with Hermes from your phone or other surfaces via Telegram, Discord, Slack, hermes gateway setup # Interactive platform configuration ``` +### Add voice mode + +Want microphone input in the CLI or spoken replies in messaging? 
+ +```bash +pip install hermes-agent[voice] + +# Optional but recommended for free local speech-to-text +pip install faster-whisper +``` + +Then start Hermes and enable it inside the CLI: + +```text +/voice on +``` + +Press `Ctrl+B` to record, or use `/voice tts` to have Hermes speak its replies. See [Voice Mode](../user-guide/features/voice-mode.md) for the full setup across CLI, Telegram, Discord, and Discord voice channels. + ### Schedule automated tasks ``` diff --git a/website/docs/guides/use-voice-mode-with-hermes.md b/website/docs/guides/use-voice-mode-with-hermes.md new file mode 100644 index 00000000..dc35dcc6 --- /dev/null +++ b/website/docs/guides/use-voice-mode-with-hermes.md @@ -0,0 +1,422 @@ +--- +sidebar_position: 7 +title: "Use Voice Mode with Hermes" +description: "A practical guide to setting up and using Hermes voice mode across CLI, Telegram, Discord, and Discord voice channels" +--- + +# Use Voice Mode with Hermes + +This guide is the practical companion to the [Voice Mode feature reference](/docs/user-guide/features/voice-mode). + +If the feature page explains what voice mode can do, this guide shows how to actually use it well. + +## What voice mode is good for + +Voice mode is especially useful when: +- you want a hands-free CLI workflow +- you want spoken responses in Telegram or Discord +- you want Hermes sitting in a Discord voice channel for live conversation +- you want quick idea capture, debugging, or back-and-forth while walking around instead of typing + +## Choose your voice mode setup + +There are really three different voice experiences in Hermes. + +| Mode | Best for | Platform | +|---|---|---| +| Interactive microphone loop | Personal hands-free use while coding or researching | CLI | +| Voice replies in chat | Spoken responses alongside normal messaging | Telegram, Discord | +| Live voice channel bot | Group or personal live conversation in a VC | Discord voice channels | + +A good path is: +1. get text working first +2. 
enable voice replies second +3. move to Discord voice channels last if you want the full experience + +## Step 1: make sure normal Hermes works first + +Before touching voice mode, verify that: +- Hermes starts +- your provider is configured +- the agent can answer text prompts normally + +```bash +hermes +``` + +Ask something simple: + +```text +What tools do you have available? +``` + +If that is not solid yet, fix text mode first. + +## Step 2: install the right extras + +### CLI microphone + playback + +```bash +pip install hermes-agent[voice] +``` + +### Messaging platforms + +```bash +pip install hermes-agent[messaging] +``` + +### Premium ElevenLabs TTS + +```bash +pip install hermes-agent[tts-premium] +``` + +### Everything + +```bash +pip install hermes-agent[all] +``` + +## Step 3: install system dependencies + +### macOS + +```bash +brew install portaudio ffmpeg opus +``` + +### Ubuntu / Debian + +```bash +sudo apt install portaudio19-dev ffmpeg libopus0 +``` + +Why these matter: +- `portaudio` → microphone input / playback for CLI voice mode +- `ffmpeg` → audio conversion for TTS and messaging delivery +- `opus` → Discord voice codec support + +## Step 4: choose STT and TTS providers + +Hermes supports both local and cloud speech stacks. + +### Easiest / cheapest setup + +Use local STT and free Edge TTS: +- STT provider: `local` +- TTS provider: `edge` + +This is usually the best place to start. 
+ +### Environment file example + +Add to `~/.hermes/.env`: + +```bash +# Cloud STT options (local needs no key) +GROQ_API_KEY=*** +VOICE_TOOLS_OPENAI_KEY=*** + +# Premium TTS (optional) +ELEVENLABS_API_KEY=*** +``` + +### Provider recommendations + +#### Speech-to-text + +- `local` → best default for privacy and zero-cost use +- `groq` → very fast cloud transcription +- `openai` → good paid fallback + +#### Text-to-speech + +- `edge` → free and good enough for most users +- `elevenlabs` → best quality +- `openai` → good middle ground + +## Step 5: recommended config + +```yaml +voice: + record_key: "ctrl+b" + max_recording_seconds: 120 + auto_tts: false + silence_threshold: 200 + silence_duration: 3.0 + +stt: + provider: "local" + local: + model: "base" + +tts: + provider: "edge" + edge: + voice: "en-US-AriaNeural" +``` + +This is a good conservative default for most people. + +## Use case 1: CLI voice mode + +## Turn it on + +Start Hermes: + +```bash +hermes +``` + +Inside the CLI: + +```text +/voice on +``` + +### Recording flow + +Default key: +- `Ctrl+B` + +Workflow: +1. press `Ctrl+B` +2. speak +3. wait for silence detection to stop recording automatically +4. Hermes transcribes and responds +5. if TTS is on, it speaks the answer +6. the loop can automatically restart for continuous use + +### Useful commands + +```text +/voice +/voice on +/voice off +/voice tts +/voice status +``` + +### Good CLI workflows + +#### Walk-up debugging + +Say: + +```text +I keep getting a docker permission error. Help me debug it. +``` + +Then continue hands-free: +- "Read the last error again" +- "Explain the root cause in simpler terms" +- "Now give me the exact fix" + +#### Research / brainstorming + +Great for: +- walking around while thinking +- dictating half-formed ideas +- asking Hermes to structure your thoughts in real time + +#### Accessibility / low-typing sessions + +If typing is inconvenient, voice mode is one of the fastest ways to stay in the full Hermes loop. 
+ +## Tuning CLI behavior + +### Silence threshold + +If Hermes starts/stops too aggressively, tune: + +```yaml +voice: + silence_threshold: 250 +``` + +Higher threshold = less sensitive. + +### Silence duration + +If you pause a lot between sentences, increase: + +```yaml +voice: + silence_duration: 4.0 +``` + +### Record key + +If `Ctrl+B` conflicts with your terminal or tmux habits: + +```yaml +voice: + record_key: "ctrl+space" +``` + +## Use case 2: voice replies in Telegram or Discord + +This mode is simpler than full voice channels. + +Hermes stays a normal chat bot, but can speak replies. + +### Start the gateway + +```bash +hermes gateway +``` + +### Turn on voice replies + +Inside Telegram or Discord: + +```text +/voice on +``` + +or + +```text +/voice tts +``` + +### Modes + +| Mode | Meaning | +|---|---| +| `off` | text only | +| `voice_only` | speak only when the user sent voice | +| `all` | speak every reply | + +### When to use which mode + +- `/voice on` if you want spoken replies only for voice-originating messages +- `/voice tts` if you want a full spoken assistant all the time + +### Good messaging workflows + +#### Telegram assistant on your phone + +Use when: +- you are away from your machine +- you want to send voice notes and get quick spoken replies +- you want Hermes to function like a portable research or ops assistant + +#### Discord DMs with spoken output + +Useful when you want private interaction without server-channel mention behavior. + +## Use case 3: Discord voice channels + +This is the most advanced mode. + +Hermes joins a Discord VC, listens to user speech, transcribes it, runs the normal agent pipeline, and speaks replies back into the channel. 
+ +## Required Discord permissions + +In addition to the normal text-bot setup, make sure the bot has: +- Connect +- Speak +- preferably Use Voice Activity + +Also enable privileged intents in the Developer Portal: +- Presence Intent +- Server Members Intent +- Message Content Intent + +## Join and leave + +In a Discord text channel where the bot is present: + +```text +/voice join +/voice leave +/voice status +``` + +### What happens when joined + +- users speak in the VC +- Hermes detects speech boundaries +- transcripts are posted in the associated text channel +- Hermes responds in text and audio +- the text channel is the one where `/voice join` was issued + +### Best practices for Discord VC use + +- keep `DISCORD_ALLOWED_USERS` tight +- use a dedicated bot/testing channel at first +- verify STT and TTS work in ordinary text-chat voice mode before trying VC mode + +## Voice quality recommendations + +### Best quality setup + +- STT: local `large-v3` or Groq `whisper-large-v3` +- TTS: ElevenLabs + +### Best speed / convenience setup + +- STT: local `base` or Groq +- TTS: Edge + +### Best zero-cost setup + +- STT: local +- TTS: Edge + +## Common failure modes + +### "No audio device found" + +Install `portaudio`. + +### "Bot joins but hears nothing" + +Check: +- your Discord user ID is in `DISCORD_ALLOWED_USERS` +- you are not muted +- privileged intents are enabled +- the bot has Connect/Speak permissions + +### "It transcribes but does not speak" + +Check: +- TTS provider config +- API key / quota for ElevenLabs or OpenAI +- `ffmpeg` install for Edge conversion paths + +### "Whisper outputs garbage" + +Try: +- quieter environment +- higher `silence_threshold` +- different STT provider/model +- shorter, clearer utterances + +### "It works in DMs but not in server channels" + +That is often mention policy. + +By default, the bot needs an `@mention` in Discord server text channels unless configured otherwise. 
+ +## Suggested first-week setup + +If you want the shortest path to success: + +1. get text Hermes working +2. install `hermes-agent[voice]` +3. use CLI voice mode with local STT + Edge TTS +4. then enable `/voice on` in Telegram or Discord +5. only after that, try Discord VC mode + +That progression keeps the debugging surface small. + +## Where to read next + +- [Voice Mode feature reference](/docs/user-guide/features/voice-mode) +- [Messaging Gateway](/docs/user-guide/messaging) +- [Discord setup](/docs/user-guide/messaging/discord) +- [Telegram setup](/docs/user-guide/messaging/telegram) +- [Configuration](/docs/user-guide/configuration) diff --git a/website/docs/index.md b/website/docs/index.md index 3dbfcaf7..470c8d2e 100644 --- a/website/docs/index.md +++ b/website/docs/index.md @@ -33,6 +33,8 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl | 📚 **[Skills System](/docs/user-guide/features/skills)** | Procedural memory the agent creates and reuses | | 🔌 **[MCP Integration](/docs/user-guide/features/mcp)** | Connect to MCP servers, filter their tools, and extend Hermes safely | | 🧭 **[Use MCP with Hermes](/docs/guides/use-mcp-with-hermes)** | Practical MCP setup patterns, examples, and tutorials | +| 🎙️ **[Voice Mode](/docs/user-guide/features/voice-mode)** | Real-time voice interaction in CLI, Telegram, Discord, and Discord VC | +| 🗣️ **[Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes)** | Hands-on setup and usage patterns for Hermes voice workflows | | 🎭 **[Personality & SOUL.md](/docs/user-guide/features/personality)** | Define Hermes' default voice with a global SOUL.md | | 📄 **[Context Files](/docs/user-guide/features/context-files)** | Project context files that shape every conversation | | 🔒 **[Security](/docs/user-guide/security)** | Command approval, authorization, container isolation | diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md 
index f179437a..6fcc96a2 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -31,7 +31,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `CLAUDE_CODE_OAUTH_TOKEN` | Claude Code setup-token (same as `ANTHROPIC_TOKEN`) | | `HERMES_MODEL` | Preferred model name (checked before `LLM_MODEL`, used by gateway) | | `LLM_MODEL` | Default model name (fallback when not set in config.yaml) | -| `VOICE_TOOLS_OPENAI_KEY` | OpenAI key for TTS and voice transcription (separate from custom endpoint) | +| `VOICE_TOOLS_OPENAI_KEY` | OpenAI key for OpenAI speech-to-text and text-to-speech providers | | `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`) | ## Provider Auth (OAuth) @@ -57,7 +57,12 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `BROWSERBASE_PROJECT_ID` | Browserbase project ID | | `BROWSER_INACTIVITY_TIMEOUT` | Browser session inactivity timeout in seconds | | `FAL_KEY` | Image generation ([fal.ai](https://fal.ai/)) | -| `ELEVENLABS_API_KEY` | Premium TTS voices ([elevenlabs.io](https://elevenlabs.io/)) | +| `GROQ_API_KEY` | Groq Whisper STT API key ([groq.com](https://groq.com/)) | +| `ELEVENLABS_API_KEY` | ElevenLabs premium TTS voices ([elevenlabs.io](https://elevenlabs.io/)) | +| `STT_GROQ_MODEL` | Override the Groq STT model (default: `whisper-large-v3-turbo`) | +| `GROQ_BASE_URL` | Override the Groq OpenAI-compatible STT endpoint | +| `STT_OPENAI_MODEL` | Override the OpenAI STT model (default: `whisper-1`) | +| `STT_OPENAI_BASE_URL` | Override the OpenAI-compatible STT endpoint | | `HONCHO_API_KEY` | Cross-session user modeling ([honcho.dev](https://honcho.dev/)) | | `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) | | `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) | diff --git 
a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 5960a0ec..9ef45460 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -45,6 +45,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/verbose` | Cycle tool progress display: off → new → all → verbose | | `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) | | `/skin` | Show or change the display skin/theme | +| `/voice [on\|off\|tts\|status]` | Toggle CLI voice mode and spoken playback. Recording uses `voice.record_key` (default: `Ctrl+B`). | ### Tools & Skills @@ -105,6 +106,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/usage` | Show token usage for the current session. | | `/insights [days]` | Show usage analytics. | | `/reasoning [level\|show\|hide]` | Change reasoning effort or toggle reasoning display. | +| `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. | | `/rollback [number]` | List or restore filesystem checkpoints. | | `/background <prompt>` | Run a prompt in a separate background session. | | `/reload-mcp` | Reload MCP servers from config. | @@ -116,4 +118,5 @@ The messaging gateway supports the following built-in commands inside Telegram, - `/skin`, `/tools`, `/toolsets`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, and `/verbose` are **CLI-only** commands. - `/status`, `/stop`, `/sethome`, `/resume`, `/background`, and `/update` are **messaging-only** commands. -- `/reload-mcp` and `/rollback` work in **both** the CLI and the messaging gateway. \ No newline at end of file +- `/voice`, `/reload-mcp`, and `/rollback` work in **both** the CLI and the messaging gateway. +- `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord. 
diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index 6c8d558d..fb3c8383 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -77,6 +77,7 @@ When resuming a previous session (`hermes -c` or `hermes --resume `), a "Pre | `Alt+Enter` or `Ctrl+J` | New line (multi-line input) | | `Alt+V` | Paste an image from the clipboard when supported by the terminal | | `Ctrl+V` | Paste text and opportunistically attach clipboard images | +| `Ctrl+B` | Start/stop voice recording when voice mode is enabled (`voice.record_key`, default: `ctrl+b`) | | `Ctrl+C` | Interrupt agent (double-press within 2s to force exit) | | `Ctrl+D` | Exit | | `Tab` | Autocomplete slash commands | @@ -95,11 +96,15 @@ Common examples: | `/skills browse` | Browse the skills hub and official optional skills | | `/background ` | Run a prompt in a separate background session | | `/skin` | Show or switch the active CLI skin | +| `/voice on` | Enable CLI voice mode (press `Ctrl+B` to record) | +| `/voice tts` | Toggle spoken playback for Hermes replies | | `/reasoning high` | Increase reasoning effort | | `/title My Session` | Name the current session | For the full built-in CLI and messaging lists, see [Slash Commands Reference](../reference/slash-commands.md). +For setup, providers, silence tuning, and messaging/Discord voice usage, see [Voice Mode](features/voice-mode.md). + :::tip Commands are case-insensitive — `/HELP` works the same as `/help`. Installed skills also become slash commands automatically. ::: diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 13da3fe4..4615ff06 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -695,6 +695,8 @@ tts: voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer ``` +This controls both the `text_to_speech` tool and spoken replies in voice mode (`/voice tts` in the CLI or messaging gateway). 
+ ## Display Settings ```yaml @@ -719,10 +721,43 @@ display: ```yaml stt: - provider: "openai" # STT provider + provider: "local" # "local" | "groq" | "openai" + local: + model: "base" # tiny, base, small, medium, large-v3 + openai: + model: "whisper-1" # whisper-1 | gpt-4o-mini-transcribe | gpt-4o-transcribe + # model: "whisper-1" # Legacy fallback key still respected ``` -Requires `VOICE_TOOLS_OPENAI_KEY` in `.env` for OpenAI STT. +Provider behavior: + +- `local` uses `faster-whisper` running on your machine. Install it separately with `pip install faster-whisper`. +- `groq` uses Groq's Whisper-compatible endpoint and reads `GROQ_API_KEY`. +- `openai` uses the OpenAI speech API and reads `VOICE_TOOLS_OPENAI_KEY`. + +If the requested provider is unavailable, Hermes falls back automatically in this order: `local` → `groq` → `openai`. + +Groq and OpenAI model overrides are environment-driven: + +```bash +STT_GROQ_MODEL=whisper-large-v3-turbo +STT_OPENAI_MODEL=whisper-1 +GROQ_BASE_URL=https://api.groq.com/openai/v1 +STT_OPENAI_BASE_URL=https://api.openai.com/v1 +``` + +## Voice Mode (CLI) + +```yaml +voice: + record_key: "ctrl+b" # Push-to-talk key inside the CLI + max_recording_seconds: 120 # Hard stop for long recordings + auto_tts: false # Enable spoken replies automatically when /voice on + silence_threshold: 200 # RMS threshold for speech detection + silence_duration: 3.0 # Seconds of silence before auto-stop +``` + +Use `/voice on` in the CLI to enable microphone mode, `record_key` to start/stop recording, and `/voice tts` to toggle spoken replies. See [Voice Mode](/docs/user-guide/features/voice-mode) for end-to-end setup and platform-specific behavior. 
## Quick Commands diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md index e9a4d4be..dd7d5606 100644 --- a/website/docs/user-guide/features/cron.md +++ b/website/docs/user-guide/features/cron.md @@ -194,6 +194,8 @@ The agent's final response is automatically delivered. You do not need to call ` ## Schedule formats +The agent's final response is automatically delivered — you do **not** need to include `send_message` in the cron prompt for that same destination. If a cron run calls `send_message` to the exact target the scheduler will already deliver to, Hermes skips that duplicate send and tells the model to put the user-facing content in the final response instead. Use `send_message` only for additional or different targets. + ### Relative delays (one-shot) ```text diff --git a/website/docs/user-guide/features/voice-mode.md b/website/docs/user-guide/features/voice-mode.md index ce151643..3dfe0db4 100644 --- a/website/docs/user-guide/features/voice-mode.md +++ b/website/docs/user-guide/features/voice-mode.md @@ -8,12 +8,14 @@ description: "Real-time voice conversations with Hermes Agent — CLI, Telegram, Hermes Agent supports full voice interaction across CLI and messaging platforms. Talk to the agent using your microphone, hear spoken replies, and have live voice conversations in Discord voice channels. +If you want a practical setup walkthrough with recommended configurations and real usage patterns, see [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes). + ## Prerequisites Before using voice features, make sure you have: -1. **Hermes Agent installed** — `pip install hermes-agent` (see [Getting Started](../../getting-started.md)) -2. **An LLM provider configured** — set `OPENAI_API_KEY`, `OPENAI_BASE_URL`, and `LLM_MODEL` in `~/.hermes/.env` +1. **Hermes Agent installed** — `pip install hermes-agent` (see [Installation](/docs/getting-started/installation)) +2. 
**An LLM provider configured** — run `hermes model` or set your preferred provider credentials in `~/.hermes/.env` 3. **A working base setup** — run `hermes` to verify the agent responds to text before enabling voice :::tip diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index 0fc7f8cb..2fd9a3a1 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -210,8 +210,13 @@ Replace the ID with the actual channel ID (right-click → Copy Channel ID with Hermes Agent supports Discord voice messages: -- **Incoming voice messages** are automatically transcribed using Whisper (requires `GROQ_API_KEY` or `VOICE_TOOLS_OPENAI_KEY` to be set in your environment). +- **Incoming voice messages** are automatically transcribed using the configured STT provider: local `faster-whisper` (no key), Groq Whisper (`GROQ_API_KEY`), or OpenAI Whisper (`VOICE_TOOLS_OPENAI_KEY`). - **Text-to-speech**: Use `/voice tts` to have the bot send spoken audio responses alongside text replies. +- **Discord voice channels**: Hermes can also join a voice channel, listen to users speaking, and talk back in the channel. + +For the full setup and operational guide, see: +- [Voice Mode](/docs/user-guide/features/voice-mode) +- [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes) ## Troubleshooting diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index debc841b..2530248e 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -8,6 +8,8 @@ description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Email, Home Assistant, or your browser. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages. 
+For the full voice feature set — including CLI microphone mode, spoken replies in messaging, and Discord voice-channel conversations — see [Voice Mode](/docs/user-guide/features/voice-mode) and [Use Voice Mode with Hermes](/docs/guides/use-voice-mode-with-hermes). + ## Architecture ```text @@ -77,6 +79,7 @@ hermes gateway status # Check service status | `/usage` | Show token usage for this session | | `/insights [days]` | Show usage insights and analytics | | `/reasoning [level\|show\|hide]` | Change reasoning effort or toggle reasoning display | +| `/voice [on\|off\|tts\|join\|leave\|status]` | Control messaging voice replies and Discord voice-channel behavior | | `/rollback [number]` | List or restore filesystem checkpoints | | `/background <prompt>` | Run a prompt in a separate background session | | `/reload-mcp` | Reload MCP servers from config | diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index 5ba6c7dd..2ff79f35 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -224,7 +224,7 @@ Make sure the bot has been **invited to the channel** (`/invite @Hermes Agent`). Hermes supports voice on Slack: -- **Incoming:** Voice/audio messages are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`) +- **Incoming:** Voice/audio messages are automatically transcribed using the configured STT provider: local `faster-whisper`, Groq Whisper (`GROQ_API_KEY`), or OpenAI Whisper (`VOICE_TOOLS_OPENAI_KEY`) - **Outgoing:** TTS responses are sent as audio file attachments --- diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index 123b8139..179f46b6 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -131,7 +131,11 @@ Group chat IDs are negative numbers (e.g., `-1001234567890`).
Your personal DM c ### Incoming Voice (Speech-to-Text) -Voice messages you send on Telegram are automatically transcribed using OpenAI's Whisper API and injected as text into the conversation. This requires `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. +Voice messages you send on Telegram are automatically transcribed by Hermes's configured STT provider and injected as text into the conversation. + +- `local` uses `faster-whisper` on the machine running Hermes — no API key required +- `groq` uses Groq Whisper and requires `GROQ_API_KEY` +- `openai` uses OpenAI Whisper and requires `VOICE_TOOLS_OPENAI_KEY` ### Outgoing Voice (Text-to-Speech) @@ -173,7 +177,7 @@ Hermes Agent works in Telegram group chats with a few considerations: | Bot not responding at all | Verify `TELEGRAM_BOT_TOKEN` is correct. Check `hermes gateway` logs for errors. | | Bot responds with "unauthorized" | Your user ID is not in `TELEGRAM_ALLOWED_USERS`. Double-check with @userinfobot. | | Bot ignores group messages | Privacy mode is likely on. Disable it (Step 3) or make the bot a group admin. **Remember to remove and re-add the bot after changing privacy.** | -| Voice messages not transcribed | Check that `VOICE_TOOLS_OPENAI_KEY` is set and valid in `~/.hermes/.env`. | +| Voice messages not transcribed | Verify STT is available: install `faster-whisper` for local transcription, or set `GROQ_API_KEY` / `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. | | Voice replies are files, not bubbles | Install `ffmpeg` (needed for Edge TTS Opus conversion). | | Bot token revoked/invalid | Generate a new token via `/revoke` then `/newbot` or `/token` in BotFather. Update your `.env` file. | diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index 8bdf28dd..af432fb8 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -137,7 +137,7 @@ with reconnection logic. 
Hermes supports voice on WhatsApp: -- **Incoming:** Voice messages (`.ogg` opus) are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`) +- **Incoming:** Voice messages (`.ogg` opus) are automatically transcribed using the configured STT provider: local `faster-whisper`, Groq Whisper (`GROQ_API_KEY`), or OpenAI Whisper (`VOICE_TOOLS_OPENAI_KEY`) - **Outgoing:** TTS responses are sent as MP3 audio file attachments - Agent responses are prefixed with "⚕ **Hermes Agent**" for easy identification diff --git a/website/sidebars.ts b/website/sidebars.ts index ff91c4de..087f9240 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -24,6 +24,7 @@ const sidebars: SidebarsConfig = { 'guides/python-library', 'guides/use-mcp-with-hermes', 'guides/use-soul-with-hermes', + 'guides/use-voice-mode-with-hermes', ], }, { @@ -75,6 +76,7 @@ const sidebars: SidebarsConfig = { type: 'category', label: 'Web & Media', items: [ + 'user-guide/features/voice-mode', 'user-guide/features/browser', 'user-guide/features/vision', 'user-guide/features/image-generation', @@ -108,6 +110,7 @@ const sidebars: SidebarsConfig = { 'developer-guide/architecture', 'developer-guide/agent-loop', 'developer-guide/provider-runtime', + 'developer-guide/adding-providers', 'developer-guide/prompt-assembly', 'developer-guide/context-compression-and-caching', 'developer-guide/gateway-internals',