From 455bf2e853a6a9b137e7a2bd594a97c927954a01 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 28 Mar 2026 11:14:54 -0700 Subject: [PATCH] feat: activate plugin lifecycle hooks (pre/post_llm_call, session start/end) (#3542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plugin system defined six lifecycle hooks but only pre_tool_call and post_tool_call were invoked. This activates the remaining four so that external plugins (e.g. memory systems) can hook into the conversation loop without touching core code. Hook semantics: - on_session_start: fires once when a new session is created - pre_llm_call: fires once per turn before the tool-calling loop; plugins can return {"context": "..."} to inject into the ephemeral system prompt (not cached, not persisted) - post_llm_call: fires once per turn after the loop completes, with user_message and assistant_response for sync/storage - on_session_end: fires at the end of every run_conversation call invoke_hook() now returns a list of non-None callback return values, enabling pre_llm_call context injection while remaining backward compatible (existing hooks that return None are unaffected). Salvaged from PR #2823. 
Co-authored-by: Nicolò Boschi --- hermes_cli/plugins.py | 21 +++-- run_agent.py | 82 ++++++++++++++++++++ tests/test_plugins.py | 36 +++++++++ website/docs/guides/build-a-hermes-plugin.md | 20 ++--- website/docs/user-guide/features/plugins.md | 8 +- 5 files changed, 149 insertions(+), 18 deletions(-) diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 5e27535a0..022c44816 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -385,16 +385,23 @@ class PluginManager: # Hook invocation # ----------------------------------------------------------------------- - def invoke_hook(self, hook_name: str, **kwargs: Any) -> None: + def invoke_hook(self, hook_name: str, **kwargs: Any) -> List[Any]: """Call all registered callbacks for *hook_name*. Each callback is wrapped in its own try/except so a misbehaving plugin cannot break the core agent loop. + + Returns a list of non-``None`` return values from callbacks. + This allows hooks like ``pre_llm_call`` to contribute context + that the agent core can collect and inject. """ callbacks = self._hooks.get(hook_name, []) + results: List[Any] = [] for cb in callbacks: try: - cb(**kwargs) + ret = cb(**kwargs) + if ret is not None: + results.append(ret) except Exception as exc: logger.warning( "Hook '%s' callback %s raised: %s", @@ -402,6 +409,7 @@ class PluginManager: getattr(cb, "__name__", repr(cb)), exc, ) + return results # ----------------------------------------------------------------------- # Introspection @@ -446,9 +454,12 @@ def discover_plugins() -> None: get_plugin_manager().discover_and_load() -def invoke_hook(hook_name: str, **kwargs: Any) -> None: - """Invoke a lifecycle hook on all loaded plugins.""" - get_plugin_manager().invoke_hook(hook_name, **kwargs) +def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]: + """Invoke a lifecycle hook on all loaded plugins. + + Returns a list of non-``None`` return values from plugin callbacks. 
+ """ + return get_plugin_manager().invoke_hook(hook_name, **kwargs) def get_plugin_tool_names() -> Set[str]: diff --git a/run_agent.py b/run_agent.py index 501a1a463..65f67f8d9 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6024,6 +6024,22 @@ class AIAgent: self._cached_system_prompt = ( self._cached_system_prompt + "\n\n" + self._honcho_context ).strip() + + # Plugin hook: on_session_start + # Fired once when a brand-new session is created (not on + # continuation). Plugins can use this to initialise + # session-scoped state (e.g. warm a memory cache). + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "on_session_start", + session_id=self.session_id, + model=self.model, + platform=getattr(self, "platform", None) or "", + ) + except Exception as exc: + logger.warning("on_session_start hook failed: %s", exc) + # Store the system prompt snapshot in SQLite if self._session_db: try: @@ -6085,6 +6101,34 @@ class AIAgent: if _preflight_tokens < self.context_compressor.threshold_tokens: break # Under threshold + # Plugin hook: pre_llm_call + # Fired once per turn before the tool-calling loop. Plugins can + # return a dict with a ``context`` key whose value is a string + # that will be appended to the ephemeral system prompt for every + # API call in this turn (not persisted to session DB or cache). 
+ _plugin_turn_context = "" + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _pre_results = _invoke_hook( + "pre_llm_call", + session_id=self.session_id, + user_message=original_user_message, + conversation_history=list(messages), + is_first_turn=(not bool(conversation_history)), + model=self.model, + platform=getattr(self, "platform", None) or "", + ) + _ctx_parts = [] + for r in _pre_results: + if isinstance(r, dict) and r.get("context"): + _ctx_parts.append(str(r["context"])) + elif isinstance(r, str) and r.strip(): + _ctx_parts.append(r) + if _ctx_parts: + _plugin_turn_context = "\n\n".join(_ctx_parts) + except Exception as exc: + logger.warning("pre_llm_call hook failed: %s", exc) + # Main conversation loop api_call_count = 0 final_response = None @@ -6182,6 +6226,9 @@ class AIAgent: effective_system = active_system_prompt or "" if self.ephemeral_system_prompt: effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() + # Plugin context from pre_llm_call hooks — ephemeral, not cached. + if _plugin_turn_context: + effective_system = (effective_system + "\n\n" + _plugin_turn_context).strip() if effective_system: api_messages = [{"role": "system", "content": effective_system}] + api_messages @@ -7759,6 +7806,25 @@ class AIAgent: self._honcho_sync(original_user_message, final_response) self._queue_honcho_prefetch(original_user_message) + # Plugin hook: post_llm_call + # Fired once per turn after the tool-calling loop completes. + # Plugins can use this to persist conversation data (e.g. sync + # to an external memory system). 
+ if final_response and not interrupted: + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "post_llm_call", + session_id=self.session_id, + user_message=original_user_message, + assistant_response=final_response, + conversation_history=list(messages), + model=self.model, + platform=getattr(self, "platform", None) or "", + ) + except Exception as exc: + logger.warning("post_llm_call hook failed: %s", exc) + # Extract reasoning from the last assistant message (if any) last_reasoning = None for msg in reversed(messages): @@ -7824,6 +7890,22 @@ class AIAgent: except Exception: pass # Background review is best-effort + # Plugin hook: on_session_end + # Fired at the very end of every run_conversation call. + # Plugins can use this for cleanup, flushing buffers, etc. + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + "on_session_end", + session_id=self.session_id, + completed=completed, + interrupted=interrupted, + model=self.model, + platform=getattr(self, "platform", None) or "", + ) + except Exception as exc: + logger.warning("on_session_end hook failed: %s", exc) + return result def chat(self, message: str, stream_callback: Optional[callable] = None) -> str: diff --git a/tests/test_plugins.py b/tests/test_plugins.py index f90853a81..0da5b640d 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -226,6 +226,42 @@ class TestPluginHooks: # Should not raise despite 1/0 mgr.invoke_hook("post_tool_call", tool_name="x", args={}, result="r", task_id="") + def test_hook_return_values_collected(self, tmp_path, monkeypatch): + """invoke_hook() collects non-None return values from callbacks.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "ctx_plugin", + register_body=( + 'ctx.register_hook("pre_llm_call", ' + 'lambda **kw: {"context": "memory from plugin"})' + ), + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = 
PluginManager() + mgr.discover_and_load() + + results = mgr.invoke_hook("pre_llm_call", session_id="s1", user_message="hi", + conversation_history=[], is_first_turn=True, model="test") + assert len(results) == 1 + assert results[0] == {"context": "memory from plugin"} + + def test_hook_none_returns_excluded(self, tmp_path, monkeypatch): + """invoke_hook() excludes None returns from the result list.""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "none_hook", + register_body='ctx.register_hook("post_llm_call", lambda **kw: None)', + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + results = mgr.invoke_hook("post_llm_call", session_id="s1", + user_message="hi", assistant_response="bye", model="test") + assert results == [] + def test_invalid_hook_name_warns(self, tmp_path, monkeypatch, caplog): """Registering an unknown hook name logs a warning.""" plugins_dir = tmp_path / "hermes_test" / "plugins" diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md index de3dbec19..abe1e3424 100644 --- a/website/docs/guides/build-a-hermes-plugin.md +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -365,16 +365,18 @@ def register(ctx): Available hooks: -| Hook | When | Arguments | -|------|------|-----------| -| `pre_tool_call` | Before any tool runs | `tool_name`, `args`, `task_id` | -| `post_tool_call` | After any tool returns | `tool_name`, `args`, `result`, `task_id` | -| `pre_llm_call` | Before LLM API call | `messages`, `model` | -| `post_llm_call` | After LLM response | `messages`, `response`, `model` | -| `on_session_start` | Session begins | `session_id`, `platform` | -| `on_session_end` | Session ends | `session_id`, `platform` | +| Hook | When | Arguments | Return | +|------|------|-----------|--------| +| `pre_tool_call` | Before any tool runs | `tool_name`, `args`, `task_id` | — | +| 
`post_tool_call` | After any tool returns | `tool_name`, `args`, `result`, `task_id` | — | +| `pre_llm_call` | Once per turn, before the LLM loop | `session_id`, `user_message`, `conversation_history`, `is_first_turn`, `model`, `platform` | `{"context": "..."}` | +| `post_llm_call` | Once per turn, after the LLM loop | `session_id`, `user_message`, `assistant_response`, `conversation_history`, `model`, `platform` | — | +| `on_session_start` | New session created (first turn only) | `session_id`, `model`, `platform` | — | +| `on_session_end` | End of every `run_conversation` call | `session_id`, `completed`, `interrupted`, `model`, `platform` | — | -Hooks are observers — they can't modify arguments or return values. If a hook crashes, it's logged and skipped; other hooks and the tool continue normally. +Most hooks are fire-and-forget observers. The exception is `pre_llm_call`: if a callback returns a dict with a `"context"` key (or a plain string), the value is appended to the ephemeral system prompt for the current turn. This allows memory plugins to inject recalled context without touching core code. + +If a hook crashes, it's logged and skipped; other hooks and the agent continue normally. ### Distribute via pip diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index 30ab6c35a..3bf822d1f 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -52,10 +52,10 @@ Plugins can register callbacks for these lifecycle events. 
See the **[Event Hook |------|-----------| | `pre_tool_call` | Before any tool executes | | `post_tool_call` | After any tool returns | -| `pre_llm_call` | Before LLM API request *(planned)* | -| `post_llm_call` | After LLM API response *(planned)* | -| `on_session_start` | Session begins *(planned)* | -| `on_session_end` | Session ends *(planned)* | +| `pre_llm_call` | Once per turn, before the LLM loop — can return `{"context": "..."}` to append to the ephemeral system prompt for that turn (not cached or persisted) | +| `post_llm_call` | Once per turn, after the LLM loop completes | +| `on_session_start` | New session created (first turn only) | +| `on_session_end` | End of every `run_conversation` call | ## Slash commands