feat: activate plugin lifecycle hooks (pre/post_llm_call, session start/end) (#3542)
The plugin system defined six lifecycle hooks but only pre_tool_call and
post_tool_call were invoked. This activates the remaining four so that
external plugins (e.g. memory systems) can hook into the conversation
loop without touching core code.
Hook semantics:
- on_session_start: fires once when a new session is created
- pre_llm_call: fires once per turn before the tool-calling loop;
plugins can return {"context": "..."} to inject into the ephemeral
system prompt (not cached, not persisted)
- post_llm_call: fires once per turn after the loop completes, with
user_message and assistant_response for sync/storage
- on_session_end: fires at the end of every run_conversation call
invoke_hook() now returns a list of non-None callback return values,
enabling pre_llm_call context injection while remaining backward
compatible (existing hooks that return None are unaffected).
Salvaged from PR #2823.
Co-authored-by: Nicolò Boschi <boschi1997@gmail.com>
This commit is contained in:
@@ -385,16 +385,23 @@ class PluginManager:
|
||||
# Hook invocation
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
def invoke_hook(self, hook_name: str, **kwargs: Any) -> None:
|
||||
def invoke_hook(self, hook_name: str, **kwargs: Any) -> List[Any]:
|
||||
"""Call all registered callbacks for *hook_name*.
|
||||
|
||||
Each callback is wrapped in its own try/except so a misbehaving
|
||||
plugin cannot break the core agent loop.
|
||||
|
||||
Returns a list of non-``None`` return values from callbacks.
|
||||
This allows hooks like ``pre_llm_call`` to contribute context
|
||||
that the agent core can collect and inject.
|
||||
"""
|
||||
callbacks = self._hooks.get(hook_name, [])
|
||||
results: List[Any] = []
|
||||
for cb in callbacks:
|
||||
try:
|
||||
cb(**kwargs)
|
||||
ret = cb(**kwargs)
|
||||
if ret is not None:
|
||||
results.append(ret)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Hook '%s' callback %s raised: %s",
|
||||
@@ -402,6 +409,7 @@ class PluginManager:
|
||||
getattr(cb, "__name__", repr(cb)),
|
||||
exc,
|
||||
)
|
||||
return results
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Introspection
|
||||
@@ -446,9 +454,12 @@ def discover_plugins() -> None:
|
||||
get_plugin_manager().discover_and_load()
|
||||
|
||||
|
||||
def invoke_hook(hook_name: str, **kwargs: Any) -> None:
|
||||
"""Invoke a lifecycle hook on all loaded plugins."""
|
||||
get_plugin_manager().invoke_hook(hook_name, **kwargs)
|
||||
def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
|
||||
"""Invoke a lifecycle hook on all loaded plugins.
|
||||
|
||||
Returns a list of non-``None`` return values from plugin callbacks.
|
||||
"""
|
||||
return get_plugin_manager().invoke_hook(hook_name, **kwargs)
|
||||
|
||||
|
||||
def get_plugin_tool_names() -> Set[str]:
|
||||
|
||||
82
run_agent.py
82
run_agent.py
@@ -6024,6 +6024,22 @@ class AIAgent:
|
||||
self._cached_system_prompt = (
|
||||
self._cached_system_prompt + "\n\n" + self._honcho_context
|
||||
).strip()
|
||||
|
||||
# Plugin hook: on_session_start
|
||||
# Fired once when a brand-new session is created (not on
|
||||
# continuation). Plugins can use this to initialise
|
||||
# session-scoped state (e.g. warm a memory cache).
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_start",
|
||||
session_id=self.session_id,
|
||||
model=self.model,
|
||||
platform=getattr(self, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_start hook failed: %s", exc)
|
||||
|
||||
# Store the system prompt snapshot in SQLite
|
||||
if self._session_db:
|
||||
try:
|
||||
@@ -6085,6 +6101,34 @@ class AIAgent:
|
||||
if _preflight_tokens < self.context_compressor.threshold_tokens:
|
||||
break # Under threshold
|
||||
|
||||
# Plugin hook: pre_llm_call
|
||||
# Fired once per turn before the tool-calling loop. Plugins can
|
||||
# return a dict with a ``context`` key whose value is a string
|
||||
# that will be appended to the ephemeral system prompt for every
|
||||
# API call in this turn (not persisted to session DB or cache).
|
||||
_plugin_turn_context = ""
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_pre_results = _invoke_hook(
|
||||
"pre_llm_call",
|
||||
session_id=self.session_id,
|
||||
user_message=original_user_message,
|
||||
conversation_history=list(messages),
|
||||
is_first_turn=(not bool(conversation_history)),
|
||||
model=self.model,
|
||||
platform=getattr(self, "platform", None) or "",
|
||||
)
|
||||
_ctx_parts = []
|
||||
for r in _pre_results:
|
||||
if isinstance(r, dict) and r.get("context"):
|
||||
_ctx_parts.append(str(r["context"]))
|
||||
elif isinstance(r, str) and r.strip():
|
||||
_ctx_parts.append(r)
|
||||
if _ctx_parts:
|
||||
_plugin_turn_context = "\n\n".join(_ctx_parts)
|
||||
except Exception as exc:
|
||||
logger.warning("pre_llm_call hook failed: %s", exc)
|
||||
|
||||
# Main conversation loop
|
||||
api_call_count = 0
|
||||
final_response = None
|
||||
@@ -6182,6 +6226,9 @@ class AIAgent:
|
||||
effective_system = active_system_prompt or ""
|
||||
if self.ephemeral_system_prompt:
|
||||
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
|
||||
# Plugin context from pre_llm_call hooks — ephemeral, not cached.
|
||||
if _plugin_turn_context:
|
||||
effective_system = (effective_system + "\n\n" + _plugin_turn_context).strip()
|
||||
if effective_system:
|
||||
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
||||
|
||||
@@ -7759,6 +7806,25 @@ class AIAgent:
|
||||
self._honcho_sync(original_user_message, final_response)
|
||||
self._queue_honcho_prefetch(original_user_message)
|
||||
|
||||
# Plugin hook: post_llm_call
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can use this to persist conversation data (e.g. sync
|
||||
# to an external memory system).
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"post_llm_call",
|
||||
session_id=self.session_id,
|
||||
user_message=original_user_message,
|
||||
assistant_response=final_response,
|
||||
conversation_history=list(messages),
|
||||
model=self.model,
|
||||
platform=getattr(self, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("post_llm_call hook failed: %s", exc)
|
||||
|
||||
# Extract reasoning from the last assistant message (if any)
|
||||
last_reasoning = None
|
||||
for msg in reversed(messages):
|
||||
@@ -7824,6 +7890,22 @@ class AIAgent:
|
||||
except Exception:
|
||||
pass # Background review is best-effort
|
||||
|
||||
# Plugin hook: on_session_end
|
||||
# Fired at the very end of every run_conversation call.
|
||||
# Plugins can use this for cleanup, flushing buffers, etc.
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_invoke_hook(
|
||||
"on_session_end",
|
||||
session_id=self.session_id,
|
||||
completed=completed,
|
||||
interrupted=interrupted,
|
||||
model=self.model,
|
||||
platform=getattr(self, "platform", None) or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("on_session_end hook failed: %s", exc)
|
||||
|
||||
return result
|
||||
|
||||
def chat(self, message: str, stream_callback: Optional[callable] = None) -> str:
|
||||
|
||||
@@ -226,6 +226,42 @@ class TestPluginHooks:
|
||||
# Should not raise despite 1/0
|
||||
mgr.invoke_hook("post_tool_call", tool_name="x", args={}, result="r", task_id="")
|
||||
|
||||
def test_hook_return_values_collected(self, tmp_path, monkeypatch):
|
||||
"""invoke_hook() collects non-None return values from callbacks."""
|
||||
plugins_dir = tmp_path / "hermes_test" / "plugins"
|
||||
_make_plugin_dir(
|
||||
plugins_dir, "ctx_plugin",
|
||||
register_body=(
|
||||
'ctx.register_hook("pre_llm_call", '
|
||||
'lambda **kw: {"context": "memory from plugin"})'
|
||||
),
|
||||
)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
|
||||
|
||||
mgr = PluginManager()
|
||||
mgr.discover_and_load()
|
||||
|
||||
results = mgr.invoke_hook("pre_llm_call", session_id="s1", user_message="hi",
|
||||
conversation_history=[], is_first_turn=True, model="test")
|
||||
assert len(results) == 1
|
||||
assert results[0] == {"context": "memory from plugin"}
|
||||
|
||||
def test_hook_none_returns_excluded(self, tmp_path, monkeypatch):
|
||||
"""invoke_hook() excludes None returns from the result list."""
|
||||
plugins_dir = tmp_path / "hermes_test" / "plugins"
|
||||
_make_plugin_dir(
|
||||
plugins_dir, "none_hook",
|
||||
register_body='ctx.register_hook("post_llm_call", lambda **kw: None)',
|
||||
)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
|
||||
|
||||
mgr = PluginManager()
|
||||
mgr.discover_and_load()
|
||||
|
||||
results = mgr.invoke_hook("post_llm_call", session_id="s1",
|
||||
user_message="hi", assistant_response="bye", model="test")
|
||||
assert results == []
|
||||
|
||||
def test_invalid_hook_name_warns(self, tmp_path, monkeypatch, caplog):
|
||||
"""Registering an unknown hook name logs a warning."""
|
||||
plugins_dir = tmp_path / "hermes_test" / "plugins"
|
||||
|
||||
@@ -365,16 +365,18 @@ def register(ctx):
|
||||
|
||||
Available hooks:
|
||||
|
||||
| Hook | When | Arguments |
|
||||
|------|------|-----------|
|
||||
| `pre_tool_call` | Before any tool runs | `tool_name`, `args`, `task_id` |
|
||||
| `post_tool_call` | After any tool returns | `tool_name`, `args`, `result`, `task_id` |
|
||||
| `pre_llm_call` | Before LLM API call | `messages`, `model` |
|
||||
| `post_llm_call` | After LLM response | `messages`, `response`, `model` |
|
||||
| `on_session_start` | Session begins | `session_id`, `platform` |
|
||||
| `on_session_end` | Session ends | `session_id`, `platform` |
|
||||
| Hook | When | Arguments | Return |
|
||||
|------|------|-----------|--------|
|
||||
| `pre_tool_call` | Before any tool runs | `tool_name`, `args`, `task_id` | — |
|
||||
| `post_tool_call` | After any tool returns | `tool_name`, `args`, `result`, `task_id` | — |
|
||||
| `pre_llm_call` | Once per turn, before the LLM loop | `session_id`, `user_message`, `conversation_history`, `is_first_turn`, `model`, `platform` | `{"context": "..."}` |
|
||||
| `post_llm_call` | Once per turn, after the LLM loop | `session_id`, `user_message`, `assistant_response`, `conversation_history`, `model`, `platform` | — |
|
||||
| `on_session_start` | New session created (first turn only) | `session_id`, `model`, `platform` | — |
|
||||
| `on_session_end` | End of every `run_conversation` call | `session_id`, `completed`, `interrupted`, `model`, `platform` | — |
|
||||
|
||||
Hooks are observers — they can't modify arguments or return values. If a hook crashes, it's logged and skipped; other hooks and the tool continue normally.
|
||||
Most hooks are fire-and-forget observers. The exception is `pre_llm_call`: if a callback returns a dict with a `"context"` key (or a plain string), the value is appended to the ephemeral system prompt for the current turn. This allows memory plugins to inject recalled context without touching core code.
|
||||
|
||||
If a hook crashes, it's logged and skipped; other hooks and the agent continue normally.
|
||||
|
||||
### Distribute via pip
|
||||
|
||||
|
||||
@@ -52,10 +52,10 @@ Plugins can register callbacks for these lifecycle events. See the **[Event Hook
|
||||
|------|-----------|
|
||||
| `pre_tool_call` | Before any tool executes |
|
||||
| `post_tool_call` | After any tool returns |
|
||||
| `pre_llm_call` | Before LLM API request *(planned)* |
|
||||
| `post_llm_call` | After LLM API response *(planned)* |
|
||||
| `on_session_start` | Session begins *(planned)* |
|
||||
| `on_session_end` | Session ends *(planned)* |
|
||||
| `pre_llm_call` | Once per turn, before the LLM loop — can return `{"context": "..."}` to inject into the system prompt |
|
||||
| `post_llm_call` | Once per turn, after the LLM loop completes |
|
||||
| `on_session_start` | New session created (first turn only) |
|
||||
| `on_session_end` | End of every `run_conversation` call |
|
||||
|
||||
## Slash commands
|
||||
|
||||
|
||||
Reference in New Issue
Block a user