feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.
This commit is contained in:
teknium1
2026-02-22 10:15:17 -08:00
parent df2ec585f1
commit 3c6750f37b
3 changed files with 147 additions and 0 deletions

View File

@@ -169,6 +169,15 @@ memory:
memory_char_limit: 2200 # ~800 tokens
user_char_limit: 1375 # ~500 tokens
# Periodic memory nudge: remind the agent to consider saving memories
# every N user turns. Set to 0 to disable. Only active when memory is enabled.
nudge_interval: 10 # Nudge every 10 user turns (0 = disabled)
# Memory flush: give the agent one turn to save memories before context is
# lost (compression, /new, /reset, exit). Set to 0 to disable.
# For exit/reset, only fires if the session had at least this many user turns.
flush_min_turns: 6 # Min user turns to trigger flush on exit/reset (0 = disabled)
# =============================================================================
# Agent Behavior
# =============================================================================

17
cli.py
View File

@@ -1088,6 +1088,11 @@ class HermesCLI:
def reset_conversation(self):
"""Reset the conversation history."""
if self.agent and self.conversation_history:
try:
self.agent.flush_memories(self.conversation_history)
except Exception:
pass
self.conversation_history = []
print("(^_^)b Conversation reset!")
@@ -1482,6 +1487,12 @@ class HermesCLI:
elif cmd_lower == "/config":
self.show_config()
elif cmd_lower == "/clear":
# Flush memories before clearing
if self.agent and self.conversation_history:
try:
self.agent.flush_memories(self.conversation_history)
except Exception:
pass
# Clear terminal screen using Rich (portable, no shell needed)
self.console.clear()
# Reset conversation
@@ -2452,6 +2463,12 @@ class HermesCLI:
pass
finally:
self._should_exit = True
# Flush memories before exit (only for substantial conversations)
if self.agent and self.conversation_history:
try:
self.agent.flush_memories(self.conversation_history)
except Exception:
pass
# Unregister terminal_tool callbacks to avoid dangling references
set_sudo_password_callback(None)
set_approval_callback(None)

View File

@@ -373,12 +373,16 @@ class AIAgent:
self._memory_store = None
self._memory_enabled = False
self._user_profile_enabled = False
self._memory_nudge_interval = 10
self._memory_flush_min_turns = 6
if not skip_memory:
try:
from hermes_cli.config import load_config as _load_mem_config
mem_config = _load_mem_config().get("memory", {})
self._memory_enabled = mem_config.get("memory_enabled", False)
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
if self._memory_enabled or self._user_profile_enabled:
from tools.memory_tool import MemoryStore
self._memory_store = MemoryStore(
@@ -404,6 +408,7 @@ class AIAgent:
quiet_mode=self.quiet_mode,
)
self.compression_enabled = compression_enabled
self._user_turn_count = 0
if not self.quiet_mode:
if compression_enabled:
@@ -1195,12 +1200,114 @@ class AIAgent:
return msg
def flush_memories(self, messages: list = None, min_turns: int = None):
"""Give the model one turn to persist memories before context is lost.
Called before compression, session reset, or CLI exit. Injects a flush
message, makes one API call, executes any memory tool calls, then
strips all flush artifacts from the message list.
Args:
messages: The current conversation messages. If None, uses
self._session_messages (last run_conversation state).
min_turns: Minimum user turns required to trigger the flush.
None = use config value (flush_min_turns).
0 = always flush (used for compression).
"""
if self._memory_flush_min_turns == 0 and min_turns is None:
return
if "memory" not in self.valid_tool_names or not self._memory_store:
return
effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
if self._user_turn_count < effective_min:
return
if messages is None:
messages = getattr(self, '_session_messages', None)
if not messages or len(messages) < 3:
return
flush_content = (
"[System: The session is being compressed. "
"Please save anything worth remembering to your memories.]"
)
flush_msg = {"role": "user", "content": flush_content}
messages.append(flush_msg)
try:
# Build API messages for the flush call
api_messages = []
for msg in messages:
api_msg = msg.copy()
if msg.get("role") == "assistant":
reasoning = msg.get("reasoning")
if reasoning:
api_msg["reasoning_content"] = reasoning
api_msg.pop("reasoning", None)
api_messages.append(api_msg)
if self._cached_system_prompt:
api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
# Make one API call with only the memory tool available
memory_tool_def = None
for t in (self.tools or []):
if t.get("function", {}).get("name") == "memory":
memory_tool_def = t
break
if not memory_tool_def:
messages.pop() # remove flush msg
return
api_kwargs = {
"model": self.model,
"messages": api_messages,
"tools": [memory_tool_def],
"temperature": 0.3,
"max_tokens": 1024,
}
response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
if response.choices:
assistant_message = response.choices[0].message
if assistant_message.tool_calls:
# Execute only memory tool calls
for tc in assistant_message.tool_calls:
if tc.function.name == "memory":
try:
args = json.loads(tc.function.arguments)
from tools.memory_tool import memory_tool as _memory_tool
result = _memory_tool(
action=args.get("action"),
target=args.get("target", "memory"),
content=args.get("content"),
old_text=args.get("old_text"),
store=self._memory_store,
)
if not self.quiet_mode:
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
except Exception as e:
logger.debug("Memory flush tool call failed: %s", e)
except Exception as e:
logger.debug("Memory flush API call failed: %s", e)
finally:
# Strip flush artifacts: remove everything from the flush message onward
while messages and messages[-1] is not flush_msg and len(messages) > 0:
messages.pop()
if messages and messages[-1] is flush_msg:
messages.pop()
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple:
"""Compress conversation context and split the session in SQLite.
Returns:
(compressed_messages, new_system_prompt) tuple
"""
# Pre-compression memory flush: let the model save memories before they're lost
self.flush_memories(messages, min_turns=0)
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
todo_snapshot = self._todo_store.format_for_injection()
@@ -1489,6 +1596,20 @@ class AIAgent:
for prefill_msg in self.prefill_messages:
messages.append(prefill_msg.copy())
# Track user turns for memory flush and periodic nudge logic
self._user_turn_count += 1
# Periodic memory nudge: remind the model to consider saving memories
if (self._memory_nudge_interval > 0
and self._user_turn_count % self._memory_nudge_interval == 0
and self._user_turn_count > 0
and "memory" in self.valid_tool_names
and self._memory_store):
user_message += (
"\n\n[System: You've had several exchanges in this session. "
"Consider whether there's anything worth saving to your memories.]"
)
# Add user message
messages.append({
"role": "user",