From 30efc263ffca8a67166445a56128c5b604e29ba0 Mon Sep 17 00:00:00 2001
From: teknium1
Date: Sun, 1 Mar 2026 00:16:38 -0800
Subject: [PATCH] feat(cli): add /compress command for manual conversation
 context compression

Introduced a new command "/compress" to the CLI, allowing users to manually
trigger context compression on the current conversation. The method checks
for sufficient conversation history and active agent status before
performing compression, providing feedback on the number of messages and
tokens before and after the operation. Updated command documentation
accordingly.
---
 cli.py                 | 37 +++++++++++++++++++++++++++++++++++++
 hermes_cli/commands.py |  1 +
 2 files changed, 38 insertions(+)

diff --git a/cli.py b/cli.py
index 16ce554e..7f2b160b 100755
--- a/cli.py
+++ b/cli.py
@@ -1722,6 +1722,8 @@ class HermesCLI:
             self._show_gateway_status()
         elif cmd_lower == "/verbose":
             self._toggle_verbose()
+        elif cmd_lower == "/compress":
+            self._manual_compress()
         else:
             # Check for skill slash commands (/gif-search, /axolotl, etc.)
             base_cmd = cmd_lower.split()[0]
@@ -1763,6 +1765,41 @@ class HermesCLI:
         }
         self.console.print(labels.get(self.tool_progress_mode, ""))
 
+    def _manual_compress(self):
+        """Manually trigger context compression on the current conversation."""
+        if not self.conversation_history or len(self.conversation_history) < 4:
+            print("(._.) Not enough conversation to compress (need at least 4 messages).")
+            return
+
+        if not self.agent:
+            print("(._.) No active agent -- send a message first.")
+            return
+
+        if not self.agent.compression_enabled:
+            print("(._.) Compression is disabled in config.")
+            return
+
+        original_count = len(self.conversation_history)
+        try:
+            from agent.model_metadata import estimate_messages_tokens_rough
+            approx_tokens = estimate_messages_tokens_rough(self.conversation_history)
+            print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
+
+            compressed, new_system = self.agent._compress_context(
+                self.conversation_history,
+                self.agent._cached_system_prompt or "",
+                approx_tokens=approx_tokens,
+            )
+            self.conversation_history = compressed
+            new_count = len(self.conversation_history)
+            new_tokens = estimate_messages_tokens_rough(self.conversation_history)
+            print(
+                f"   ✅ Compressed: {original_count} → {new_count} messages "
+                f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
+            )
+        except Exception as e:
+            print(f"   ❌ Compression failed: {e}")
+
         if self.verbose:
             logging.getLogger().setLevel(logging.DEBUG)
             for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index b7e5a621..5de1c6bc 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -26,6 +26,7 @@ COMMANDS = {
     "/skills": "Search, install, inspect, or manage skills from online registries",
     "/platforms": "Show gateway/messaging platform status",
     "/verbose": "Cycle tool progress display: off → new → all → verbose",
+    "/compress": "Manually compress conversation context (flush memories + summarize)",
     "/quit": "Exit the CLI (also: /exit, /q)",
 }