feat(cli): add /compress command for manual conversation context compression
Introduce a new "/compress" command in the CLI that lets users manually trigger context compression on the current conversation. The command verifies that there is sufficient conversation history and an active agent before compressing, and reports the message and token counts before and after the operation. The command documentation is updated accordingly.
This commit is contained in:
37
cli.py
37
cli.py
@@ -1722,6 +1722,8 @@ class HermesCLI:
|
|||||||
self._show_gateway_status()
|
self._show_gateway_status()
|
||||||
elif cmd_lower == "/verbose":
|
elif cmd_lower == "/verbose":
|
||||||
self._toggle_verbose()
|
self._toggle_verbose()
|
||||||
|
elif cmd_lower == "/compress":
|
||||||
|
self._manual_compress()
|
||||||
else:
|
else:
|
||||||
# Check for skill slash commands (/gif-search, /axolotl, etc.)
|
# Check for skill slash commands (/gif-search, /axolotl, etc.)
|
||||||
base_cmd = cmd_lower.split()[0]
|
base_cmd = cmd_lower.split()[0]
|
||||||
@@ -1763,6 +1765,41 @@ class HermesCLI:
|
|||||||
}
|
}
|
||||||
self.console.print(labels.get(self.tool_progress_mode, ""))
|
self.console.print(labels.get(self.tool_progress_mode, ""))
|
||||||
|
|
||||||
|
def _manual_compress(self):
|
||||||
|
"""Manually trigger context compression on the current conversation."""
|
||||||
|
if not self.conversation_history or len(self.conversation_history) < 4:
|
||||||
|
print("(._.) Not enough conversation to compress (need at least 4 messages).")
|
||||||
|
return
|
||||||
|
|
||||||
|
if not self.agent:
|
||||||
|
print("(._.) No active agent -- send a message first.")
|
||||||
|
return
|
||||||
|
|
||||||
|
if not self.agent.compression_enabled:
|
||||||
|
print("(._.) Compression is disabled in config.")
|
||||||
|
return
|
||||||
|
|
||||||
|
original_count = len(self.conversation_history)
|
||||||
|
try:
|
||||||
|
from agent.model_metadata import estimate_messages_tokens_rough
|
||||||
|
approx_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||||
|
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||||
|
|
||||||
|
compressed, new_system = self.agent._compress_context(
|
||||||
|
self.conversation_history,
|
||||||
|
self.agent._cached_system_prompt or "",
|
||||||
|
approx_tokens=approx_tokens,
|
||||||
|
)
|
||||||
|
self.conversation_history = compressed
|
||||||
|
new_count = len(self.conversation_history)
|
||||||
|
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||||
|
print(
|
||||||
|
f" ✅ Compressed: {original_count} → {new_count} messages "
|
||||||
|
f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ❌ Compression failed: {e}")
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
logging.getLogger().setLevel(logging.DEBUG)
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
|
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ COMMANDS = {
|
|||||||
"/skills": "Search, install, inspect, or manage skills from online registries",
|
"/skills": "Search, install, inspect, or manage skills from online registries",
|
||||||
"/platforms": "Show gateway/messaging platform status",
|
"/platforms": "Show gateway/messaging platform status",
|
||||||
"/verbose": "Cycle tool progress display: off → new → all → verbose",
|
"/verbose": "Cycle tool progress display: off → new → all → verbose",
|
||||||
|
"/compress": "Manually compress conversation context (flush memories + summarize)",
|
||||||
"/quit": "Exit the CLI (also: /exit, /q)",
|
"/quit": "Exit the CLI (also: /exit, /q)",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user