diff --git a/cli.py b/cli.py index 7f2b160b1..2081c7aae 100755 --- a/cli.py +++ b/cli.py @@ -1724,6 +1724,8 @@ class HermesCLI: self._toggle_verbose() elif cmd_lower == "/compress": self._manual_compress() + elif cmd_lower == "/usage": + self._show_usage() else: # Check for skill slash commands (/gif-search, /axolotl, etc.) base_cmd = cmd_lower.split()[0] @@ -1800,6 +1802,42 @@ class HermesCLI: except Exception as e: print(f" ❌ Compression failed: {e}") + def _show_usage(self): + """Show cumulative token usage for the current session.""" + if not self.agent: + print("(._.) No active agent -- send a message first.") + return + + agent = self.agent + prompt = agent.session_prompt_tokens + completion = agent.session_completion_tokens + total = agent.session_total_tokens + calls = agent.session_api_calls + + if calls == 0: + print("(._.) No API calls made yet in this session.") + return + + # Current context window state + compressor = agent.context_compressor + last_prompt = compressor.last_prompt_tokens + ctx_len = compressor.context_length + pct = (last_prompt / ctx_len * 100) if ctx_len else 0 + compressions = compressor.compression_count + + msg_count = len(self.conversation_history) + + print(f" 📊 Session Token Usage") + print(f" {'─' * 40}") + print(f" Prompt tokens (input): {prompt:>10,}") + print(f" Completion tokens (output): {completion:>9,}") + print(f" Total tokens: {total:>10,}") + print(f" API calls: {calls:>10,}") + print(f" {'─' * 40}") + print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)") + print(f" Messages: {msg_count}") + print(f" Compressions: {compressions}") + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 5de1c6bc6..b091a7905 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -27,6 +27,7 @@ COMMANDS = { "/platforms": "Show gateway/messaging platform status", "/verbose": "Cycle tool progress display: off → new → all → verbose", "/compress": "Manually compress conversation context (flush memories + summarize)", + "/usage": "Show token usage for the current session", "/quit": "Exit the CLI (also: /exit, /q)", } diff --git a/run_agent.py b/run_agent.py index 32b69489f..65dd3c2f0 100644 --- a/run_agent.py +++ b/run_agent.py @@ -535,6 +535,12 @@ class AIAgent: ) self.compression_enabled = compression_enabled self._user_turn_count = 0 + + # Cumulative token usage for the session + self.session_prompt_tokens = 0 + self.session_completion_tokens = 0 + self.session_total_tokens = 0 + self.session_api_calls = 0 if not self.quiet_mode: if compression_enabled: @@ -3105,6 +3111,11 @@ class AIAgent: "total_tokens": total_tokens, } self.context_compressor.update_from_response(usage_dict) + + self.session_prompt_tokens += prompt_tokens + self.session_completion_tokens += completion_tokens + self.session_total_tokens += total_tokens + self.session_api_calls += 1 if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")