feat(cli): add /usage command to display session token usage

Introduced a new command "/usage" in the CLI to show cumulative token usage for the current session. This includes details on prompt tokens, completion tokens, total tokens, API calls, and context state. Updated command documentation to reflect this addition. Enhanced the AIAgent class to track token usage throughout the session.
This commit is contained in:
teknium1
2026-03-01 00:23:19 -08:00
parent 30efc263ff
commit 177be32b7f
3 changed files with 50 additions and 0 deletions

38
cli.py
View File

@@ -1724,6 +1724,8 @@ class HermesCLI:
self._toggle_verbose()
elif cmd_lower == "/compress":
self._manual_compress()
elif cmd_lower == "/usage":
self._show_usage()
else:
# Check for skill slash commands (/gif-search, /axolotl, etc.)
base_cmd = cmd_lower.split()[0]
@@ -1800,6 +1802,42 @@ class HermesCLI:
except Exception as e:
print(f" ❌ Compression failed: {e}")
def _show_usage(self):
    """Print cumulative token usage for the current session to stdout.

    Reads the session counters from ``self.agent`` (prompt, completion and
    total tokens, plus the number of API calls), the context-window state
    from ``self.agent.context_compressor``, and the message count from
    ``self.conversation_history``.

    Prints a short notice and returns early when there is no agent yet or
    when the agent has not made any API calls. Returns ``None``.
    """
    agent = self.agent
    if not agent:
        print("(._.) No active agent -- send a message first.")
        return

    calls = agent.session_api_calls
    if calls == 0:
        print("(._.) No API calls made yet in this session.")
        return

    # Current context window state
    compressor = agent.context_compressor
    last_prompt = compressor.last_prompt_tokens
    ctx_len = compressor.context_length
    # Guard against a zero/unset context length to avoid ZeroDivisionError.
    pct = (last_prompt / ctx_len * 100) if ctx_len else 0
    compressions = compressor.compression_count
    msg_count = len(self.conversation_history)

    # Visible horizontal rule; multiplying an empty string would print nothing.
    rule = "─" * 40

    # Every label is padded to the width of the longest one (27 chars) and
    # every value is right-aligned in 10 columns, so the numbers line up.
    counters = (
        ("Prompt tokens (input):", agent.session_prompt_tokens),
        ("Completion tokens (output):", agent.session_completion_tokens),
        ("Total tokens:", agent.session_total_tokens),
        ("API calls:", calls),
    )

    print(" 📊 Session Token Usage")
    print(f" {rule}")
    for label, value in counters:
        print(f" {label:<27}{value:>10,}")
    print(f" {rule}")
    print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
    print(f" Messages: {msg_count}")
    print(f" Compressions: {compressions}")
if self.verbose:
logging.getLogger().setLevel(logging.DEBUG)
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):

View File

@@ -27,6 +27,7 @@ COMMANDS = {
"/platforms": "Show gateway/messaging platform status",
"/verbose": "Cycle tool progress display: off → new → all → verbose",
"/compress": "Manually compress conversation context (flush memories + summarize)",
"/usage": "Show token usage for the current session",
"/quit": "Exit the CLI (also: /exit, /q)",
}

View File

@@ -535,6 +535,12 @@ class AIAgent:
)
self.compression_enabled = compression_enabled
self._user_turn_count = 0
# Cumulative token usage for the session
self.session_prompt_tokens = 0
self.session_completion_tokens = 0
self.session_total_tokens = 0
self.session_api_calls = 0
if not self.quiet_mode:
if compression_enabled:
@@ -3105,6 +3111,11 @@ class AIAgent:
"total_tokens": total_tokens,
}
self.context_compressor.update_from_response(usage_dict)
self.session_prompt_tokens += prompt_tokens
self.session_completion_tokens += completion_tokens
self.session_total_tokens += total_tokens
self.session_api_calls += 1
if self.verbose_logging:
logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")