feat(cli): add /usage command to display session token usage
Introduced a new command "/usage" in the CLI to show cumulative token usage for the current session. This includes details on prompt tokens, completion tokens, total tokens, API calls, and context state. Updated command documentation to reflect this addition. Enhanced the AIAgent class to track token usage throughout the session.
This commit is contained in:
38
cli.py
38
cli.py
@@ -1724,6 +1724,8 @@ class HermesCLI:
|
||||
self._toggle_verbose()
|
||||
elif cmd_lower == "/compress":
|
||||
self._manual_compress()
|
||||
elif cmd_lower == "/usage":
|
||||
self._show_usage()
|
||||
else:
|
||||
# Check for skill slash commands (/gif-search, /axolotl, etc.)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
@@ -1800,6 +1802,42 @@ class HermesCLI:
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
|
||||
def _show_usage(self):
|
||||
"""Show cumulative token usage for the current session."""
|
||||
if not self.agent:
|
||||
print("(._.) No active agent -- send a message first.")
|
||||
return
|
||||
|
||||
agent = self.agent
|
||||
prompt = agent.session_prompt_tokens
|
||||
completion = agent.session_completion_tokens
|
||||
total = agent.session_total_tokens
|
||||
calls = agent.session_api_calls
|
||||
|
||||
if calls == 0:
|
||||
print("(._.) No API calls made yet in this session.")
|
||||
return
|
||||
|
||||
# Current context window state
|
||||
compressor = agent.context_compressor
|
||||
last_prompt = compressor.last_prompt_tokens
|
||||
ctx_len = compressor.context_length
|
||||
pct = (last_prompt / ctx_len * 100) if ctx_len else 0
|
||||
compressions = compressor.compression_count
|
||||
|
||||
msg_count = len(self.conversation_history)
|
||||
|
||||
print(f" 📊 Session Token Usage")
|
||||
print(f" {'─' * 40}")
|
||||
print(f" Prompt tokens (input): {prompt:>10,}")
|
||||
print(f" Completion tokens (output): {completion:>9,}")
|
||||
print(f" Total tokens: {total:>10,}")
|
||||
print(f" API calls: {calls:>10,}")
|
||||
print(f" {'─' * 40}")
|
||||
print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
|
||||
print(f" Messages: {msg_count}")
|
||||
print(f" Compressions: {compressions}")
|
||||
|
||||
if self.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
|
||||
|
||||
@@ -27,6 +27,7 @@ COMMANDS = {
|
||||
"/platforms": "Show gateway/messaging platform status",
|
||||
"/verbose": "Cycle tool progress display: off → new → all → verbose",
|
||||
"/compress": "Manually compress conversation context (flush memories + summarize)",
|
||||
"/usage": "Show token usage for the current session",
|
||||
"/quit": "Exit the CLI (also: /exit, /q)",
|
||||
}
|
||||
|
||||
|
||||
11
run_agent.py
11
run_agent.py
@@ -535,6 +535,12 @@ class AIAgent:
|
||||
)
|
||||
self.compression_enabled = compression_enabled
|
||||
self._user_turn_count = 0
|
||||
|
||||
# Cumulative token usage for the session
|
||||
self.session_prompt_tokens = 0
|
||||
self.session_completion_tokens = 0
|
||||
self.session_total_tokens = 0
|
||||
self.session_api_calls = 0
|
||||
|
||||
if not self.quiet_mode:
|
||||
if compression_enabled:
|
||||
@@ -3105,6 +3111,11 @@ class AIAgent:
|
||||
"total_tokens": total_tokens,
|
||||
}
|
||||
self.context_compressor.update_from_response(usage_dict)
|
||||
|
||||
self.session_prompt_tokens += prompt_tokens
|
||||
self.session_completion_tokens += completion_tokens
|
||||
self.session_total_tokens += total_tokens
|
||||
self.session_api_calls += 1
|
||||
|
||||
if self.verbose_logging:
|
||||
logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
|
||||
|
||||
Reference in New Issue
Block a user