feat: introduce clarifying questions tool for interactive user engagement

- Added a new `clarify_tool` to enable the agent to ask structured multiple-choice or open-ended questions to users.
- Implemented callback functionality for user interaction, allowing the platform to handle UI presentation.
- Updated the CLI and agent to support clarifying questions, including timeout handling and routing of the user's response back to the agent.
- Enhanced toolset definitions and requirements to include the clarify tool, ensuring availability across platforms.
This commit is contained in:
teknium1
2026-02-19 20:06:14 -08:00
parent 997f793af1
commit 9350e26e68
6 changed files with 386 additions and 2 deletions

194
cli.py
View File

@@ -32,7 +32,8 @@ from prompt_toolkit.history import FileHistory
from prompt_toolkit.styles import Style as PTStyle
from prompt_toolkit.patch_stdout import patch_stdout
from prompt_toolkit.application import Application
from prompt_toolkit.layout import Layout, HSplit, Window, FormattedTextControl
from prompt_toolkit.layout import Layout, HSplit, Window, FormattedTextControl, ConditionalContainer
from prompt_toolkit.filters import Condition
from prompt_toolkit.layout.dimension import Dimension
from prompt_toolkit.layout.menus import CompletionsMenu
from prompt_toolkit.widgets import TextArea
@@ -716,6 +717,7 @@ class HermesCLI:
# Agent will be initialized on first use
self.agent: Optional[AIAgent] = None
self._app = None # prompt_toolkit Application (set in run())
# Conversation state
self.conversation_history: List[Dict[str, Any]] = []
@@ -761,6 +763,7 @@ class HermesCLI:
session_id=self.session_id, # Pass CLI's session ID to agent
platform="cli", # CLI interface — agent uses terminal-friendly formatting
session_db=self._session_db,
clarify_callback=self._clarify_callback,
)
return True
except Exception as e:
@@ -1443,6 +1446,51 @@ class HermesCLI:
return True
# How long to wait for the user to answer a clarify question before
# the agent auto-proceeds with its own judgment (seconds).
CLARIFY_TIMEOUT = 120
def _clarify_callback(self, question, choices):
    """
    Present a clarify question to the user and wait for the answer.

    Invoked from the agent thread by the clarify tool. Publishes the
    question through ``self._clarify_state`` (read by the prompt_toolkit
    widgets and key bindings), requests a repaint, then blocks on a
    private queue until an answer is put on it.

    Args:
        question: Question text to show.
        choices: Predefined answers; a falsy value makes the question
            open-ended and starts directly in freetext mode.

    Returns:
        The answer taken from the response queue, or a fixed instruction
        string telling the agent to decide on its own when nothing
        arrives within ``CLARIFY_TIMEOUT`` seconds.
    """
    answer_queue = queue.Queue()
    open_ended = not choices

    self._clarify_state = {
        "question": question,
        "choices": [] if open_ended else choices,
        "selected": 0,
        "response_queue": answer_queue,
    }
    # Open-ended questions have nothing to select, so go straight to typing.
    self._clarify_freetext = open_ended

    # This runs off the main thread; ask prompt_toolkit to redraw.
    app = getattr(self, '_app', None)
    if app:
        app.invalidate()

    # Bounded wait so unattended sessions cannot hang forever.
    try:
        return answer_queue.get(timeout=self.CLARIFY_TIMEOUT)
    except queue.Empty:
        pass

    # Timed out: remove the question panel and hand the decision back.
    self._clarify_state = None
    self._clarify_freetext = False
    app = getattr(self, '_app', None)
    if app:
        app.invalidate()
    _cprint(f"\n{_DIM}(clarify timed out after {self.CLARIFY_TIMEOUT}s — agent will decide){_RST}")
    return (
        "The user did not provide a response within the time limit. "
        "Use your best judgement to make the choice and proceed."
    )
def chat(self, message: str) -> Optional[str]:
"""
Send a message to the agent and get a response.
@@ -1487,12 +1535,20 @@ class HermesCLI:
# Monitor the dedicated interrupt queue while the agent runs.
# _interrupt_queue is separate from _pending_input, so process_loop
# and chat() never compete for the same queue.
# When a clarify question is active, user input is handled entirely
# by the Enter key binding (routed to the clarify response queue),
# so we skip interrupt processing to avoid stealing that input.
interrupt_msg = None
while agent_thread.is_alive():
if hasattr(self, '_interrupt_queue'):
try:
interrupt_msg = self._interrupt_queue.get(timeout=0.1)
if interrupt_msg:
# If clarify is active, the Enter handler routes
# input directly; this queue shouldn't have anything.
# But if it does (race condition), don't interrupt.
if self._clarify_state or self._clarify_freetext:
continue
print(f"\n⚡ New message detected, interrupting...")
self.agent.interrupt(interrupt_msg)
break
@@ -1566,6 +1622,12 @@ class HermesCLI:
self._interrupt_queue = queue.Queue() # For messages typed while agent is running
self._should_exit = False
self._last_ctrl_c_time = 0 # Track double Ctrl+C for force exit
# Clarify tool state: interactive question/answer with the user.
# When the agent calls the clarify tool, _clarify_callback sets
# _clarify_state and the prompt_toolkit UI shows the question panel:
# a selectable list when choices were given, or direct freetext input
# when the question is open-ended.
self._clarify_state = None # dict with question, choices, selected, response_queue
self._clarify_freetext = False # True while the user types a free-form answer (open-ended question, or "Other" was chosen)
# Key bindings for the input area
kb = KeyBindings()
@@ -1575,11 +1637,40 @@ class HermesCLI:
"""Handle Enter key - submit input.
Routes to the correct queue based on agent state:
- Clarify freetext mode: answer goes to the clarify response queue
- Clarify choice mode: selected choice goes to the clarify response queue
- Agent running: goes to _interrupt_queue (chat() monitors this)
- Agent idle: goes to _pending_input (process_loop monitors this)
Commands (starting with /) always go to _pending_input so they're
handled as commands, not sent as interrupt text to the agent.
"""
# --- Clarify freetext mode: user typed their own answer ---
if self._clarify_freetext and self._clarify_state:
text = event.app.current_buffer.text.strip()
if text:
self._clarify_state["response_queue"].put(text)
self._clarify_state = None
self._clarify_freetext = False
event.app.current_buffer.reset()
event.app.invalidate()
return
# --- Clarify choice mode: confirm the highlighted selection ---
if self._clarify_state and not self._clarify_freetext:
state = self._clarify_state
selected = state["selected"]
choices = state.get("choices") or []
if selected < len(choices):
state["response_queue"].put(choices[selected])
self._clarify_state = None
event.app.invalidate()
else:
# "Other" selected → switch to freetext
self._clarify_freetext = True
event.app.invalidate()
return
# --- Normal input routing ---
text = event.app.current_buffer.text.strip()
if text:
if self._agent_running and not text.startswith("/"):
@@ -1597,6 +1688,24 @@ class HermesCLI:
def handle_ctrl_enter(event):
"""Ctrl+Enter (c-j) inserts a newline. Most terminals send c-j for Ctrl+Enter."""
event.current_buffer.insert_text('\n')
# --- Clarify tool: arrow-key navigation for multiple-choice questions ---
@kb.add('up', filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))
def clarify_up(event):
    """Highlight the previous clarify choice, stopping at the first one."""
    state = self._clarify_state
    if not state:
        return
    previous_idx = state["selected"] - 1
    state["selected"] = max(0, previous_idx)
    event.app.invalidate()
@kb.add('down', filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))
def clarify_down(event):
    """Highlight the next clarify choice, stopping at the trailing "Other" row."""
    state = self._clarify_state
    if not state:
        return
    # The "Other" row sits one past the last real choice, so its index
    # equals the number of choices.
    other_idx = len(state.get("choices") or [])
    state["selected"] = min(other_idx, state["selected"] + 1)
    event.app.invalidate()
@kb.add('c-c')
def handle_ctrl_c(event):
@@ -1631,10 +1740,15 @@ class HermesCLI:
self._should_exit = True
event.app.exit()
# Dynamic prompt: shows Hermes symbol when agent is working
# Dynamic prompt: shows Hermes symbol when agent is working,
# or answer prompt when clarify freetext mode is active.
cli_ref = self
def get_prompt():
    """Return the input prompt fragment for the current CLI state.

    Precedence: clarify freetext entry, then a pending clarify question,
    then a running agent, then the idle prompt.
    """
    # NOTE(review): several prompt symbols read as a bare space in this
    # view of the file; confirm no glyph was lost before relying on them.
    if cli_ref._clarify_freetext:
        style, symbol = 'class:clarify-selected', ' '
    elif cli_ref._clarify_state:
        style, symbol = 'class:prompt-working', '? '
    elif cli_ref._agent_running:
        style, symbol = 'class:prompt-working', ' '
    else:
        style, symbol = 'class:prompt', ' '
    return [(style, symbol)]
@@ -1691,18 +1805,83 @@ class HermesCLI:
def get_hint_text():
    """Return the formatted-text fragments for the one-line hint row.

    Returns:
        A list of ``(style, text)`` tuples: a clarify hint while a
        question is pending, the interrupt hint while the agent runs and
        the input buffer is empty, otherwise an empty list.
    """
    # Clarify is checked first, in the same order as get_hint_height(),
    # so the row reserved for a pending question is never left blank.
    if cli_ref._clarify_state:
        if cli_ref._clarify_freetext:
            return [('class:hint', ' type your answer and press Enter')]
        return [('class:hint', ' ↑/↓ to select, Enter to confirm')]

    if not cli_ref._agent_running:
        return []

    # Only suggest interrupting while nothing has been typed yet.
    if input_area.buffer.text:
        return []
    return [('class:hint', ' type here to interrupt')]
def get_hint_height():
    """Return 1 when the hint row should be shown (clarify pending or agent running), else 0."""
    show_row = bool(cli_ref._clarify_state) or cli_ref._agent_running
    return 1 if show_row else 0
spacer = Window(
content=FormattedTextControl(get_hint_text),
height=get_hint_height,
)
# --- Clarify tool: dynamic display widget for questions + choices ---
def _get_clarify_display():
"""Build styled text for the clarify question/choices panel.

Reads the current clarify state from ``cli_ref`` and returns a list of
``(style_class, text)`` tuples for a FormattedTextControl. Returns an
empty list when no clarify question is pending.
"""
state = cli_ref._clarify_state
if not state:
return []
question = state["question"]
choices = state.get("choices") or []
selected = state.get("selected", 0)
# Accumulates (style, text) fragments in display order.
lines = []
# Box top border
lines.append(('class:clarify-border', '╭─ '))
lines.append(('class:clarify-title', 'Hermes needs your input'))
lines.append(('class:clarify-border', ' ─────────────────────────────╮\n'))
lines.append(('class:clarify-border', '\n'))
# Question text
lines.append(('class:clarify-border', ''))
lines.append(('class:clarify-question', question))
lines.append(('', '\n'))
lines.append(('class:clarify-border', '\n'))
if choices:
# Multiple-choice mode: show selectable options
for i, choice in enumerate(choices):
lines.append(('class:clarify-border', ''))
# The highlight follows `selected` only while the user is still
# picking; once freetext mode is on, no listed choice is highlighted.
if i == selected and not cli_ref._clarify_freetext:
lines.append(('class:clarify-selected', f' {choice}'))
else:
lines.append(('class:clarify-choice', f' {choice}'))
lines.append(('', '\n'))
# "Other" option (5th line, only shown when choices exist)
# Its index is one past the last real choice, i.e. len(choices).
other_idx = len(choices)
lines.append(('class:clarify-border', ''))
if selected == other_idx and not cli_ref._clarify_freetext:
lines.append(('class:clarify-selected', ' Other (type your answer)'))
elif cli_ref._clarify_freetext:
# Freetext mode is active: point the user at the input area.
lines.append(('class:clarify-active-other', ' Other (type below)'))
else:
lines.append(('class:clarify-choice', ' Other (type your answer)'))
lines.append(('', '\n'))
lines.append(('class:clarify-border', '\n'))
# Box bottom border
lines.append(('class:clarify-border', '╰──────────────────────────────────────────────────╯\n'))
return lines
clarify_widget = ConditionalContainer(
Window(
FormattedTextControl(_get_clarify_display),
wrap_lines=True,
),
filter=Condition(lambda: cli_ref._clarify_state is not None),
)
# Horizontal rules above and below the input (bronze, 1 line each).
# The bottom rule moves down as the TextArea grows with newlines.
@@ -1720,9 +1899,12 @@ class HermesCLI:
# after agent output has filled the terminal via patch_stdout. Float-based
# menus lose their rendering space in non-full-screen mode once scrollback
# pushes the app area to the very bottom of the terminal.
# The clarify_widget appears above the input area when the agent
# asks a multiple-choice or open-ended question.
layout = Layout(
HSplit([
Window(height=0),
clarify_widget,
spacer,
input_rule_top,
input_area,
@@ -1744,6 +1926,13 @@ class HermesCLI:
'completion-menu.completion.current': 'bg:#333355 #FFD700',
'completion-menu.meta.completion': 'bg:#1a1a2e #888888',
'completion-menu.meta.completion.current': 'bg:#333355 #FFBF00',
# Clarify question panel
'clarify-border': '#CD7F32',
'clarify-title': '#FFD700 bold',
'clarify-question': '#FFF8DC bold',
'clarify-choice': '#AAAAAA',
'clarify-selected': '#FFD700 bold',
'clarify-active-other': '#FFD700 italic',
})
# Create the application
@@ -1754,6 +1943,7 @@ class HermesCLI:
full_screen=False,
mouse_support=False,
)
self._app = app # Store reference for clarify_callback
# Background thread to process inputs and run agent
def process_loop():

View File

@@ -93,6 +93,8 @@ from tools.todo_tool import todo_tool, check_todo_requirements, TODO_SCHEMA
from tools.memory_tool import memory_tool, check_memory_requirements, MEMORY_SCHEMA
# Session search tool (past conversation recall with summarization)
from tools.session_search_tool import session_search, check_session_search_requirements, SESSION_SEARCH_SCHEMA
# Clarifying questions tool
from tools.clarify_tool import clarify_tool, check_clarify_requirements, CLARIFY_SCHEMA
from toolsets import (
get_toolset, resolve_toolset, resolve_multiple_toolsets,
get_all_toolsets, get_toolset_names, validate_toolset,
@@ -203,6 +205,13 @@ TOOLSET_REQUIREMENTS = {
"setup_url": "https://openrouter.ai/keys",
"tools": ["session_search"],
},
"clarify": {
"name": "Clarifying Questions",
"env_vars": [], # Pure UI interaction, no external deps
"check_fn": check_clarify_requirements,
"setup_url": None,
"tools": ["clarify"],
},
}
@@ -986,6 +995,16 @@ def get_session_search_tool_definitions() -> List[Dict[str, Any]]:
return [{"type": "function", "function": SESSION_SEARCH_SCHEMA}]
def get_clarify_tool_definitions() -> List[Dict[str, Any]]:
    """
    Build the tool-definition list for the clarifying questions tool.

    Returns:
        List[Dict]: A single-element list wrapping CLARIFY_SCHEMA as a
        "function"-type tool entry.
    """
    clarify_definition = {"type": "function", "function": CLARIFY_SCHEMA}
    return [clarify_definition]
def get_send_message_tool_definitions():
"""Tool definitions for cross-channel messaging."""
return [
@@ -1151,6 +1170,10 @@ def get_all_tool_names() -> List[str]:
if check_session_search_requirements():
tool_names.extend(["session_search"])
# Clarifying questions (always available)
if check_clarify_requirements():
tool_names.extend(["clarify"])
# Cross-channel messaging (always available on messaging platforms)
tool_names.extend(["send_message"])
@@ -1336,6 +1359,11 @@ def get_tool_definitions(
for tool in get_session_search_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
# Clarifying questions tool
if check_clarify_requirements():
for tool in get_clarify_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
# Cross-channel messaging (always available on messaging platforms)
for tool in get_send_message_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
@@ -2333,6 +2361,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
"tools": ["session_search"],
"description": "Session history search: FTS5 search + Gemini Flash summarization of past conversations",
"requirements": ["OPENROUTER_API_KEY", "~/.hermes/state.db"]
},
"clarify_tools": {
"available": check_clarify_requirements(),
"tools": ["clarify"],
"description": "Clarifying questions: ask the user multiple-choice or open-ended questions",
"requirements": []
}
}

View File

@@ -1074,6 +1074,7 @@ class AIAgent:
provider_sort: str = None,
session_id: str = None,
tool_progress_callback: callable = None,
clarify_callback: callable = None,
max_tokens: int = None,
reasoning_config: Dict[str, Any] = None,
prefill_messages: List[Dict[str, Any]] = None,
@@ -1105,6 +1106,8 @@ class AIAgent:
provider_sort (str): Sort providers by price/throughput/latency (optional)
session_id (str): Pre-generated session ID for logging (optional, auto-generated if not provided)
tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
If None, defaults to {"enabled": True, "effort": "xhigh"} for OpenRouter. Set to disable/customize reasoning.
@@ -1132,6 +1135,7 @@ class AIAgent:
# When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
self.base_url = base_url or "https://openrouter.ai/api/v1"
self.tool_progress_callback = tool_progress_callback
self.clarify_callback = clarify_callback
self._last_reported_tool = None # Track for "new tool" mode
# Interrupt mechanism for breaking out of tool loops
@@ -2936,6 +2940,17 @@ class AIAgent:
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {self._get_cute_tool_message('memory', function_args, tool_duration)}")
# Clarify tool -- delegates to platform-provided callback
elif function_name == "clarify":
from tools.clarify_tool import clarify_tool as _clarify_tool
function_result = _clarify_tool(
question=function_args.get("question", ""),
choices=function_args.get("choices"),
callback=self.clarify_callback,
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {self._get_cute_tool_message('clarify', function_args, tool_duration)}")
# Execute other tools - with animated kawaii spinner in quiet mode
# The face is "alive" while the tool works, then vanishes
# and is replaced by the clean result line.
@@ -2955,6 +2970,7 @@ class AIAgent:
'skills_list': '📚', 'skill_view': '📚',
'schedule_cronjob': '', 'list_cronjobs': '', 'remove_cronjob': '',
'send_message': '📨', 'todo': '📋', 'memory': '🧠', 'session_search': '🔍',
'clarify': '',
}
emoji = tool_emoji_map.get(function_name, '')
preview = _build_tool_preview(function_name, function_args) or function_name

View File

@@ -142,6 +142,13 @@ from .todo_tool import (
TodoStore,
)
# Clarifying questions tool (interactive Q&A with the user)
from .clarify_tool import (
clarify_tool,
check_clarify_requirements,
CLARIFY_SCHEMA,
)
# File tools have no external requirements - they use the terminal backend
def check_file_requirements():
"""File tools only require terminal backend to be available."""
@@ -239,5 +246,9 @@ __all__ = [
'check_todo_requirements',
'TODO_SCHEMA',
'TodoStore',
# Clarifying questions tool
'clarify_tool',
'check_clarify_requirements',
'CLARIFY_SCHEMA',
]

125
tools/clarify_tool.py Normal file
View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
Clarify Tool Module - Interactive Clarifying Questions
Allows the agent to present structured multiple-choice questions or open-ended
prompts to the user. In CLI mode, choices are navigable with arrow keys. On
messaging platforms, choices are rendered as a numbered list.
The actual user-interaction logic lives in the platform layer (cli.py for CLI,
gateway/run.py for messaging). This module defines the schema, validation, and
a thin dispatcher that delegates to a platform-provided callback.
"""
import json
from typing import Dict, Any, List, Optional, Callable
# Maximum number of predefined choices the agent can offer.
# A 5th "Other (type your answer)" option is always appended by the UI.
MAX_CHOICES = 4


def clarify_tool(
    question: str,
    choices: Optional[List[str]] = None,
    callback: Optional[Callable] = None,
) -> str:
    """
    Ask the user a question, optionally with multiple-choice options.

    Args:
        question: The question text to present. Must be a non-blank string.
        choices: Up to MAX_CHOICES predefined answer choices. Entries are
            stringified and stripped; ``None`` and blank entries are
            dropped and extras beyond MAX_CHOICES are discarded. When
            omitted (or empty after cleaning) the question is open-ended.
        callback: Platform-provided function that handles the actual UI
            interaction. Signature: callback(question, choices) -> str.

    Returns:
        JSON string. On success it has ``question``, ``choices_offered``
        and ``user_response``; on failure it has a single ``error`` key.
        This function never raises for bad arguments or callback errors.
    """
    # Tool arguments come from model output, so a non-string question is
    # reported as an error instead of crashing on `.strip()`.
    if not isinstance(question, str) or not question.strip():
        return json.dumps({"error": "Question text is required."}, ensure_ascii=False)
    question = question.strip()

    # Validate and trim choices
    if choices is not None:
        if not isinstance(choices, list):
            return json.dumps({"error": "choices must be a list of strings."}, ensure_ascii=False)
        cleaned = []
        for item in choices:
            if item is None:
                # str(None) would otherwise surface as a bogus "None" choice.
                continue
            text = str(item).strip()
            if text:
                cleaned.append(text)
        # Cap at MAX_CHOICES; an empty result means open-ended.
        choices = cleaned[:MAX_CHOICES] or None

    if callback is None:
        return json.dumps(
            {"error": "Clarify tool is not available in this execution context."},
            ensure_ascii=False,
        )

    # The callback is platform code; any failure is reported to the agent
    # as a tool error rather than propagated.
    try:
        user_response = callback(question, choices)
    except Exception as exc:
        return json.dumps(
            {"error": f"Failed to get user input: {exc}"},
            ensure_ascii=False,
        )

    # A callback that returns None gave no answer; do not report the
    # literal text "None" as if the user had typed it.
    response_text = "" if user_response is None else str(user_response).strip()

    return json.dumps({
        "question": question,
        "choices_offered": choices,
        "user_response": response_text,
    }, ensure_ascii=False)
def check_clarify_requirements() -> bool:
    """Report whether the clarify tool can be offered.

    Returns:
        Always True: the tool needs no API keys or external services.
    """
    return True
# =============================================================================
# OpenAI Function-Calling Schema
# =============================================================================
# Function-calling schema for the clarify tool: the "name" the model calls,
# the usage guidance it sees, and the JSON-schema shape of the arguments.
CLARIFY_SCHEMA = {
"name": "clarify",
"description": (
"Ask the user a question when you need clarification, feedback, or a "
"decision before proceeding. Supports two modes:\n\n"
"1. **Multiple choice** — provide up to 4 choices. The user picks one "
"or types their own answer via a 5th 'Other' option.\n"
"2. **Open-ended** — omit choices entirely. The user types a free-form "
"response.\n\n"
"Use this tool when:\n"
"- The task is ambiguous and you need the user to choose an approach\n"
"- You want post-task feedback ('How did that work out?')\n"
"- You want to offer to save a skill or update memory\n"
"- A decision has meaningful trade-offs the user should weigh in on\n\n"
"Do NOT use this tool for simple yes/no confirmation of dangerous "
"commands (the terminal tool handles that). Prefer making a reasonable "
"default choice yourself when the decision is low-stakes."
),
"parameters": {
"type": "object",
"properties": {
"question": {
"type": "string",
"description": "The question to present to the user.",
},
"choices": {
"type": "array",
"items": {"type": "string"},
# Same limit that clarify_tool enforces by truncating longer lists.
"maxItems": MAX_CHOICES,
"description": (
"Up to 4 answer choices. Omit this parameter entirely to "
"ask an open-ended question. When provided, the UI "
"automatically appends an 'Other (type your answer)' option."
),
},
},
# Only the question is mandatory; omitting "choices" selects open-ended mode.
"required": ["question"],
},
}

View File

@@ -132,6 +132,12 @@ TOOLSETS = {
"includes": []
},
"clarify": {
"description": "Ask the user clarifying questions (multiple-choice or open-ended)",
"tools": ["clarify"],
"includes": []
},
# Scenario-specific toolsets
@@ -181,6 +187,8 @@ TOOLSETS = {
"memory",
# Session history search
"session_search",
# Clarifying questions
"clarify",
# Cronjob management (CLI-only)
"schedule_cronjob", "list_cronjobs", "remove_cronjob"
],