diff --git a/model_tools.py b/model_tools.py index 08ea89f94..decc6983f 100644 --- a/model_tools.py +++ b/model_tools.py @@ -267,7 +267,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]: "type": "function", "function": { "name": "web_search", - "description": "Search the web for information on any topic. Returns up to 5 relevant results with titles and URLs. Uses advanced search depth for comprehensive results. PREFERRED over browser tools for finding information - faster and more cost-effective. Use browser tools only when you need to interact with pages (click, fill forms, handle dynamic content).", + "description": "Search the web for information on any topic. Returns up to 5 relevant results with titles, URLs, and descriptions.", "parameters": { "type": "object", "properties": { @@ -284,7 +284,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]: "type": "function", "function": { "name": "web_extract", - "description": "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search. The content returned will be excerpts and key points summarized with an LLM to reduce impact on the context window. PREFERRED over browser tools for reading page content - faster and more cost-effective. Use browser tools only when pages require interaction or have dynamic content.", + "description": "Extract content from web page URLs. Pages under 5000 chars return raw content; larger pages are LLM-summarized and capped at ~5000 chars per page. Pages over 2M chars are refused. Use browser tools only when pages require interaction or dynamic content.", "parameters": { "type": "object", "properties": { @@ -367,13 +367,13 @@ def get_vision_tool_definitions() -> List[Dict[str, Any]]: "type": "function", "function": { "name": "vision_analyze", - "description": "Analyze images using AI vision. Accepts HTTP/HTTPS URLs or local file paths (e.g. from the image cache). Provides comprehensive image description and answers specific questions about the image content. Perfect for understanding visual content, reading text in images, identifying objects, analyzing scenes, and extracting visual information.", + "description": "Analyze images using AI vision. Provides a comprehensive description and answers a specific question about the image content.", "parameters": { "type": "object", "properties": { "image_url": { "type": "string", - "description": "The URL or local file path of the image to analyze. Accepts publicly accessible HTTP/HTTPS URLs or local file paths (e.g. /home/user/.hermes/image_cache/abc123.jpg)." + "description": "Image URL (http/https) or local file path to analyze." }, "question": { "type": "string", @@ -399,7 +399,7 @@ def get_moa_tool_definitions() -> List[Dict[str, Any]]: "type": "function", "function": { "name": "mixture_of_agents", - "description": "Process extremely difficult problems requiring intense reasoning using a Mixture-of-Agents. This tool leverages multiple frontier language models to collaboratively solve complex tasks that single models struggle with. Uses a fixed 2-layer architecture: reference models generate diverse responses, then an aggregator synthesizes the best solution. Best for: complex mathematical proofs, advanced coding problems, multi-step analytical reasoning, precise and complex STEM problems, algorithm design, and problems requiring diverse domain expertise.", + "description": "Route a hard problem through multiple frontier LLMs collaboratively. Makes 5 API calls (4 reference models + 1 aggregator) with maximum reasoning effort — use sparingly for genuinely difficult problems. Best for: complex math, advanced algorithms, multi-step analytical reasoning, problems benefiting from diverse perspectives.", "parameters": { "type": "object", "properties": { @@ -729,13 +729,9 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]: "function": { "name": "read_file", "description": ( - "Read a file with pagination support. Preferred over 'cat' in the terminal because it " - "provides line numbers, handles binary/image files, and suggests similar filenames if " - "the file is not found.\n\n" - "**Output format:** Each line is returned as 'LINE_NUM|CONTENT' for easy reference.\n" - "**Binary files:** Detected automatically; images (png/jpg/gif/webp) are returned as base64 with MIME type and dimensions.\n" - "**Large files:** Use offset and limit to paginate. The response includes total line count and a hint for the next page.\n" - "**Paths:** Supports absolute paths, relative paths (from working directory), and ~ expansion." + "Read a file with line numbers and pagination. Output format: 'LINE_NUM|CONTENT'. " + "Suggests similar filenames if not found. Images (png/jpg/gif/webp) returned as base64. " + "Use offset and limit for large files." ), "parameters": { "type": "object", @@ -766,13 +762,8 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]: "function": { "name": "write_file", "description": ( - "Write content to a file, completely replacing any existing content. Creates parent " - "directories automatically if they don't exist. Preferred over 'echo' or heredoc in the " - "terminal because it safely handles special characters, newlines, and shell metacharacters " - "without escaping issues.\n\n" - "**Important:** This OVERWRITES the entire file. To make targeted edits to an existing file, " - "use the 'patch' tool instead.\n" - "**Paths:** Supports absolute paths, relative paths, and ~ expansion." + "Write content to a file, completely replacing existing content. Creates parent " + "directories automatically. OVERWRITES the entire file — use 'patch' for targeted edits." ), "parameters": { "type": "object", @@ -795,17 +786,11 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]: "function": { "name": "patch", "description": ( - "Modify existing files using targeted edits. Preferred over 'sed' or manual rewriting because " - "it uses intelligent fuzzy matching that tolerates minor whitespace and indentation differences, " - "and auto-runs syntax checks (Python, JS, TS, Go, Rust) after editing.\n\n" - "**Replace mode (recommended):** Find a unique string in the file and replace it. Uses a " - "9-strategy fuzzy matching chain (exact → line-trimmed → whitespace-normalized → " - "indentation-flexible → context-aware) so small formatting differences won't cause failures. " - "Returns a unified diff showing exactly what changed.\n\n" - "**Patch mode:** Apply multi-file changes using V4A patch format for large-scale edits across " - "multiple files in one call.\n\n" - "**Auto-lint:** After every edit, automatically runs syntax checks and reports errors so you " - "can fix them immediately." + "Targeted find-and-replace edits in files. Uses fuzzy matching (9 strategies) so " + "minor whitespace/indentation differences won't break it. Returns a unified diff. " + "Auto-runs syntax checks after editing.\n\n" + "Replace mode (default): find a unique string and replace it.\n" + "Patch mode: apply V4A multi-file patches for bulk changes." ), "parameters": { "type": "object", @@ -847,29 +832,25 @@ def get_file_tool_definitions() -> List[Dict[str, Any]]: "function": { "name": "search_files", "description": ( - "The primary tool for searching code and files. Always use this instead of " - "running grep, rg, find, fd, or ack in the terminal — it's faster (ripgrep-backed), " - "returns structured results with line numbers, and handles pagination automatically.\n\n" - "Use for: finding function/class definitions, searching for error strings, locating " - "config files, finding all files of a type, checking where a variable is used.\n\n" - "**Content search (target='content'):** Regex search inside files with optional " - "file type filtering and context lines. Output modes: full matches with line numbers, " - "file paths only, or match counts per file.\n\n" - "**File search (target='files'):** Find files by glob pattern (e.g., '*.py', '*config*'). " - "Results sorted by modification time so recently changed files appear first." + "Search file contents or find files by name. Ripgrep-backed, faster than " + "grep/rg/find in the terminal.\n\n" + "Content search (target='content'): Regex search inside files. Output modes: " + "full matches with line numbers, file paths only, or match counts.\n\n" + "File search (target='files'): Find files by glob pattern (e.g., '*.py', '*config*'). " + "Results sorted by modification time." ), "parameters": { "type": "object", "properties": { "pattern": { "type": "string", - "description": "Regex pattern for grep mode, or glob pattern (e.g., '*.py', '*config*') for find mode" + "description": "Regex pattern for content search, or glob pattern (e.g., '*.py') for file search" }, "target": { "type": "string", - "enum": ["grep", "find"], - "description": "'grep' searches inside file contents, 'find' searches for files by name", - "default": "grep" + "enum": ["content", "files"], + "description": "'content' searches inside file contents, 'files' searches for files by name", + "default": "content" }, "path": { "type": "string", @@ -1038,9 +1019,8 @@ def get_process_tool_definitions() -> List[Dict[str, Any]]: "Manage background processes started with terminal(background=true). " "Actions: 'list' (show all), 'poll' (check status + new output), " "'log' (full output with pagination), 'wait' (block until done or timeout), " - "'kill' (terminate), 'write' (send raw data to stdin), 'submit' (send data + Enter). " - "Use 'wait' when you have nothing else to do and want " - "to block until a background process finishes." + "'kill' (terminate), 'write' (send raw stdin data without newline), " + "'submit' (send data + Enter, for answering prompts)." ), "parameters": { "type": "object", @@ -1052,7 +1032,7 @@ def get_process_tool_definitions() -> List[Dict[str, Any]]: }, "session_id": { "type": "string", - "description": "Process session ID (from terminal background output). Required for poll/log/wait/kill." + "description": "Process session ID (from terminal background output). Required for all actions except 'list'." }, "data": { "type": "string", @@ -1999,9 +1979,9 @@ def handle_file_function_call( ) elif function_name == "search_files": - # Map user-facing target values to internal ones + # Accept both old enum values (grep/find) and new ones (content/files) target_map = {"grep": "content", "find": "files"} - raw_target = function_args.get("target", "grep") + raw_target = function_args.get("target", "content") target = target_map.get(raw_target, raw_target) return search_tool( pattern=function_args.get("pattern", ""), diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 63a8d6b83..88afc8797 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -281,7 +281,7 @@ atexit.register(_stop_browser_cleanup_thread) BROWSER_TOOL_SCHEMAS = [ { "name": "browser_navigate", - "description": "Navigate to a URL in the browser. Opens the page and waits for it to load. Returns the final URL and page title. IMPORTANT: This should be the FIRST browser tool called - it initializes the browser session and loads the target page. Other browser tools require a page to be loaded first. NOTE: For simple information retrieval, prefer using web_search or web_extract first as they are faster and more cost-effective. Use browser tools when you need to interact with a page (click buttons, fill forms, handle dynamic content).", + "description": "Navigate to a URL in the browser. Initializes the session and loads the page. Must be called before other browser tools. For simple information retrieval, prefer web_search or web_extract (faster, cheaper). Use browser tools when you need to interact with a page (click, fill forms, dynamic content).", "parameters": { "type": "object", "properties": { @@ -295,7 +295,7 @@ BROWSER_TOOL_SCHEMAS = [ }, { "name": "browser_snapshot", - "description": "Get a text-based snapshot of the current page's accessibility tree. Returns interactive elements with ref IDs (like @e1, @e2) that can be used with browser_click and browser_type. Use full=true to get the complete page content including all text; use full=false (default) for a compact view focused on interactive elements. Requires browser_navigate to be called first.", + "description": "Get a text-based snapshot of the current page's accessibility tree. Returns interactive elements with ref IDs (like @e1, @e2) for browser_click and browser_type. full=false (default): compact view with interactive elements. full=true: complete page content. Snapshots over 8000 chars are truncated or LLM-summarized. Requires browser_navigate first.", "parameters": { "type": "object", "properties": { diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index ca51b2ecf..87e9134a0 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -553,12 +553,14 @@ EXECUTE_CODE_SCHEMA = { " Lines are 1-indexed. Returns {\"content\": \"...\", \"total_lines\": N}\n" " write_file(path: str, content: str) -> dict\n" " Always overwrites the entire file.\n" - " search_files(pattern: str, target=\"grep\", path=\".\", file_glob=None, limit=50) -> dict\n" - " target: \"grep\" (search inside files) or \"find\" (find files by name). Returns {\"matches\": [...]}\n" + " search_files(pattern: str, target=\"content\", path=\".\", file_glob=None, limit=50) -> dict\n" + " target: \"content\" (search inside files) or \"files\" (find files by name). Returns {\"matches\": [...]}\n" " patch(path: str, old_string: str, new_string: str, replace_all: bool = False) -> dict\n" " Replaces old_string with new_string in the file.\n" " terminal(command: str, timeout=None, workdir=None) -> dict\n" " Foreground only (no background/pty). Returns {\"output\": \"...\", \"exit_code\": N}\n\n" + "Limits: 5-minute timeout, 50KB stdout cap, max 50 tool calls per script. " + "terminal() is foreground-only (no background or pty).\n\n" "Print your final result to stdout. Use Python stdlib (json, re, math, csv, " "datetime, collections, etc.) for processing between tool calls." ), diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index e56ad5da1..392bf89e5 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -331,7 +331,7 @@ DELEGATE_TASK_SCHEMA = { "Each subagent gets its own conversation, terminal session, and toolset. " "Only the final summary is returned -- intermediate tool results " "never enter your context window.\n\n" - "TWO MODES:\n" + "TWO MODES (one of 'goal' or 'tasks' is required):\n" "1. Single task: provide 'goal' (+ optional context, toolsets)\n" "2. Batch (parallel): provide 'tasks' array with up to 3 items. " "All run concurrently and results are returned together.\n\n" diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index a28bfcc23..314457264 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1194,43 +1194,13 @@ class _ModalEnvironment: # Tool description for LLM -TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux environment. +TERMINAL_TOOL_DESCRIPTION = """Execute commands on a Linux environment. Filesystem persists between calls. -**Environment:** -- Isolated execution environment (local, Docker, or Modal cloud based on configuration) -- Filesystem persists between tool calls within the same task -- Internet access available +Background processes: Set background=true to get a session_id, then use the 'process' tool to poll/wait/kill/write. +Working directory: Use 'workdir' for per-command cwd. +PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL). -**Command Execution:** -- Simple commands: Just provide the 'command' parameter -- Background processes: Set 'background': true to get a session_id for monitoring via the 'process' tool -- Command timeout: Optional 'timeout' parameter in seconds -- Working directory: Optional 'workdir' parameter for per-command cwd -- PTY mode: Set 'pty': true for interactive CLI tools (Codex, Claude Code, etc.) - -**Examples:** -- Run command: `{"command": "ls -la"}` -- Background task: `{"command": "pytest -v tests/", "background": true}` -- returns session_id, use process tool to poll/wait/kill -- With workdir: `{"command": "npm install", "workdir": "/home/user/project"}` -- With timeout: `{"command": "long_task.sh", "timeout": 300}` -- Interactive CLI: `{"command": "codex exec 'Add tests'", "background": true, "pty": true}` - -**Background Process Workflow:** -1. Start: `terminal(command="...", background=true)` -- returns session_id -2. Monitor: `process(action="poll", session_id="...")` -- check status + new output -3. Wait: `process(action="wait", session_id="...", timeout=600)` -- block until done -4. Interact: `process(action="write/submit", session_id="...", data="y")` -- send stdin -5. Kill: `process(action="kill", session_id="...")` -- terminate - -**Best Practices:** -- Use background mode for long-running tasks, then process(wait) to block until completion -- Use workdir to run commands in specific project directories -- Install whatever tools you need with apt-get or pip -- Try to create or use a venv with uv or python -m venv to keep isolation from global system packages - -**Things to avoid:** -- Do NOT use interactive tools (vim, nano, python repl) without pty=true -- they will hang without a pseudo-terminal. -- Even git sometimes becomes interactive if the output is large. If you're not sure, pipe to cat. +Do NOT use vim/nano/interactive tools without pty=true — they hang without a pseudo-terminal. Pipe git output to cat if it might page. """ # Global state for environment lifecycle management