diff --git a/README.md b/README.md new file mode 100644 index 000000000..627a792ee --- /dev/null +++ b/README.md @@ -0,0 +1,17 @@ +## Setup +``` +pip install -r requirements.txt +git clone git@github.com:NousResearch/hecate.git +cd hecate +pip install -e . +``` + +## Run +``` +python run_agent.py \ + --query "search up the latest docs on jit in python 3.13 and write me basic example that's not in their docs. profile its perf" \ + --max_turns 20 \ + --model claude-sonnet-4-20250514 \ + --base_url https://api.anthropic.com/v1/ \ + --api_key $ANTHROPIC_API_KEY +``` diff --git a/model_tools.py b/model_tools.py index 6e4d828d6..6a6c59f67 100644 --- a/model_tools.py +++ b/model_tools.py @@ -24,6 +24,7 @@ from typing import Dict, Any, List # Import toolsets from web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_tavily_api_key +from terminal_tool import terminal_tool, check_hecate_requirements, TERMINAL_TOOL_DESCRIPTION def get_web_tool_definitions() -> List[Dict[str, Any]]: """ @@ -36,7 +37,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]: { "type": "function", "function": { - "name": "web_search_tool", + "name": "web_search", "description": "Search the web for information on any topic. Returns relevant results with titles, URLs, content snippets, and answers. Uses advanced search depth for comprehensive results.", "parameters": { "type": "object", @@ -60,7 +61,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]: { "type": "function", "function": { - "name": "web_extract_tool", + "name": "web_extract", "description": "Extract and read the full content from specific web page URLs. 
Useful for getting detailed information from webpages found through search.", "parameters": { "type": "object", @@ -84,7 +85,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]: { "type": "function", "function": { - "name": "web_crawl_tool", + "name": "web_crawl", "description": "Crawl a website with specific instructions to find and extract targeted content. Uses AI to intelligently navigate and extract relevant information from across the site.", "parameters": { "type": "object", @@ -110,6 +111,53 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]: } ] +def get_terminal_tool_definitions() -> List[Dict[str, Any]]: + """ + Get tool definitions for terminal tools in OpenAI's expected format. + + Returns: + List[Dict]: List of terminal tool definitions compatible with OpenAI API + """ + return [ + { + "type": "function", + "function": { + "name": "terminal", + "description": TERMINAL_TOOL_DESCRIPTION, + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The command to execute on the VM" + }, + "input_keys": { + "type": "string", + "description": "Keystrokes to send to the most recent interactive session (e.g., 'hello\\n' for typing hello + Enter). If no active session exists, this will be ignored." + }, + "background": { + "type": "boolean", + "description": "Whether to run the command in the background (default: false)", + "default": False + }, + "idle_threshold": { + "type": "number", + "description": "Seconds to wait for output before considering session idle (default: 5.0)", + "default": 5.0, + "minimum": 0.1 + }, + "timeout": { + "type": "integer", + "description": "Command timeout in seconds (optional)", + "minimum": 1 + } + }, + "required": [] + } + } + } + ] + def get_tool_definitions() -> List[Dict[str, Any]]: """ Get all available tool definitions for model API calls. 
@@ -125,6 +173,9 @@ def get_tool_definitions() -> List[Dict[str, Any]]: # Add web tools tools.extend(get_web_tool_definitions()) + # Add terminal tools + tools.extend(get_terminal_tool_definitions()) + # Future toolsets can be added here: # tools.extend(get_file_tool_definitions()) # tools.extend(get_code_tool_definitions()) @@ -143,21 +194,21 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any]) Returns: str: Function result as JSON string """ - if function_name == "web_search_tool": + if function_name == "web_search": query = function_args.get("query", "") limit = function_args.get("limit", 5) # Ensure limit is within bounds limit = max(1, min(10, limit)) return web_search_tool(query, limit) - elif function_name == "web_extract_tool": + elif function_name == "web_extract": urls = function_args.get("urls", []) # Limit URLs to prevent abuse urls = urls[:5] if isinstance(urls, list) else [] format = function_args.get("format") return web_extract_tool(urls, format) - elif function_name == "web_crawl_tool": + elif function_name == "web_crawl": url = function_args.get("url", "") instructions = function_args.get("instructions") depth = function_args.get("depth", "basic") @@ -166,6 +217,29 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any]) else: return json.dumps({"error": f"Unknown web function: {function_name}"}) +def handle_terminal_function_call(function_name: str, function_args: Dict[str, Any]) -> str: + """ + Handle function calls for terminal tools. 
+ + Args: + function_name (str): Name of the terminal function to call + function_args (Dict): Arguments for the function + + Returns: + str: Function result as JSON string + """ + if function_name == "terminal": + command = function_args.get("command") + input_keys = function_args.get("input_keys") + background = function_args.get("background", False) + idle_threshold = function_args.get("idle_threshold", 5.0) + timeout = function_args.get("timeout") + # Session management is handled internally - don't pass session_id from model + return terminal_tool(command, input_keys, None, background, idle_threshold, timeout) + + else: + return json.dumps({"error": f"Unknown terminal function: {function_name}"}) + def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> str: """ Main function call dispatcher that routes calls to appropriate toolsets. @@ -186,9 +260,13 @@ def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> s """ try: # Route web tools - if function_name in ["web_search_tool", "web_extract_tool", "web_crawl_tool"]: + if function_name in ["web_search", "web_extract", "web_crawl"]: return handle_web_function_call(function_name, function_args) + # Route terminal tools + elif function_name in ["terminal"]: + return handle_terminal_function_call(function_name, function_args) + # Future toolsets can be routed here: # elif function_name in ["file_read_tool", "file_write_tool"]: # return handle_file_function_call(function_name, function_args) @@ -218,6 +296,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]: "tools": ["web_search_tool", "web_extract_tool", "web_crawl_tool"], "description": "Web search, content extraction, and website crawling tools", "requirements": ["TAVILY_API_KEY environment variable"] + }, + "terminal_tools": { + "available": check_hecate_requirements(), + "tools": ["terminal_tool"], + "description": "Execute commands with optional interactive session support on Linux VMs", +
"requirements": ["MORPH_API_KEY environment variable", "hecate package"] } # Future toolsets can be added here } @@ -232,7 +316,8 @@ def check_toolset_requirements() -> Dict[str, bool]: Dict: Status of each toolset's requirements """ return { - "web_tools": check_tavily_api_key() + "web_tools": check_tavily_api_key(), + "terminal_tools": check_hecate_requirements() } if __name__ == "__main__": diff --git a/run_agent.py b/run_agent.py index 729682513..f289e2ac6 100644 --- a/run_agent.py +++ b/run_agent.py @@ -25,6 +25,7 @@ import os import time from typing import List, Dict, Any, Optional from openai import OpenAI +import fire # Import our tool system from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements @@ -67,7 +68,7 @@ class AIAgent: if api_key: client_kwargs["api_key"] = api_key else: - client_kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "dummy-key") + client_kwargs["api_key"] = os.getenv("ANTHROPIC_API_KEY", "dummy-key") try: self.client = OpenAI(**client_kwargs) @@ -276,28 +277,46 @@ class AIAgent: return result["final_response"] -def main(): +def main( + query: str = None, + model: str = "claude-opus-4-20250514", + api_key: str = None, + base_url: str = "https://api.anthropic.com/v1/", + max_turns: int = 10 +): """ Main function for running the agent directly. + + Args: + query (str): Natural language query for the agent. Defaults to Python 3.13 example. + model (str): Model name to use. Defaults to claude-opus-4-20250514. + api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided. + base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/ + max_turns (int): Maximum number of API call iterations. Defaults to 10.
""" print("🤖 AI Agent with Tool Calling") print("=" * 50) - # Initialize agent with local SGLang server (modify as needed) + # Initialize agent with provided parameters try: agent = AIAgent( - base_url="https://api.anthropic.com/v1/", - model="claude-opus-4-20250514" + base_url=base_url, + model=model, + api_key=api_key, + max_iterations=max_turns ) except RuntimeError as e: print(f"❌ Failed to initialize agent: {e}") return - # Example conversation - user_query = ( - "Tell me about the latest developments in Python 3.12 and what new features " - "developers should know about. Please search for current information." - ) + # Use provided query or default to Python 3.13 example + if query is None: + user_query = ( + "Tell me about the latest developments in Python 3.13 and what new features " + "developers should know about. Please search for current information and try it out." + ) + else: + user_query = query print(f"\n📝 User Query: {user_query}") print("\n" + "=" * 50) @@ -321,4 +340,4 @@ def main(): if __name__ == "__main__": - main() + fire.Fire(main) diff --git a/terminal_tool.py b/terminal_tool.py index e69de29bb..e01d7a617 100644 --- a/terminal_tool.py +++ b/terminal_tool.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 +""" +Terminal Tool Module + +This module provides a single terminal tool using Hecate's VM infrastructure. +It wraps Hecate's functionality to provide a simple interface for executing commands +on Morph VMs with automatic lifecycle management.
+ +Available tool: +- terminal_tool: Execute commands with optional interactive session support + +Usage: + from terminal_tool import terminal_tool + + # Execute a single command + result = terminal_tool("ls -la") + + # Execute in an interactive session + result = terminal_tool("python", input_keys="print('hello')\\nexit()\\n") +""" + +import json +import os +from typing import Optional, Dict, Any +from hecate import run_tool_with_lifecycle_management +from morphcloud._llm import ToolCall + +# Detailed description for the terminal tool based on Hermes Terminal system prompt +TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure, persistent Linux VM environment with full interactive application support. + +**Environment:** +- Minimal Debian-based OS with internet access +- Automatic VM lifecycle management (creates on-demand, reuses, cleans up) +- **Full state persistence across tool calls**: current directory (pwd), environment variables, activated virtual environments (conda/venv), running processes, and command history all persist between consecutive tool calls +- Session state managed automatically via tmux + +**Command Execution:** +- Simple commands: Just provide the 'command' parameter +- Background processes: Set 'background': True for servers/long-running tasks +- Interactive applications automatically detected and handled + +**Interactive Applications (TUIs/Pagers/Prompts):** +When commands enter interactive mode (vim, nano, less, git prompts, package managers, etc.), you'll receive screen content with "frozen" status. This is NORMAL - the session is still active and waiting for input. + +**To interact with frozen sessions:** +1. Use 'input_keys' parameter with keystrokes to send +2. System auto-detects and uses the active session +3. 
Session stays active until application exits + +**Special Key Syntax for input_keys:** +- `<ESC>`: Escape key +- `<ENTER>`: Enter/Return +- `<CTRL+C>`, `<CTRL+D>`, `<CTRL+Z>`: Control combinations +- `<UP>`, `<DOWN>`, `<LEFT>`, `<RIGHT>`: Arrow keys +- `<TAB>`, `<BACKSPACE>`: Tab and Backspace +- `<F1>` through `<F12>`: Function keys +- `<SHIFT+TAB>`: Shift+Tab +- Uppercase letters for Shift+letter (e.g., 'V' for Shift+V) +- Symbols for Shift+number (e.g., '!' for Shift+1, ':' for Shift+;) + +**Examples:** +- Start vim: `{"command": "vim file.txt"}` +- Type in vim: `{"input_keys": "iHello World<ESC>"}` +- Save and quit: `{"input_keys": ":wq<ENTER>"}` +- Navigate in less: `{"input_keys": "j"}` +- Quit less: `{"input_keys": "q"}` + +**Best Practices:** +- Run servers/long processes in background with separate tool calls +- Chain multiple foreground commands in single call if needed +- Monitor disk usage for large tasks, clean up to free space +- Test components incrementally with mock inputs +- Install whatever tools needed - full system access provided""" + +def terminal_tool( + command: Optional[str] = None, + input_keys: Optional[str] = None, + session_id: Optional[str] = None, + background: bool = False, + idle_threshold: float = 5.0, + timeout: Optional[int] = None +) -> str: + """ + Execute a command on a Morph VM with optional interactive session support. + + This tool uses Hecate's VM lifecycle management to automatically create + and manage VMs. VMs are reused within the configured lifetime window + and automatically cleaned up after inactivity.
+ + Args: + command: The command to execute (optional if continuing existing session) + input_keys: Keystrokes to send to interactive session (e.g., "hello\\n") + session_id: ID of existing session to continue (optional) + background: Whether to run the command in the background (default: False) + idle_threshold: Seconds to wait for output before considering session idle (default: 5.0) + timeout: Command timeout in seconds (optional) + + Returns: + str: JSON string containing command output, session info, exit code, and any errors + + Examples: + # Execute a simple command + >>> result = terminal_tool(command="ls -la /tmp") + + # Start an interactive Python session + >>> result = terminal_tool(command="python3") + >>> session_data = json.loads(result) + >>> session_id = session_data["session_id"] + + # Send input to the session + >>> result = terminal_tool(input_keys="print('Hello')\\n", session_id=session_id) + + # Run a background task + >>> result = terminal_tool(command="sleep 60", background=True) + """ + try: + # Build tool input based on provided parameters + tool_input = {} + + if command: + tool_input["command"] = command + if input_keys: + tool_input["input_keys"] = input_keys + if session_id: + tool_input["session_id"] = session_id + if background: + tool_input["background"] = background + if idle_threshold != 5.0: + tool_input["idle_threshold"] = idle_threshold + if timeout is not None: + tool_input["timeout"] = timeout + + tool_call = ToolCall( + name="run_command", + input=tool_input + ) + + # Execute with lifecycle management + result = run_tool_with_lifecycle_management(tool_call) + + # Format the result with all possible fields + # Map hecate's "stdout" to "output" for compatibility + formatted_result = { + "output": result.get("stdout", result.get("output", "")), + "screen": result.get("screen", ""), + "session_id": result.get("session_id"), + "exit_code": result.get("returncode", result.get("exit_code", -1)), + "error": result.get("error"), + 
"status": "active" if result.get("session_id") else "ended" + } + + return json.dumps(formatted_result) + + except Exception as e: + return json.dumps({ + "output": "", + "screen": "", + "session_id": None, + "exit_code": -1, + "error": f"Failed to execute terminal command: {str(e)}", + "status": "error" + }) + +def check_hecate_requirements() -> bool: + """ + Check if all requirements for terminal tools are met. + + Returns: + bool: True if all requirements are met, False otherwise + """ + # Check for required environment variables + required_vars = ["MORPH_API_KEY"] + optional_vars = ["OPENAI_API_KEY"] # Needed for Hecate's LLM features + + missing_required = [var for var in required_vars if not os.getenv(var)] + missing_optional = [var for var in optional_vars if not os.getenv(var)] + + if missing_required: + print(f"Missing required environment variables: {', '.join(missing_required)}") + return False + + if missing_optional: + print(f"Warning: Missing optional environment variables: {', '.join(missing_optional)}") + print(" (Some Hecate features may be limited)") + + # Check if Hecate is importable + try: + import hecate + return True + except ImportError: + print("Hecate is not installed. Please install it with: pip install hecate") + return False + +# Module-level initialization check +_requirements_met = check_hecate_requirements() + +if __name__ == "__main__": + """ + Simple test/demo when run directly + """ + print("Terminal Tool Module") + print("=" * 40) + + if not _requirements_met: + print("Requirements not met. 
Please check the messages above.") + exit(1) + + print("All requirements met!") + print("\nAvailable Tool:") + print(" - terminal_tool: Execute commands with optional interactive session support") + + print("\nUsage Examples:") + print(" # Execute a command") + print(" result = terminal_tool(command='ls -la')") + print(" ") + print(" # Start an interactive session") + print(" result = terminal_tool(command='python3')") + print(" session_data = json.loads(result)") + print(" session_id = session_data['session_id']") + print(" ") + print(" # Send input to the session") + print(" result = terminal_tool(") + print(" input_keys='print(\"Hello\")\\\\n',") + print(" session_id=session_id") + print(" )") + print(" ") + print(" # Run a background task") + print(" result = terminal_tool(command='sleep 60', background=True)") + + print("\nEnvironment Variables:") + print(f" MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}") + print(f" OPENAI_API_KEY: {'Set' if os.getenv('OPENAI_API_KEY') else 'Not set (optional)'}") + print(f" HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300)") + print(f" HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_p5294qxt')} (default: snapshot_p5294qxt)") \ No newline at end of file