Merge pull request #1 from NousResearch/terminal

Terminal tool
2025-07-26 07:13:34 -07:00
parent 21d80ca683 10b4cfeace
commit 1dacd941f6
4 changed files with 374 additions and 19 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,17 @@
+## Setup
+```
+pip install -r requirements.txt
+git clone git@github.com:NousResearch/hecate.git
+cd hecate
+pip install -e .
+```
+
+## Run
+```
+python run_agent.py \
+  --query "search up the latest docs on jit in python 3.13 and write me a basic example that's not in their docs. profile its perf" \
+  --max_turns 20 \
+  --model claude-sonnet-4-20250514 \
+  --base_url https://api.anthropic.com/v1/ \
+  --api_key $ANTHROPIC_API_KEY
+```
--- a/model_tools.py
+++ b/model_tools.py
@@ -24,6 +24,7 @@ from typing import Dict, Any, List

 # Import toolsets
 from web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_tavily_api_key
+from terminal_tool import terminal_tool, check_hecate_requirements, TERMINAL_TOOL_DESCRIPTION

 def get_web_tool_definitions() -> List[Dict[str, Any]]:
    """
@@ -36,7 +37,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        {
            "type": "function",
            "function": {
-                "name": "web_search_tool",
+                "name": "web_search",
                "description": "Search the web for information on any topic. Returns relevant results with titles, URLs, content snippets, and answers. Uses advanced search depth for comprehensive results.",
                "parameters": {
                    "type": "object",
@@ -60,7 +61,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        {
            "type": "function",
            "function": {
-                "name": "web_extract_tool",
+                "name": "web_extract",
                "description": "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search.",
                "parameters": {
                    "type": "object",
@@ -84,7 +85,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        {
            "type": "function",
            "function": {
-                "name": "web_crawl_tool",
+                "name": "web_crawl",
                "description": "Crawl a website with specific instructions to find and extract targeted content. Uses AI to intelligently navigate and extract relevant information from across the site.",
                "parameters": {
                    "type": "object",
@@ -110,6 +111,53 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        }
    ]

+def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
+    """
+    Build the terminal toolset schema in OpenAI's function-calling format.
+
+    Returns:
+        List[Dict]: One-element list holding the "terminal" function definition
+    """
+    # JSON Schema for each argument the model may supply.
+    argument_schemas = {
+        "command": {
+            "type": "string",
+            "description": "The command to execute on the VM"
+        },
+        "input_keys": {
+            "type": "string",
+            "description": "Keystrokes to send to the most recent interactive session (e.g., 'hello\\n' for typing hello + Enter). If no active session exists, this will be ignored."
+        },
+        "background": {
+            "type": "boolean",
+            "description": "Whether to run the command in the background (default: false)",
+            "default": False
+        },
+        "idle_threshold": {
+            "type": "number",
+            "description": "Seconds to wait for output before considering session idle (default: 5.0)",
+            "default": 5.0,
+            "minimum": 0.1
+        },
+        "timeout": {
+            "type": "integer",
+            "description": "Command timeout in seconds (optional)",
+            "minimum": 1
+        }
+    }
+
+    # "required" stays empty: no argument is mandatory in this schema.
+    terminal_function = {
+        "name": "terminal",
+        "description": TERMINAL_TOOL_DESCRIPTION,
+        "parameters": {
+            "type": "object",
+            "properties": argument_schemas,
+            "required": []
+        }
+    }
+    return [{"type": "function", "function": terminal_function}]
+
 def get_tool_definitions() -> List[Dict[str, Any]]:
    """
    Get all available tool definitions for model API calls.
@@ -125,6 +173,9 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
    # Add web tools
    tools.extend(get_web_tool_definitions())
    
+    # Add terminal tools
+    tools.extend(get_terminal_tool_definitions())
+    
    # Future toolsets can be added here:
    # tools.extend(get_file_tool_definitions())
    # tools.extend(get_code_tool_definitions())
@@ -143,21 +194,21 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any])
    Returns:
        str: Function result as JSON string
    """
-    if function_name == "web_search_tool":
+    if function_name == "web_search":
        query = function_args.get("query", "")
        limit = function_args.get("limit", 5)
        # Ensure limit is within bounds
        limit = max(1, min(10, limit))
        return web_search_tool(query, limit)
    
-    elif function_name == "web_extract_tool":
+    elif function_name == "web_extract":
        urls = function_args.get("urls", [])
        # Limit URLs to prevent abuse
        urls = urls[:5] if isinstance(urls, list) else []
        format = function_args.get("format")
        return web_extract_tool(urls, format)
    
-    elif function_name == "web_crawl_tool":
+    elif function_name == "web_crawl":
        url = function_args.get("url", "")
        instructions = function_args.get("instructions")
        depth = function_args.get("depth", "basic")
@@ -166,6 +217,29 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any])
    else:
        return json.dumps({"error": f"Unknown web function: {function_name}"})

+def handle_terminal_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
+    """
+    Dispatch a terminal toolset call to its implementation.
+
+    Args:
+        function_name (str): Tool name to dispatch; only "terminal" is handled
+        function_args (Dict): Arguments supplied with the tool call
+
+    Returns:
+        str: Result of terminal_tool, or a JSON error string for an unknown name
+    """
+    if function_name != "terminal":
+        return json.dumps({"error": f"Unknown terminal function: {function_name}"})
+    read_arg = function_args.get
+    return terminal_tool(
+        read_arg("command"),
+        read_arg("input_keys"),
+        None,  # session_id is managed internally - never taken from the model
+        read_arg("background", False),
+        read_arg("idle_threshold", 5.0),
+        read_arg("timeout"),
+    )
+
 def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
    """
    Main function call dispatcher that routes calls to appropriate toolsets.
@@ -186,9 +260,13 @@ def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> s
    """
    try:
        # Route web tools
-        if function_name in ["web_search_tool", "web_extract_tool", "web_crawl_tool"]:
+        if function_name in ["web_search", "web_extract", "web_crawl"]:
            return handle_web_function_call(function_name, function_args)
        
+        # Route terminal tools
+        elif function_name in ["terminal"]:
+            return handle_terminal_function_call(function_name, function_args)
+        
        # Future toolsets can be routed here:
        # elif function_name in ["file_read_tool", "file_write_tool"]:
        #     return handle_file_function_call(function_name, function_args)
@@ -218,6 +296,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
            "tools": ["web_search_tool", "web_extract_tool", "web_crawl_tool"],
            "description": "Web search, content extraction, and website crawling tools",
            "requirements": ["TAVILY_API_KEY environment variable"]
+        },
+        "terminal_tools": {
+            "available": check_hecate_requirements(),
+            "tools": ["terminal_tool"],
+            "description": "Execute commands with optional interactive session support on Linux VMs",
+            "requirements": ["MORPH_API_KEY environment variable", "hecate package"]
        }
        # Future toolsets can be added here
    }
@@ -232,7 +316,8 @@ def check_toolset_requirements() -> Dict[str, bool]:
        Dict: Status of each toolset's requirements
    """
    return {
-        "web_tools": check_tavily_api_key()
+        "web_tools": check_tavily_api_key(),
+        "terminal_tools": check_hecate_requirements()
    }

 if __name__ == "__main__":
--- a/run_agent.py
+++ b/run_agent.py
@@ -25,6 +25,7 @@ import os
 import time
 from typing import List, Dict, Any, Optional
 from openai import OpenAI
+import fire

 # Import our tool system
 from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
@@ -67,7 +68,7 @@ class AIAgent:
        if api_key:
            client_kwargs["api_key"] = api_key
        else:
-            client_kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "dummy-key")
+            client_kwargs["api_key"] = os.getenv("ANTHROPIC_API_KEY", "dummy-key")
        
        try:
            self.client = OpenAI(**client_kwargs)
@@ -276,28 +277,46 @@ class AIAgent:
        return result["final_response"]


-def main():
+def main(
+    query: str = None,
+    model: str = "claude-opus-4-20250514", 
+    api_key: str = None,
+    base_url: str = "https://api.anthropic.com/v1/",
+    max_turns: int = 10
+):
    """
    Main function for running the agent directly.
+    
+    Args:
+        query (str): Natural language query for the agent. Defaults to Python 3.13 example.
+        model (str): Model name to use. Defaults to claude-opus-4-20250514.
+        api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided.
+        base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
+        max_turns (int): Maximum number of API call iterations. Defaults to 10.
    """
    print("🤖 AI Agent with Tool Calling")
    print("=" * 50)
    
-    # Initialize agent with local SGLang server (modify as needed)
+    # Initialize agent with provided parameters
    try:
        agent = AIAgent(
-            base_url="https://api.anthropic.com/v1/",
-            model="claude-opus-4-20250514"
+            base_url=base_url,
+            model=model,
+            api_key=api_key,
+            max_iterations=max_turns
        )
    except RuntimeError as e:
        print(f"❌ Failed to initialize agent: {e}")
        return
    
-    # Example conversation
-    user_query = (
-        "Tell me about the latest developments in Python 3.12 and what new features "
-        "developers should know about. Please search for current information."
-    )
+    # Use provided query or default to Python 3.13 example
+    if query is None:
+        user_query = (
+            "Tell me about the latest developments in Python 3.13 and what new features "
+            "developers should know about. Please search for current information and try it out."
+        )
+    else:
+        user_query = query
    
    print(f"\n📝 User Query: {user_query}")
    print("\n" + "=" * 50)
@@ -321,4 +340,4 @@ def main():


 if __name__ == "__main__":
-    main()
+    fire.Fire(main)
--- a/terminal_tool.py
+++ b/terminal_tool.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+"""
+Terminal Tool Module
+
+This module provides a single terminal tool using Hecate's VM infrastructure.
+It wraps Hecate's functionality to provide a simple interface for executing commands
+on Morph VMs with automatic lifecycle management.
+
+Available tool:
+- terminal_tool: Execute commands with optional interactive session support
+
+Usage:
+    from terminal_tool import terminal_tool
+    
+    # Execute a single command
+    result = terminal_tool("ls -la")
+    
+    # Execute in an interactive session
+    result = terminal_tool("python", input_keys="print('hello')\\nexit()\\n")
+"""
+
+import json
+import os
+from typing import Optional, Dict, Any
+from hecate import run_tool_with_lifecycle_management
+from morphcloud._llm import ToolCall
+
+# Detailed description for the terminal tool based on Hermes Terminal system prompt
+TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure, persistent Linux VM environment with full interactive application support.
+
+**Environment:** 
+- Minimal Debian-based OS with internet access
+- Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
+- **Full state persistence across tool calls**: current directory (pwd), environment variables, activated virtual environments (conda/venv), running processes, and command history all persist between consecutive tool calls
+- Session state managed automatically via tmux
+
+**Command Execution:**
+- Simple commands: Just provide the 'command' parameter
+- Background processes: Set 'background': True for servers/long-running tasks
+- Interactive applications automatically detected and handled
+
+**Interactive Applications (TUIs/Pagers/Prompts):**
+When commands enter interactive mode (vim, nano, less, git prompts, package managers, etc.), you'll receive screen content with "frozen" status. This is NORMAL - the session is still active and waiting for input.
+
+**To interact with frozen sessions:**
+1. Use 'input_keys' parameter with keystrokes to send
+2. System auto-detects and uses the active session
+3. Session stays active until application exits
+
+**Special Key Syntax for input_keys:**
+- `<ESC>`: Escape key
+- `<ENTER>`: Enter/Return  
+- `<CTRL+C>`, `<CTRL+D>`, `<CTRL+Z>`: Control combinations
+- `<UP>`, `<DOWN>`, `<LEFT>`, `<RIGHT>`: Arrow keys
+- `<TAB>`, `<BACKSPACE>`: Tab and Backspace
+- `<F1>` through `<F12>`: Function keys
+- `<SHIFT+TAB>`: Shift+Tab
+- Uppercase letters for Shift+letter (e.g., 'V' for Shift+V)
+- Symbols for Shift+number (e.g., '!' for Shift+1, ':' for Shift+;)
+
+**Examples:**
+- Start vim: `{"command": "vim file.txt"}`
+- Type in vim: `{"input_keys": "iHello World<ESC>"}`  
+- Save and quit: `{"input_keys": ":wq<ENTER>"}`
+- Navigate in less: `{"input_keys": "j"}`
+- Quit less: `{"input_keys": "q"}`
+
+**Best Practices:**
+- Run servers/long processes in background with separate tool calls
+- Chain multiple foreground commands in single call if needed
+- Monitor disk usage for large tasks, clean up to free space
+- Test components incrementally with mock inputs
+- Install whatever tools needed - full system access provided"""
+
+def terminal_tool(
+    command: Optional[str] = None,
+    input_keys: Optional[str] = None,
+    session_id: Optional[str] = None,
+    background: bool = False,
+    idle_threshold: float = 5.0,
+    timeout: Optional[int] = None
+) -> str:
+    """
+    Run a command, or send keystrokes, through Hecate's "run_command" tool.
+
+    VM creation, reuse and cleanup are delegated to
+    run_tool_with_lifecycle_management.
+
+    Only arguments that are set (truthy, a non-default idle_threshold, or a
+    non-None timeout) are forwarded. The reply is normalised into a fixed
+    set of keys and returned as JSON; any exception raised on the way is
+    reported inside that JSON instead of being propagated.
+
+    Args:
+        command: The command to execute (optional if continuing existing session)
+        input_keys: Keystrokes to send to interactive session (e.g., "hello\\n")
+        session_id: ID of existing session to continue (optional)
+        background: Whether to run the command in the background (default: False)
+        idle_threshold: Seconds to wait for output before considering session idle (default: 5.0)
+        timeout: Command timeout in seconds (optional)
+
+    Returns:
+        str: JSON object with the keys "output", "screen", "session_id",
+        "exit_code", "error" and "status". "status" is "active" when the
+        reply carries a session id, "ended" when it does not, and "error"
+        when the call itself failed.
+
+    Examples:
+        # Execute a simple command
+        >>> result = terminal_tool(command="ls -la /tmp")
+
+        # Start an interactive session and remember its id
+        >>> session_id = json.loads(terminal_tool(command="python3"))["session_id"]
+
+        # Send input to that session
+        >>> result = terminal_tool(input_keys="print('Hello')\\n", session_id=session_id)
+
+        # Run a background task
+        >>> result = terminal_tool(command="sleep 60", background=True)
+    """
+    try:
+        # (key, value, forward?) listed in the order keys enter the payload.
+        candidates = (
+            ("command", command, bool(command)),
+            ("input_keys", input_keys, bool(input_keys)),
+            ("session_id", session_id, bool(session_id)),
+            ("background", background, bool(background)),
+            ("idle_threshold", idle_threshold, idle_threshold != 5.0),
+            ("timeout", timeout, timeout is not None),
+        )
+
+        payload = {key: value for key, value, forward in candidates if forward}
+
+        # Execute with lifecycle management
+        reply = run_tool_with_lifecycle_management(
+            ToolCall(name="run_command", input=payload)
+        )
+
+        # Prefer Hecate's "stdout"/"returncode", falling back to
+        # "output"/"exit_code" when those keys are absent.
+        active_session = reply.get("session_id")
+        summary = {
+            "output": reply.get("stdout", reply.get("output", "")),
+            "screen": reply.get("screen", ""),
+            "session_id": active_session,
+            "exit_code": reply.get("returncode", reply.get("exit_code", -1)),
+            "error": reply.get("error"),
+            "status": "active" if active_session else "ended",
+        }
+        return json.dumps(summary)
+
+    except Exception as exc:
+        # Report the failure in the same JSON shape as a successful call.
+        failure = {
+            "output": "",
+            "screen": "",
+            "session_id": None,
+            "exit_code": -1,
+            "error": f"Failed to execute terminal command: {exc}",
+            "status": "error",
+        }
+        return json.dumps(failure)
+
+def check_hecate_requirements() -> bool:
+    """
+    Report whether the terminal toolset can run in this environment.
+
+    Requires the MORPH_API_KEY environment variable and an importable
+    `hecate` package; a missing OPENAI_API_KEY only prints a warning.
+
+    Returns:
+        bool: True if all requirements are met, False otherwise
+    """
+    required_vars = ["MORPH_API_KEY"]
+    optional_vars = ["OPENAI_API_KEY"]  # Needed for Hecate's LLM features
+
+    missing_required = [var for var in required_vars if not os.getenv(var)]
+    if missing_required:
+        print(f"Missing required environment variables: {', '.join(missing_required)}")
+        return False
+
+    missing_optional = [var for var in optional_vars if not os.getenv(var)]
+    if missing_optional:
+        print(f"Warning: Missing optional environment variables: {', '.join(missing_optional)}")
+        print("   (Some Hecate features may be limited)")
+
+    try:
+        import hecate  # noqa: F401 - imported only to confirm it is installed
+    except ImportError:
+        print("Hecate is not installed. Please install it from source: git clone git@github.com:NousResearch/hecate.git && pip install -e hecate")
+        return False
+    return True
+
+# Module-level initialization check (runs at import time and may print diagnostics)
+_requirements_met = check_hecate_requirements()
+
+if __name__ == "__main__":
+    # Simple test/demo when run directly: report whether the requirements
+    # are met, then print usage examples and the environment settings.
+    # Exits with status 1 when the requirements are not met.
+    print("Terminal Tool Module")
+    print("=" * 40)
+
+    if not _requirements_met:
+        print("Requirements not met. Please check the messages above.")
+        raise SystemExit(1)
+
+    print("All requirements met!")
+    print("\nAvailable Tool:")
+    print("  - terminal_tool: Execute commands with optional interactive session support")
+
+    print("\nUsage Examples:")
+    print("  # Execute a command")
+    print("  result = terminal_tool(command='ls -la')")
+    print("  ")
+    print("  # Start an interactive session")
+    print("  result = terminal_tool(command='python3')")
+    print("  session_data = json.loads(result)")
+    print("  session_id = session_data['session_id']")
+    print("  ")
+    print("  # Send input to the session")
+    print("  result = terminal_tool(")
+    print("      input_keys='print(\"Hello\")\\\\n',")
+    print("      session_id=session_id")
+    print("  )")
+    print("  ")
+    print("  # Run a background task")
+    print("  result = terminal_tool(command='sleep 60', background=True)")
+
+    print("\nEnvironment Variables:")
+    print(f"  MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}")
+    print(f"  OPENAI_API_KEY: {'Set' if os.getenv('OPENAI_API_KEY') else 'Not set (optional)'}")
+    print(f"  HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300)")
+    print(f"  HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_p5294qxt')} (default: snapshot_p5294qxt)")