terminal tool

2025-07-25 15:15:36 +00:00
parent 21d80ca683
commit 122d8788ae
3 changed files with 357 additions and 3 deletions
--- a/model_tools.py
+++ b/model_tools.py
@@ -24,6 +24,7 @@ from typing import Dict, Any, List

 # Import toolsets
 from web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_tavily_api_key
+from terminal_tool import terminal_execute_tool, terminal_session_tool, check_hecate_requirements

 def get_web_tool_definitions() -> List[Dict[str, Any]]:
    """
@@ -110,6 +111,74 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        }
    ]

+def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
+    """
+    Build the OpenAI function-calling schemas for the two terminal tools.
+
+    Returns:
+        List[Dict]: The ``terminal_execute_tool`` definition followed by the
+        ``terminal_session_tool`` definition
+    """
+    # One-shot execution: "command" is the only required argument; the
+    # other two are optional modifiers.
+    execute_properties = {
+        "command": {
+            "type": "string",
+            "description": "The command to execute on the VM"
+        },
+        "background": {
+            "type": "boolean",
+            "description": "Whether to run the command in the background (default: false)",
+            "default": False
+        },
+        "timeout": {
+            "type": "integer",
+            "description": "Command timeout in seconds (optional)",
+            "minimum": 1
+        }
+    }
+    execute_definition = {
+        "type": "function",
+        "function": {
+            "name": "terminal_execute_tool",
+            "description": "Execute a command on a Linux VM and get the output. Automatically manages VM lifecycle - creates VMs on demand, reuses existing VMs, and cleans up after inactivity.",
+            "parameters": {"type": "object", "properties": execute_properties, "required": ["command"]}
+        }
+    }
+
+    # Interactive session: the schema marks no argument as required, so a
+    # call may start a session, continue one, or only send keystrokes.
+    session_properties = {
+        "command": {
+            "type": "string",
+            "description": "Command to start a new session (optional if continuing existing session)"
+        },
+        "input_keys": {
+            "type": "string",
+            "description": "Keystrokes to send to the session (e.g., 'hello\\n' for typing hello + Enter)"
+        },
+        "session_id": {
+            "type": "string",
+            "description": "ID of existing session to continue (optional)"
+        },
+        "idle_threshold": {
+            "type": "number",
+            "description": "Seconds to wait for output before considering session idle (default: 5.0)",
+            "default": 5.0,
+            "minimum": 0.1
+        }
+    }
+    session_definition = {
+        "type": "function",
+        "function": {
+            "name": "terminal_session_tool",
+            "description": "Execute commands in an interactive terminal session. Useful for running interactive programs (vim, python REPL, etc.), maintaining state between commands, or sending keystrokes to running programs.",
+            "parameters": {"type": "object", "properties": session_properties, "required": []}
+        }
+    }
+
+    return [execute_definition, session_definition]
+
 def get_tool_definitions() -> List[Dict[str, Any]]:
    """
    Get all available tool definitions for model API calls.
@@ -125,6 +194,9 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
    # Add web tools
    tools.extend(get_web_tool_definitions())
    
+    # Add terminal tools
+    tools.extend(get_terminal_tool_definitions())
+    
    # Future toolsets can be added here:
    # tools.extend(get_file_tool_definitions())
    # tools.extend(get_code_tool_definitions())
@@ -166,6 +238,33 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any])
    else:
        return json.dumps({"error": f"Unknown web function: {function_name}"})

+def handle_terminal_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
+    """
+    Dispatch a terminal tool call to the matching terminal_tool function.
+
+    Args:
+        function_name (str): Name of the terminal function to call
+        function_args (Dict): Arguments for the call; missing keys use the defaults below
+
+    Returns:
+        str: The tool's result string, or a JSON {"error": ...} object for an unknown name
+    """
+    # Reject unknown names up front so the two real tools read as flat branches.
+    if function_name not in ("terminal_execute_tool", "terminal_session_tool"):
+        return json.dumps({"error": f"Unknown terminal function: {function_name}"})
+    if function_name == "terminal_execute_tool":
+        return terminal_execute_tool(
+            function_args.get("command", ""),
+            function_args.get("background", False),
+            function_args.get("timeout"),
+        )
+    return terminal_session_tool(
+        function_args.get("command"),
+        function_args.get("input_keys"),
+        function_args.get("session_id"),
+        function_args.get("idle_threshold", 5.0),
+    )
+
 def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
    """
    Main function call dispatcher that routes calls to appropriate toolsets.
@@ -189,6 +288,10 @@ def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> s
        if function_name in ["web_search_tool", "web_extract_tool", "web_crawl_tool"]:
            return handle_web_function_call(function_name, function_args)
        
+        # Route terminal tools
+        elif function_name in ["terminal_execute_tool", "terminal_session_tool"]:
+            return handle_terminal_function_call(function_name, function_args)
+        
        # Future toolsets can be routed here:
        # elif function_name in ["file_read_tool", "file_write_tool"]:
        #     return handle_file_function_call(function_name, function_args)
@@ -218,6 +321,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
            "tools": ["web_search_tool", "web_extract_tool", "web_crawl_tool"],
            "description": "Web search, content extraction, and website crawling tools",
            "requirements": ["TAVILY_API_KEY environment variable"]
+        },
+        "terminal_tools": {
+            "available": check_hecate_requirements(),
+            "tools": ["terminal_execute_tool", "terminal_session_tool"],
+            "description": "Execute commands and manage interactive sessions on Linux VMs",
+            "requirements": ["MORPH_API_KEY environment variable", "hecate package"]
        }
        # Future toolsets can be added here
    }
@@ -232,7 +341,8 @@ def check_toolset_requirements() -> Dict[str, bool]:
        Dict: Status of each toolset's requirements
    """
    return {
-        "web_tools": check_tavily_api_key()
+        "web_tools": check_tavily_api_key(),
+        "terminal_tools": check_hecate_requirements()
    }

 if __name__ == "__main__":
--- a/run_agent.py
+++ b/run_agent.py
@@ -67,7 +67,7 @@ class AIAgent:
        if api_key:
            client_kwargs["api_key"] = api_key
        else:
-            client_kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "dummy-key")
+            client_kwargs["api_key"] = os.getenv("ANTHROPIC_API_KEY", "dummy-key")
        
        try:
            self.client = OpenAI(**client_kwargs)
@@ -296,7 +296,7 @@ def main():
    # Example conversation
    user_query = (
        "Tell me about the latest developments in Python 3.12 and what new features "
-        "developers should know about. Please search for current information."
+        "developers should know about. Please search for current information and try it out."
    )
    
    print(f"\n📝 User Query: {user_query}")
--- a/terminal_tool.py
+++ b/terminal_tool.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+"""
+Terminal Tools Module
+
+This module provides terminal/command execution tools using Hecate's VM infrastructure.
+It wraps Hecate's functionality to provide a simple interface for executing commands
+on Morph VMs with automatic lifecycle management.
+
+Available tools:
+- terminal_execute_tool: Execute a single command and get output
+- terminal_session_tool: Execute a command in a persistent session
+
+Usage:
+    from terminal_tool import terminal_execute_tool, terminal_session_tool
+    
+    # Execute a single command
+    result = terminal_execute_tool("ls -la")
+    
+    # Execute in a session (for interactive commands)
+    result = terminal_session_tool("python", input_keys="print('hello')\\nexit()\\n")
+"""
+
+import json
+import os
+from typing import Optional
+from hecate import run_tool_with_lifecycle_management
+from morphcloud._llm import ToolCall
+
+def terminal_execute_tool(
+    command: str,
+    background: bool = False,
+    timeout: Optional[int] = None
+) -> str:
+    """
+    Run a single command through Hecate's "run_command" tool and report the result.
+
+    The request is passed to run_tool_with_lifecycle_management, which this
+    module relies on for creating, reusing and cleaning up Morph VMs.
+
+    Args:
+        command: The command to execute
+        background: Whether to run the command in the background (default: False)
+        timeout: Command timeout in seconds; left out of the request when None
+
+    Returns:
+        str: JSON object with "output", "exit_code" and "error" keys, plus
+        "session_id" and "screen" when the Hecate result contains them.
+        Any exception is reported as output "", exit_code -1 and an "error"
+        message instead of being raised.
+
+    Example (illustrative; needs a working Morph/Hecate setup):
+        result = terminal_execute_tool("ls -la /tmp")
+        data = json.loads(result)
+        print(data["exit_code"], data["output"])
+    """
+    try:
+        # Build the run_command request; a timeout is sent only if supplied.
+        request = {
+            "command": command,
+            "background": background
+        }
+
+        if timeout is not None:
+            request["timeout"] = timeout
+
+        tool_call = ToolCall(
+            name="run_command",
+            input=request
+        )
+
+        # Execute with lifecycle management
+        result = run_tool_with_lifecycle_management(tool_call)
+
+        # The exit status is read from "returncode", then "exit_code",
+        # falling back to -1 when the result has neither key.
+        payload = {
+            "output": result.get("output", ""),
+            "exit_code": result.get("returncode", result.get("exit_code", -1)),
+            "error": result.get("error")
+        }
+
+        # Copy session details through when present (for interactive sessions)
+        for key in ("session_id", "screen"):
+            if key in result:
+                payload[key] = result[key]
+
+        return json.dumps(payload)
+
+    except Exception as e:
+        # Report every failure as JSON so the caller always gets a string back.
+        failure = {
+            "output": "",
+            "exit_code": -1,
+            "error": f"Failed to execute command: {str(e)}"
+        }
+        return json.dumps(failure)
+
+def terminal_session_tool(
+    command: Optional[str] = None,
+    input_keys: Optional[str] = None,
+    session_id: Optional[str] = None,
+    idle_threshold: float = 5.0
+) -> str:
+    """
+    Execute a command in an interactive terminal session.
+
+    Useful for running interactive programs (vim, python REPL, etc.),
+    maintaining state between commands, or sending keystrokes to running programs.
+
+    At least one of command, input_keys or session_id must be non-empty;
+    otherwise an error result is returned without contacting Hecate.
+
+    Args:
+        command: Command to start a new session (optional if continuing existing session)
+        input_keys: Keystrokes to send to the session (e.g., "hello\\n" for typing hello + Enter)
+        session_id: ID of existing session to continue (optional)
+        idle_threshold: Seconds to wait for output before considering session idle (default: 5.0)
+
+    Returns:
+        str: JSON object with "session_id", "screen", "exit_code", "error" and "status"
+        ("active" if the result has a session id, else "ended"), plus "output" when present.
+        Failures are returned with exit_code -1 and status "error", never raised.
+
+    Example (illustrative; needs a working Morph/Hecate setup):
+        result = terminal_session_tool("python")
+        session_id = json.loads(result)["session_id"]
+        result = terminal_session_tool(input_keys="print('Hello, World!')\\n", session_id=session_id)
+    """
+    try:
+        # With nothing to start, continue or type, the request would be empty.
+        # Fail here (formatted by the handler below) instead of sending it.
+        if not (command or input_keys or session_id):
+            raise ValueError("provide at least one of command, input_keys, or session_id")
+
+        # Only non-empty values are forwarded.
+        tool_input = {}
+        if command:
+            tool_input["command"] = command
+        if input_keys:
+            tool_input["input_keys"] = input_keys
+        if session_id:
+            tool_input["session_id"] = session_id
+        # Sent only when it differs from 5.0 - presumably Hecate's own default
+        # matches; verify against Hecate's run_command documentation.
+        if idle_threshold != 5.0:
+            tool_input["idle_threshold"] = idle_threshold
+
+        tool_call = ToolCall(name="run_command", input=tool_input)
+
+        # Execute with lifecycle management
+        result = run_tool_with_lifecycle_management(tool_call)
+
+        # Format the result for session tools
+        formatted_result = {
+            "session_id": result.get("session_id"),
+            "screen": result.get("screen", ""),
+            "exit_code": result.get("returncode", result.get("exit_code", 0)),
+            "error": result.get("error"),
+            "status": "active" if result.get("session_id") else "ended"
+        }
+
+        # Include output if present (for non-interactive commands)
+        if "output" in result:
+            formatted_result["output"] = result["output"]
+
+        return json.dumps(formatted_result)
+
+    except Exception as e:
+        return json.dumps({
+            "session_id": None,
+            "screen": "",
+            "exit_code": -1,
+            "error": f"Failed to manage session: {str(e)}",
+            "status": "error"
+        })
+
+def check_hecate_requirements() -> bool:
+    """
+    Report whether the terminal tools can run in this environment.
+
+    Prints a message for each problem. Fails when MORPH_API_KEY is unset or
+    ``hecate`` cannot be imported; an unset OPENAI_API_KEY only warns.
+
+    Returns:
+        bool: True if all requirements are met, False otherwise
+    """
+    # Hard requirement: without these the check fails immediately.
+    missing_required = [name for name in ("MORPH_API_KEY",) if not os.getenv(name)]
+    if missing_required:
+        print(f"Missing required environment variables: {', '.join(missing_required)}")
+        return False
+
+    # Soft requirement (needed for Hecate's LLM features): warn and continue.
+    missing_optional = [name for name in ("OPENAI_API_KEY",) if not os.getenv(name)]
+    if missing_optional:
+        print(f"Warning: Missing optional environment variables: {', '.join(missing_optional)}")
+        print("   (Some Hecate features may be limited)")
+
+    # Check if Hecate is importable
+    try:
+        import hecate
+    except ImportError:
+        print("Hecate is not installed. Please install it with: pip install hecate")
+        return False
+    return True
+
+# Module-level initialization check (runs, and may print, on every import)
+_requirements_met = check_hecate_requirements()
+
+if __name__ == "__main__":
+    # Simple test/demo when run directly. Only prints status and usage notes;
+    # the tool calls below appear inside printed example text and are not run.
+    # Exits via SystemExit because the `exit` builtin comes from `site` and may be absent.
+    print("Terminal Tools Module")
+    print("=" * 40)
+
+    if not _requirements_met:
+        print("Requirements not met. Please check the messages above.")
+        raise SystemExit(1)
+
+    print("All requirements met!")
+    print("\nAvailable Tools:")
+    print("  - terminal_execute_tool: Execute single commands")
+    print("  - terminal_session_tool: Interactive terminal sessions")
+
+    print("\nUsage Examples:")
+    print("  # Execute a command")
+    print("  result = terminal_execute_tool('ls -la')")
+    print("  ")
+    print("  # Start an interactive session")
+    print("  result = terminal_session_tool('python')")
+    print("  session_data = json.loads(result)")
+    print("  session_id = session_data['session_id']")
+    print("  ")
+    print("  # Send input to the session")
+    print("  result = terminal_session_tool(")
+    print("      input_keys='print(\"Hello\")\\\\n',")
+    print("      session_id=session_id")
+    print("  )")
+
+    print("\nEnvironment Variables:")
+    print(f"  MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}")
+    print(f"  OPENAI_API_KEY: {'Set' if os.getenv('OPENAI_API_KEY') else 'Not set (optional)'}")
+    print(f"  HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300)")