From 17608c11422bc1bea8c8c2e2032c41cd4ac57a7a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 10 Sep 2025 00:43:55 -0700
Subject: [PATCH] Update to use toolsets and make them easy to create and
 configure

---
 README.md      |  121 ++++-
 model_tools.py |  238 +++++----
 run_agent.py   | 1253 +++++++++++++++++++++++++-----------------------
 test_run.sh    |   14 +-
 toolsets.py    |  326 +++++++++++++
 5 files changed, 1210 insertions(+), 742 deletions(-)
 create mode 100644 toolsets.py

diff --git a/README.md b/README.md
index 627a792ee..541a01a24 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,65 @@
+# Hermes Agent
+
+An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.
+
+## Features
+
+- **Web Tools**: Search, extract content, and crawl websites
+- **Terminal Tools**: Execute commands with interactive session support
+- **Vision Tools**: Analyze images from URLs
+- **Reasoning Tools**: Advanced multi-model reasoning (Mixture of Agents)
+- **Creative Tools**: Generate images from text prompts
+- **Toolsets System**: Organize tools into logical groups for different scenarios
+
 ## Setup
-```
+```bash
 pip install -r requirements.txt
 git clone git@github.com:NousResearch/hecate.git
 cd hecate
 pip install -e .
 ```
 
-## Run
+## Toolsets System
+
+The agent uses a toolsets system for organizing and managing tools. Every tool must belong to a toolset to be accessible; individual tool selection is not supported. This ensures consistent and logical grouping of capabilities.
+
+### Key Concepts
+
+- **Toolsets**: Logical groups of tools for specific use cases (e.g., "research", "development", "debugging")
+- **Composition**: Toolsets can include other toolsets for powerful combinations
+- **Custom Toolsets**: Create your own toolsets at runtime or by editing `toolsets.py`
+- **Toolset-Only Access**: Tools are only accessible through toolsets, not individually
+
+### Available Toolsets
+
+See `toolsets.py` for the complete list of predefined toolsets including:
+- Basic toolsets (web, terminal, vision, creative, reasoning)
+- Composite toolsets (research, development, analysis, etc.)
+- Scenario-specific toolsets (debugging, documentation, API testing, etc.)
+- Special toolsets (safe mode without terminal, minimal, offline)
+
+### Using Toolsets
+
+```bash
+# Use a predefined toolset
+python run_agent.py --enabled_toolsets=research --query "Find latest AI papers"
+
+# Combine multiple toolsets
+python run_agent.py --enabled_toolsets=web,vision --query "Analyze this website"
+
+# Safe mode (no terminal access)
+python run_agent.py --enabled_toolsets=safe --query "Help without running commands"
+
+# List all available toolsets and tools
+python run_agent.py --list_tools
 ```
+
+For detailed documentation on toolsets, see `TOOLSETS_README.md`.
+
+## Basic Usage
+
+### Default (all available tools enabled)
+```bash
 python run_agent.py \
   --query "search up the latest docs on jit in python 3.13 and write me basic example that's not in their docs. profile its perf" \
   --max_turns 20 \
@@ -15,3 +67,68 @@ python run_agent.py \
   --base_url https://api.anthropic.com/v1/ \
   --api_key $ANTHROPIC_API_KEY
 ```
+
+### With specific toolset
+```bash
+python run_agent.py \
+  --query "Debug this Python error" \
+  --enabled_toolsets=debugging \
+  --model claude-sonnet-4-20250514 \
+  --api_key $ANTHROPIC_API_KEY
+```
+
+### Python API
+```python
+from run_agent import AIAgent
+
+# Use a specific toolset
+agent = AIAgent(
+    model="claude-opus-4-20250514",
+    enabled_toolsets=["research"]
+)
+response = agent.chat("Find information about quantum computing")
+
+# Create custom toolset at runtime
+from toolsets import create_custom_toolset
+
+create_custom_toolset(
+    name="my_tools",
+    description="My custom toolkit",
+    tools=["web_search"],
+    includes=["terminal", "vision"]
+)
+
+agent = AIAgent(enabled_toolsets=["my_tools"])
+```
+
+## Command Line Arguments
+
+- `--query`: The question or task for the agent
+- `--model`: Model to use (default: claude-opus-4-20250514)
+- `--api_key`: API key for authentication
+- `--base_url`: API endpoint URL
+- `--max_turns`: Maximum number of tool-calling iterations
+- `--enabled_toolsets`: Comma-separated list of toolsets to enable (default: all toolsets)
+- `--disabled_toolsets`: Comma-separated list of toolsets to disable (ignored when `--enabled_toolsets` is set)
+- `--list_tools`: List all available toolsets and tools
+- `--save_trajectories`: Save conversation trajectories to JSONL files
+
+## Environment Variables
+
+Set these environment variables to enable different tools:
+
+- `FIRECRAWL_API_KEY`: For web tools (search, extract, crawl)
+- `MORPH_API_KEY`: For terminal tools
+- `NOUS_API_KEY`: For vision and reasoning tools
+- `FAL_KEY`: For image generation tools
+- `ANTHROPIC_API_KEY`: For the main agent model
+
+## Documentation
+
+- `TOOLSETS_README.md`: Comprehensive guide to the toolsets system
+- `toolsets.py`: View and modify available toolsets
+- `model_tools.py`: Core tool definitions and handlers
+
+## Examples
+
+See `TOOLSETS_README.md` for extensive examples of using different toolsets for various scenarios.
diff --git a/model_tools.py b/model_tools.py
index 08b073953..42f068604 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -23,7 +23,7 @@ Usage:
     web_tools = get_tool_definitions(enabled_toolsets=['web_tools'])
     
     # Handle function calls from model
-    result = handle_function_call("web_search", {"query": "Python", "limit": 3})
+    result = handle_function_call("web_search", {"query": "Python"})
 """
 
 import json
@@ -35,6 +35,11 @@ from terminal_tool import terminal_tool, check_hecate_requirements, TERMINAL_TOO
 from vision_tools import vision_analyze_tool, check_vision_requirements
 from mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
 from image_generation_tool import image_generate_tool, check_image_generation_requirements
+from toolsets import (
+    get_toolset, resolve_toolset, resolve_multiple_toolsets,
+    get_all_toolsets, get_toolset_names, validate_toolset,
+    get_toolset_info, print_toolset_tree
+)
 
 def get_web_tool_definitions() -> List[Dict[str, Any]]:
     """
@@ -48,20 +53,13 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "web_search",
-                "description": "Search the web for information on any topic. Returns relevant results with titles and URLs. Uses advanced search depth for comprehensive results.",
+                "description": "Search the web for information on any topic. Returns up to 5 relevant results with titles and URLs. Uses advanced search depth for comprehensive results.",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "query": {
                             "type": "string",
                             "description": "The search query to look up on the web"
-                        },
-                        "limit": {
-                            "type": "integer",
-                            "description": "Maximum number of results to return (default: 5, max: 10)",
-                            "default": 5,
-                            "minimum": 1,
-                            "maximum": 10
                         }
                     },
                     "required": ["query"]
@@ -308,145 +306,146 @@ def get_toolset_for_tool(tool_name: str) -> str:
 
 
 def get_tool_definitions(
-    enabled_tools: List[str] = None, 
-    disabled_tools: List[str] = None,
     enabled_toolsets: List[str] = None,
     disabled_toolsets: List[str] = None
 ) -> List[Dict[str, Any]]:
     """
-    Get tool definitions for model API calls with optional filtering.
+    Get tool definitions for model API calls with toolset-based filtering.
     
-    This function aggregates tool definitions from all available toolsets
-    and applies filtering based on the provided parameters.
-    
-    Filter Priority (higher priority overrides lower):
-    1. enabled_tools (highest priority - only these tools, overrides everything)
-    2. disabled_tools (applied after toolset filtering)
-    3. enabled_toolsets (only tools from these toolsets)
-    4. disabled_toolsets (exclude tools from these toolsets)
+    This function aggregates tool definitions from available toolsets.
+    All tools must be part of a toolset to be accessible. Individual tool
+    selection is not supported - use toolsets to organize and select tools.
     
     Args:
-        enabled_tools (List[str]): Only include these specific tools. If provided, 
-                                  ONLY these tools will be included (overrides all other filters)
-        disabled_tools (List[str]): Exclude these specific tools (applied after toolset filtering)
-        enabled_toolsets (List[str]): Only include tools from these toolsets
-        disabled_toolsets (List[str]): Exclude tools from these toolsets
+        enabled_toolsets (List[str]): Only include tools from these toolsets.
+                                     If None or empty, all available tools are included.
+        disabled_toolsets (List[str]): Exclude tools from these toolsets.
+                                      Applied only if enabled_toolsets is None or empty.
     
     Returns:
         List[Dict]: Filtered list of tool definitions
     
     Examples:
-        # Only web tools
-        tools = get_tool_definitions(enabled_toolsets=["web_tools"])
+        # Use predefined toolsets
+        tools = get_tool_definitions(enabled_toolsets=["research"])
+        tools = get_tool_definitions(enabled_toolsets=["development"])
         
-        # All tools except terminal
-        tools = get_tool_definitions(disabled_tools=["terminal"])
+        # Combine multiple toolsets
+        tools = get_tool_definitions(enabled_toolsets=["web", "vision"])
         
-        # Only specific tools (overrides toolset filters)
-        tools = get_tool_definitions(enabled_tools=["web_search", "web_extract"])
+        # All tools except those in terminal toolset
+        tools = get_tool_definitions(disabled_toolsets=["terminal"])
         
-        # Conflicting filters (enabled_tools wins)
-        tools = get_tool_definitions(enabled_toolsets=["web_tools"], enabled_tools=["terminal"])
-        # Result: Only terminal tool (enabled_tools overrides enabled_toolsets)
+        # Default - all available tools
+        tools = get_tool_definitions()
     """
-    # Detect and warn about potential conflicts
-    conflicts_detected = False
+    # Collect all available tool definitions
+    all_available_tools_map = {}
     
-    if enabled_tools and (enabled_toolsets or disabled_toolsets or disabled_tools):
-        print("⚠️  enabled_tools overrides all other filters")
-        conflicts_detected = True
+    # Map tool names to their definitions
+    if check_firecrawl_api_key():
+        for tool in get_web_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
     
-    if enabled_toolsets and disabled_toolsets:
-        # Check for overlap
-        enabled_set = set(enabled_toolsets)
-        disabled_set = set(disabled_toolsets)
-        overlap = enabled_set & disabled_set
-        if overlap:
-            print(f"⚠️  Conflicting toolsets: {overlap} in both enabled and disabled")
-            print(f"   → enabled_toolsets takes priority")
-            conflicts_detected = True
+    if check_hecate_requirements():
+        for tool in get_terminal_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
     
-    if enabled_tools and disabled_tools:
-        # Check for overlap
-        enabled_set = set(enabled_tools)
-        disabled_set = set(disabled_tools)
-        overlap = enabled_set & disabled_set
-        if overlap:
-            print(f"⚠️  Conflicting tools: {overlap} in both enabled and disabled")
-            print(f"   → enabled_tools takes priority")
-            conflicts_detected = True
+    if check_vision_requirements():
+        for tool in get_vision_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
     
-    all_tools = []
+    if check_moa_requirements():
+        for tool in get_moa_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
     
-    # Collect all available tools from each toolset
-    toolset_tools = {
-        "web_tools": get_web_tool_definitions() if check_firecrawl_api_key() else [],
-        "terminal_tools": get_terminal_tool_definitions() if check_hecate_requirements() else [],
-        "vision_tools": get_vision_tool_definitions() if check_vision_requirements() else [],
-        "moa_tools": get_moa_tool_definitions() if check_moa_requirements() else [],
-        "image_tools": get_image_tool_definitions() if check_image_generation_requirements() else []
-    }
+    if check_image_generation_requirements():
+        for tool in get_image_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
     
-    # HIGHEST PRIORITY: enabled_tools (overrides everything)
-    if enabled_tools:
-        if conflicts_detected:
-            print(f"🎯 Using only enabled_tools: {enabled_tools}")
-        
-        # Collect all available tools first
-        all_available_tools = []
-        for tools in toolset_tools.values():
-            all_available_tools.extend(tools)
-        
-        # Only include specifically enabled tools
-        tool_names_to_include = set(enabled_tools)
-        filtered_tools = [
-            tool for tool in all_available_tools 
-            if tool["function"]["name"] in tool_names_to_include
-        ]
-        
-        # Warn about requested tools that aren't available
-        found_tools = {tool["function"]["name"] for tool in filtered_tools}
-        missing_tools = tool_names_to_include - found_tools
-        if missing_tools:
-            print(f"⚠️  Requested tools not available: {missing_tools}")
-        
-        return filtered_tools
+    # Determine which tools to include based on toolsets
+    tools_to_include = set()
     
-    # Apply toolset-level filtering first
     if enabled_toolsets:
         # Only include tools from enabled toolsets
         for toolset_name in enabled_toolsets:
-            if toolset_name in toolset_tools:
-                all_tools.extend(toolset_tools[toolset_name])
+            if validate_toolset(toolset_name):
+                resolved_tools = resolve_toolset(toolset_name)
+                tools_to_include.update(resolved_tools)
+                print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
             else:
-                print(f"⚠️  Unknown toolset: {toolset_name}")
+                # Try legacy compatibility
+                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools"]:
+                    # Map legacy names to new system
+                    legacy_map = {
+                        "web_tools": ["web_search", "web_extract", "web_crawl"],
+                        "terminal_tools": ["terminal"],
+                        "vision_tools": ["vision_analyze"],
+                        "moa_tools": ["mixture_of_agents"],
+                        "image_tools": ["image_generate"]
+                    }
+                    legacy_tools = legacy_map.get(toolset_name, [])
+                    tools_to_include.update(legacy_tools)
+                    print(f"✅ Enabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
+                else:
+                    print(f"⚠️  Unknown toolset: {toolset_name}")
     elif disabled_toolsets:
-        # Include all tools except from disabled toolsets
-        for toolset_name, tools in toolset_tools.items():
-            if toolset_name not in disabled_toolsets:
-                all_tools.extend(tools)
+        # Start with all tools from all toolsets, then remove disabled ones
+        # Note: Only tools that are part of toolsets are accessible
+        # We need to get all tools from all defined toolsets
+        from toolsets import get_all_toolsets
+        all_toolset_tools = set()
+        for toolset_name in get_all_toolsets():
+            resolved_tools = resolve_toolset(toolset_name)
+            all_toolset_tools.update(resolved_tools)
+        
+        # Start with all tools from toolsets
+        tools_to_include = all_toolset_tools
+        
+        # Remove tools from disabled toolsets
+        for toolset_name in disabled_toolsets:
+            if validate_toolset(toolset_name):
+                resolved_tools = resolve_toolset(toolset_name)
+                tools_to_include.difference_update(resolved_tools)
+                print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved_tools) if resolved_tools else 'no tools'}")
+            else:
+                # Try legacy compatibility
+                if toolset_name in ["web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools"]:
+                    legacy_map = {
+                        "web_tools": ["web_search", "web_extract", "web_crawl"],
+                        "terminal_tools": ["terminal"],
+                        "vision_tools": ["vision_analyze"],
+                        "moa_tools": ["mixture_of_agents"],
+                        "image_tools": ["image_generate"]
+                    }
+                    legacy_tools = legacy_map.get(toolset_name, [])
+                    tools_to_include.difference_update(legacy_tools)
+                    print(f"🚫 Disabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
+                else:
+                    print(f"⚠️  Unknown toolset: {toolset_name}")
     else:
-        # Include all available tools
-        for tools in toolset_tools.values():
-            all_tools.extend(tools)
+        # No filtering - include all tools from all defined toolsets
+        from toolsets import get_all_toolsets
+        for toolset_name in get_all_toolsets():
+            resolved_tools = resolve_toolset(toolset_name)
+            tools_to_include.update(resolved_tools)
     
-    # Apply tool-level filtering (disabled_tools)
-    if disabled_tools:
-        tool_names_to_exclude = set(disabled_tools)
-        original_tools = [tool["function"]["name"] for tool in all_tools]
-        
-        all_tools = [
-            tool for tool in all_tools 
-            if tool["function"]["name"] not in tool_names_to_exclude
-        ]
-        
-        # Show what was actually filtered out
-        remaining_tools = {tool["function"]["name"] for tool in all_tools}
-        actually_excluded = set(original_tools) & tool_names_to_exclude
-        if actually_excluded:
-            print(f"🚫 Excluded tools: {actually_excluded}")
+    # Build final tool list (only include tools that are available)
+    filtered_tools = []
+    for tool_name in tools_to_include:
+        if tool_name in all_available_tools_map:
+            filtered_tools.append(all_available_tools_map[tool_name])
     
-    return all_tools
+    # Sort tools for consistent ordering
+    filtered_tools.sort(key=lambda t: t["function"]["name"])
+    
+    if filtered_tools:
+        tool_names = [t["function"]["name"] for t in filtered_tools]
+        print(f"🛠️  Final tool selection ({len(filtered_tools)} tools): {', '.join(tool_names)}")
+    else:
+        print("🛠️  No tools selected (all filtered out or unavailable)")
+    
+    return filtered_tools
 
 def handle_web_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
     """
@@ -461,9 +460,8 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any])
     """
     if function_name == "web_search":
         query = function_args.get("query", "")
-        limit = function_args.get("limit", 5)
-        # Ensure limit is within bounds
-        limit = max(1, min(10, limit))
+        # Always use fixed limit of 5
+        limit = 5
         return web_search_tool(query, limit)
     
     elif function_name == "web_extract":
diff --git a/run_agent.py b/run_agent.py
index eec9d63c7..1aba154f4 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1,613 +1,640 @@
-#!/usr/bin/env python3
-"""
-AI Agent Runner with Tool Calling
-
-This module provides a clean, standalone agent that can execute AI models
-with tool calling capabilities. It handles the conversation loop, tool execution,
-and response management.
-
-Features:
-- Automatic tool calling loop until completion
-- Configurable model parameters
-- Error handling and recovery
-- Message history management
-- Support for multiple model providers
-
-Usage:
-    from run_agent import AIAgent
-    
-    agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
-    response = agent.run_conversation("Tell me about the latest Python updates")
-"""
-
-import json
-import os
-import time
-from typing import List, Dict, Any, Optional
-from openai import OpenAI
-import fire
-from datetime import datetime
-
-# Import our tool system
-from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
-
-
-class AIAgent:
-    """
-    AI Agent with tool calling capabilities.
-    
-    This class manages the conversation flow, tool execution, and response handling
-    for AI models that support function calling.
-    """
-    
-    def __init__(
-        self, 
-        base_url: str = None, 
-        api_key: str = None, 
-        model: str = "gpt-4",
-        max_iterations: int = 10,
-        tool_delay: float = 1.0,
-        enabled_tools: List[str] = None,
-        disabled_tools: List[str] = None,
-        enabled_toolsets: List[str] = None,
-        disabled_toolsets: List[str] = None,
-        save_trajectories: bool = False
-    ):
-        """
-        Initialize the AI Agent.
-        
-        Args:
-            base_url (str): Base URL for the model API (optional)
-            api_key (str): API key for authentication (optional, uses env var if not provided)
-            model (str): Model name to use (default: "gpt-4")
-            max_iterations (int): Maximum number of tool calling iterations (default: 10)
-            tool_delay (float): Delay between tool calls in seconds (default: 1.0)
-            enabled_tools (List[str]): Only enable these specific tools (optional)
-            disabled_tools (List[str]): Disable these specific tools (optional)
-            enabled_toolsets (List[str]): Only enable tools from these toolsets (optional)
-            disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
-            save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
-        """
-        self.model = model
-        self.max_iterations = max_iterations
-        self.tool_delay = tool_delay
-        self.save_trajectories = save_trajectories
-        
-        # Store tool filtering options
-        self.enabled_tools = enabled_tools
-        self.disabled_tools = disabled_tools
-        self.enabled_toolsets = enabled_toolsets
-        self.disabled_toolsets = disabled_toolsets
-        
-        # Initialize OpenAI client
-        client_kwargs = {}
-        if base_url:
-            client_kwargs["base_url"] = base_url
-        if api_key:
-            client_kwargs["api_key"] = api_key
-        else:
-            client_kwargs["api_key"] = os.getenv("ANTHROPIC_API_KEY", "dummy-key")
-        
-        try:
-            self.client = OpenAI(**client_kwargs)
-            print(f"🤖 AI Agent initialized with model: {self.model}")
-            if base_url:
-                print(f"🔗 Using custom base URL: {base_url}")
-        except Exception as e:
-            raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
-        
-        # Get available tools with filtering
-        self.tools = get_tool_definitions(
-            enabled_tools=enabled_tools,
-            disabled_tools=disabled_tools,
-            enabled_toolsets=enabled_toolsets,
-            disabled_toolsets=disabled_toolsets
-        )
-        
-        # Show tool configuration
-        if self.tools:
-            tool_names = [tool["function"]["name"] for tool in self.tools]
-            print(f"🛠️  Loaded {len(self.tools)} tools: {', '.join(tool_names)}")
-            
-            # Show filtering info if applied
-            if enabled_tools:
-                print(f"   ✅ Enabled tools: {', '.join(enabled_tools)}")
-            if disabled_tools:
-                print(f"   ❌ Disabled tools: {', '.join(disabled_tools)}")
-            if enabled_toolsets:
-                print(f"   ✅ Enabled toolsets: {', '.join(enabled_toolsets)}")
-            if disabled_toolsets:
-                print(f"   ❌ Disabled toolsets: {', '.join(disabled_toolsets)}")
-        else:
-            print("🛠️  No tools loaded (all tools filtered out or unavailable)")
-        
-        # Check tool requirements
-        if self.tools:
-            requirements = check_toolset_requirements()
-            missing_reqs = [name for name, available in requirements.items() if not available]
-            if missing_reqs:
-                print(f"⚠️  Some tools may not work due to missing requirements: {missing_reqs}")
-        
-        # Show trajectory saving status
-        if self.save_trajectories:
-            print("📝 Trajectory saving enabled")
-    
-    def _format_tools_for_system_message(self) -> str:
-        """
-        Format tool definitions for the system message in the trajectory format.
-        
-        Returns:
-            str: JSON string representation of tool definitions
-        """
-        if not self.tools:
-            return "[]"
-        
-        # Convert tool definitions to the format expected in trajectories
-        formatted_tools = []
-        for tool in self.tools:
-            func = tool["function"]
-            formatted_tool = {
-                "name": func["name"],
-                "description": func.get("description", ""),
-                "parameters": func.get("parameters", {}),
-                "required": None  # Match the format in the example
-            }
-            formatted_tools.append(formatted_tool)
-        
-        return json.dumps(formatted_tools)
-    
-    def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
-        """
-        Convert internal message format to trajectory format for saving.
-        
-        Args:
-            messages (List[Dict]): Internal message history
-            user_query (str): Original user query
-            completed (bool): Whether the conversation completed successfully
-            
-        Returns:
-            List[Dict]: Messages in trajectory format
-        """
-        trajectory = []
-        
-        # Add system message with tool definitions
-        system_msg = (
-            "You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. "
-            "You may call one or more functions to assist with the user query. If available tools are not relevant in assisting "
-            "with user query, just respond in natural conversational language. Don't make assumptions about what values to plug "
-            "into functions. After calling & executing the functions, you will be provided with function results within "
-            "<tool_response> </tool_response> XML tags. Here are the available tools:\n"
-            f"<tools>\n{self._format_tools_for_system_message()}\n</tools>\n"
-            "For each function call return a JSON object, with the following pydantic model json schema for each:\n"
-            "{'title': 'FunctionCall', 'type': 'object', 'properties': {'name': {'title': 'Name', 'type': 'string'}, "
-            "'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments']}\n"
-            "Each function call should be enclosed within <tool_call> </tool_call> XML tags.\n"
-            "Example:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
-        )
-        
-        trajectory.append({
-            "from": "system",
-            "value": system_msg
-        })
-        
-        # Add the initial user message
-        trajectory.append({
-            "from": "human",
-            "value": user_query
-        })
-        
-        # Process remaining messages
-        i = 1  # Skip the first user message as we already added it
-        while i < len(messages):
-            msg = messages[i]
-            
-            if msg["role"] == "assistant":
-                # Check if this message has tool calls
-                if "tool_calls" in msg and msg["tool_calls"]:
-                    # Format assistant message with tool calls
-                    content = ""
-                    if msg.get("content") and msg["content"].strip():
-                        content = msg["content"] + "\n"
-                    
-                    # Add tool calls wrapped in XML tags
-                    for tool_call in msg["tool_calls"]:
-                        tool_call_json = {
-                            "name": tool_call["function"]["name"],
-                            "arguments": json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
-                        }
-                        content += f"<tool_call>\n{json.dumps(tool_call_json)}\n</tool_call>\n"
-                    
-                    trajectory.append({
-                        "from": "gpt",
-                        "value": content.rstrip()
-                    })
-                    
-                    # Collect all subsequent tool responses
-                    tool_responses = []
-                    j = i + 1
-                    while j < len(messages) and messages[j]["role"] == "tool":
-                        tool_msg = messages[j]
-                        # Format tool response with XML tags
-                        tool_response = f"<tool_response>\n"
-                        
-                        # Try to parse tool content as JSON if it looks like JSON
-                        tool_content = tool_msg["content"]
-                        try:
-                            if tool_content.strip().startswith(("{", "[")):
-                                tool_content = json.loads(tool_content)
-                        except (json.JSONDecodeError, AttributeError):
-                            pass  # Keep as string if not valid JSON
-                        
-                        tool_response += json.dumps({
-                            "tool_call_id": tool_msg.get("tool_call_id", ""),
-                            "name": msg["tool_calls"][len(tool_responses)]["function"]["name"] if len(tool_responses) < len(msg["tool_calls"]) else "unknown",
-                            "content": tool_content
-                        })
-                        tool_response += "\n</tool_response>"
-                        tool_responses.append(tool_response)
-                        j += 1
-                    
-                    # Add all tool responses as a single message
-                    if tool_responses:
-                        trajectory.append({
-                            "from": "tool",
-                            "value": "\n".join(tool_responses)
-                        })
-                        i = j - 1  # Skip the tool messages we just processed
-                
-                else:
-                    # Regular assistant message without tool calls
-                    trajectory.append({
-                        "from": "gpt",
-                        "value": msg["content"] or ""
-                    })
-            
-            elif msg["role"] == "user":
-                trajectory.append({
-                    "from": "human",
-                    "value": msg["content"]
-                })
-            
-            i += 1
-        
-        return trajectory
-    
-    def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool):
-        """
-        Save conversation trajectory to JSONL file.
-        
-        Args:
-            messages (List[Dict]): Complete message history
-            user_query (str): Original user query
-            completed (bool): Whether the conversation completed successfully
-        """
-        if not self.save_trajectories:
-            return
-        
-        # Convert messages to trajectory format
-        trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
-        
-        # Determine which file to save to
-        filename = "trajectory_samples.jsonl" if completed else "failed_trajectories.jsonl"
-        
-        # Create trajectory entry
-        entry = {
-            "conversations": trajectory,
-            "timestamp": datetime.now().isoformat(),
-            "model": self.model,
-            "completed": completed
-        }
-        
-        # Append to JSONL file
-        try:
-            with open(filename, "a", encoding="utf-8") as f:
-                f.write(json.dumps(entry, ensure_ascii=False) + "\n")
-            print(f"💾 Trajectory saved to {filename}")
-        except Exception as e:
-            print(f"⚠️ Failed to save trajectory: {e}")
-    
-    def run_conversation(
-        self, 
-        user_message: str, 
-        system_message: str = None, 
-        conversation_history: List[Dict[str, Any]] = None
-    ) -> Dict[str, Any]:
-        """
-        Run a complete conversation with tool calling until completion.
-        
-        Args:
-            user_message (str): The user's message/question
-            system_message (str): Custom system message (optional)
-            conversation_history (List[Dict]): Previous conversation messages (optional)
-            
-        Returns:
-            Dict: Complete conversation result with final response and message history
-        """
-        # Initialize conversation
-        messages = conversation_history or []
-        
-        # Add user message
-        messages.append({
-            "role": "user",
-            "content": user_message
-        })
-        
-        print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
-        
-        # Main conversation loop
-        api_call_count = 0
-        final_response = None
-        
-        while api_call_count < self.max_iterations:
-            api_call_count += 1
-            print(f"\n🔄 Making API call #{api_call_count}...")
-            
-            try:
-                # Make API call with tools
-                response = self.client.chat.completions.create(
-                    model=self.model,
-                    messages=messages,
-                    tools=self.tools if self.tools else None
-                )
-                
-                assistant_message = response.choices[0].message
-                
-                # Handle assistant response
-                if assistant_message.content:
-                    print(f"🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
-                
-                # Check for tool calls
-                if assistant_message.tool_calls:
-                    print(f"🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
-                    
-                    # Add assistant message with tool calls to conversation
-                    messages.append({
-                        "role": "assistant",
-                        "content": assistant_message.content,
-                        "tool_calls": [
-                            {
-                                "id": tool_call.id,
-                                "type": tool_call.type,
-                                "function": {
-                                    "name": tool_call.function.name,
-                                    "arguments": tool_call.function.arguments
-                                }
-                            }
-                            for tool_call in assistant_message.tool_calls
-                        ]
-                    })
-                    
-                    # Execute each tool call
-                    for i, tool_call in enumerate(assistant_message.tool_calls, 1):
-                        function_name = tool_call.function.name
-                        
-                        try:
-                            function_args = json.loads(tool_call.function.arguments)
-                        except json.JSONDecodeError as e:
-                            print(f"❌ Invalid JSON in tool call arguments: {e}")
-                            function_args = {}
-                        
-                        print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
-                        
-                        # Execute the tool
-                        function_result = handle_function_call(function_name, function_args)
-                        
-                        # Add tool result to conversation
-                        messages.append({
-                            "role": "tool",
-                            "content": function_result,
-                            "tool_call_id": tool_call.id
-                        })
-                        
-                        print(f"  ✅ Tool {i} completed")
-                        
-                        # Delay between tool calls
-                        if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
-                            time.sleep(self.tool_delay)
-                    
-                    # Continue loop for next response
-                    continue
-                
-                else:
-                    # No tool calls - this is the final response
-                    final_response = assistant_message.content or ""
-                    
-                    # Add final assistant message
-                    messages.append({
-                        "role": "assistant", 
-                        "content": final_response
-                    })
-                    
-                    print(f"🎉 Conversation completed after {api_call_count} API call(s)")
-                    break
-                
-            except Exception as e:
-                error_msg = f"Error during API call #{api_call_count}: {str(e)}"
-                print(f"❌ {error_msg}")
-                
-                # Add error to conversation and try to continue
-                messages.append({
-                    "role": "assistant",
-                    "content": f"I encountered an error: {error_msg}. Let me try a different approach."
-                })
-                
-                # If we're near the limit, break to avoid infinite loops
-                if api_call_count >= self.max_iterations - 1:
-                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
-                    break
-        
-        # Handle max iterations reached
-        if api_call_count >= self.max_iterations:
-            print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Stopping to prevent infinite loop.")
-            if final_response is None:
-                final_response = "I've reached the maximum number of iterations. Here's what I found so far."
-        
-        # Determine if conversation completed successfully
-        completed = final_response is not None and api_call_count < self.max_iterations
-        
-        # Save trajectory if enabled
-        self._save_trajectory(messages, user_message, completed)
-        
-        return {
-            "final_response": final_response,
-            "messages": messages,
-            "api_calls": api_call_count,
-            "completed": completed
-        }
-    
-    def chat(self, message: str) -> str:
-        """
-        Simple chat interface that returns just the final response.
-        
-        Args:
-            message (str): User message
-            
-        Returns:
-            str: Final assistant response
-        """
-        result = self.run_conversation(message)
-        return result["final_response"]
-
-
-def main(
-    query: str = None,
-    model: str = "claude-opus-4-20250514", 
-    api_key: str = None,
-    base_url: str = "https://api.anthropic.com/v1/",
-    max_turns: int = 10,
-    enabled_tools: str = None,
-    disabled_tools: str = None,
-    enabled_toolsets: str = None,
-    disabled_toolsets: str = None,
-    list_tools: bool = False,
-    save_trajectories: bool = False
-):
-    """
-    Main function for running the agent directly.
-    
-    Args:
-        query (str): Natural language query for the agent. Defaults to Python 3.13 example.
-        model (str): Model name to use. Defaults to claude-opus-4-20250514.
-        api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided.
-        base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
-        max_turns (int): Maximum number of API call iterations. Defaults to 10.
-        enabled_tools (str): Comma-separated list of tools to enable (e.g., "web_search,terminal")
-        disabled_tools (str): Comma-separated list of tools to disable (e.g., "terminal")
-        enabled_toolsets (str): Comma-separated list of toolsets to enable (e.g., "web_tools")
-        disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal_tools")
-        list_tools (bool): Just list available tools and exit
-        save_trajectories (bool): Save conversation trajectories to JSONL files. Defaults to False.
-    """
-    print("🤖 AI Agent with Tool Calling")
-    print("=" * 50)
-    
-    # Handle tool listing
-    if list_tools:
-        from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets
-        
-        print("📋 Available Tools & Toolsets:")
-        print("-" * 30)
-        
-        # Show toolsets
-        toolsets = get_available_toolsets()
-        print("📦 Toolsets:")
-        for name, info in toolsets.items():
-            status = "✅" if info["available"] else "❌"
-            print(f"  {status} {name}: {info['description']}")
-            if not info["available"]:
-                print(f"    Requirements: {', '.join(info['requirements'])}")
-        
-        # Show individual tools
-        all_tools = get_all_tool_names()
-        print(f"\n🔧 Individual Tools ({len(all_tools)} available):")
-        for tool_name in all_tools:
-            toolset = get_toolset_for_tool(tool_name)
-            print(f"  📌 {tool_name} (from {toolset})")
-        
-        print(f"\n💡 Usage Examples:")
-        print(f"  # Run with only web tools")
-        print(f"  python run_agent.py --enabled_toolsets=web_tools --query='search for Python news'")
-        print(f"  # Run with specific tools only")
-        print(f"  python run_agent.py --enabled_tools=web_search,web_extract --query='research topic'")
-        print(f"  # Run without terminal tools")
-        print(f"  python run_agent.py --disabled_tools=terminal --query='web research only'")
-        print(f"  # Run with trajectory saving enabled")
-        print(f"  python run_agent.py --save_trajectories --query='your question here'")
-        return
-    
-    # Parse tool selection arguments
-    enabled_tools_list = None
-    disabled_tools_list = None
-    enabled_toolsets_list = None
-    disabled_toolsets_list = None
-    
-    if enabled_tools:
-        enabled_tools_list = [t.strip() for t in enabled_tools.split(",")]
-        print(f"🎯 Enabled tools: {enabled_tools_list}")
-    
-    if disabled_tools:
-        disabled_tools_list = [t.strip() for t in disabled_tools.split(",")]
-        print(f"🚫 Disabled tools: {disabled_tools_list}")
-    
-    if enabled_toolsets:
-        enabled_toolsets_list = [t.strip() for t in enabled_toolsets.split(",")]
-        print(f"🎯 Enabled toolsets: {enabled_toolsets_list}")
-    
-    if disabled_toolsets:
-        disabled_toolsets_list = [t.strip() for t in disabled_toolsets.split(",")]
-        print(f"🚫 Disabled toolsets: {disabled_toolsets_list}")
-    
-    if save_trajectories:
-        print(f"💾 Trajectory saving: ENABLED")
-        print(f"   - Successful conversations → trajectory_samples.jsonl")
-        print(f"   - Failed conversations → failed_trajectories.jsonl")
-    
-    # Initialize agent with provided parameters
-    try:
-        agent = AIAgent(
-            base_url=base_url,
-            model=model,
-            api_key=api_key,
-            max_iterations=max_turns,
-            enabled_tools=enabled_tools_list,
-            disabled_tools=disabled_tools_list,
-            enabled_toolsets=enabled_toolsets_list,
-            disabled_toolsets=disabled_toolsets_list,
-            save_trajectories=save_trajectories
-        )
-    except RuntimeError as e:
-        print(f"❌ Failed to initialize agent: {e}")
-        return
-    
-    # Use provided query or default to Python 3.13 example
-    if query is None:
-        user_query = (
-            "Tell me about the latest developments in Python 3.13 and what new features "
-            "developers should know about. Please search for current information and try it out."
-        )
-    else:
-        user_query = query
-    
-    print(f"\n📝 User Query: {user_query}")
-    print("\n" + "=" * 50)
-    
-    # Run conversation
-    result = agent.run_conversation(user_query)
-    
-    print("\n" + "=" * 50)
-    print("📋 CONVERSATION SUMMARY")
-    print("=" * 50)
-    print(f"✅ Completed: {result['completed']}")
-    print(f"📞 API Calls: {result['api_calls']}")
-    print(f"💬 Messages: {len(result['messages'])}")
-    
-    if result['final_response']:
-        print(f"\n🎯 FINAL RESPONSE:")
-        print("-" * 30)
-        print(result['final_response'])
-    
-    print("\n👋 Agent execution completed!")
-
-
-if __name__ == "__main__":
-    fire.Fire(main)
+#!/usr/bin/env python3
+"""
+AI Agent Runner with Tool Calling
+
+This module provides a clean, standalone agent that can execute AI models
+with tool calling capabilities. It handles the conversation loop, tool execution,
+and response management.
+
+Features:
+- Automatic tool calling loop until completion
+- Configurable model parameters
+- Error handling and recovery
+- Message history management
+- Support for multiple model providers
+
+Usage:
+    from run_agent import AIAgent
+    
+    agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
+    response = agent.run_conversation("Tell me about the latest Python updates")
+"""
+
+import json
+
+import os
+import time
+from typing import List, Dict, Any, Optional
+from openai import OpenAI
+import fire
+from datetime import datetime
+
+# Import our tool system
+from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
+
+
+class AIAgent:
+    """
+    AI Agent with tool calling capabilities.
+    
+    This class manages the conversation flow, tool execution, and response handling
+    for AI models that support function calling.
+    """
+    
+    def __init__(
+        self,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model: str = "gpt-4",
+        max_iterations: int = 10,
+        tool_delay: float = 1.0,
+        enabled_toolsets: Optional[List[str]] = None,
+        disabled_toolsets: Optional[List[str]] = None,
+        save_trajectories: bool = False
+    ):
+        """
+        Initialize the AI Agent.
+
+        Creates the OpenAI client, loads the tool definitions selected by the
+        toolset filters, and prints a summary of the resulting configuration.
+
+        Args:
+            base_url (str): Base URL for the model API (optional)
+            api_key (str): API key for authentication (optional; falls back to the
+                ANTHROPIC_API_KEY env var, then to the placeholder "dummy-key")
+            model (str): Model name to use (default: "gpt-4")
+            max_iterations (int): Maximum number of tool calling iterations (default: 10)
+            tool_delay (float): Delay between tool calls in seconds (default: 1.0)
+            enabled_toolsets (List[str]): Only enable tools from these toolsets (optional)
+            disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
+            save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
+
+        Raises:
+            RuntimeError: If the OpenAI client cannot be initialized
+        """
+        self.model = model
+        self.max_iterations = max_iterations
+        self.tool_delay = tool_delay
+        self.save_trajectories = save_trajectories
+
+        # Store toolset filtering options
+        self.enabled_toolsets = enabled_toolsets
+        self.disabled_toolsets = disabled_toolsets
+
+        # Initialize OpenAI client
+        client_kwargs = {"api_key": api_key or os.getenv("ANTHROPIC_API_KEY", "dummy-key")}
+        if base_url:
+            client_kwargs["base_url"] = base_url
+
+        try:
+            self.client = OpenAI(**client_kwargs)
+            print(f"🤖 AI Agent initialized with model: {self.model}")
+            if base_url:
+                print(f"🔗 Using custom base URL: {base_url}")
+        except Exception as e:
+            # Chain explicitly so the underlying client error stays attached as the cause
+            raise RuntimeError(f"Failed to initialize OpenAI client: {e}") from e
+
+        # Get available tools with filtering
+        self.tools = get_tool_definitions(
+            enabled_toolsets=enabled_toolsets,
+            disabled_toolsets=disabled_toolsets
+        )
+
+        if self.tools:
+            tool_names = [tool["function"]["name"] for tool in self.tools]
+            print(f"🛠️  Loaded {len(self.tools)} tools: {', '.join(tool_names)}")
+            # Show filtering info if applied
+            if enabled_toolsets:
+                print(f"   ✅ Enabled toolsets: {', '.join(enabled_toolsets)}")
+            if disabled_toolsets:
+                print(f"   ❌ Disabled toolsets: {', '.join(disabled_toolsets)}")
+            # Check tool requirements
+            requirements = check_toolset_requirements()
+            missing_reqs = [name for name, available in requirements.items() if not available]
+            if missing_reqs:
+                print(f"⚠️  Some tools may not work due to missing requirements: {missing_reqs}")
+        else:
+            print("🛠️  No tools loaded (all tools filtered out or unavailable)")
+
+        # Show trajectory saving status
+        if self.save_trajectories:
+            print("📝 Trajectory saving enabled")
+    
+    def _format_tools_for_system_message(self) -> str:
+        """
+        Serialize the loaded tool definitions for the trajectory system message.
+
+        Each tool is reduced to its name, description and parameter schema; a
+        "required" key is always emitted as null to match the example format.
+
+        Returns:
+            str: JSON array of tool entries, or "[]" when no tools are loaded
+        """
+        if not self.tools:
+            return "[]"
+
+        functions = (entry["function"] for entry in self.tools)
+        return json.dumps([
+            {
+                "name": spec["name"],
+                "description": spec.get("description", ""),
+                "parameters": spec.get("parameters", {}),
+                "required": None,  # Match the format in the example
+            }
+            for spec in functions
+        ])
+    
+    def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
+        """
+        Convert internal message format to trajectory format for saving.
+
+        The result opens with a system turn describing the tools and a human
+        turn holding user_query. Assistant messages become "gpt" turns (tool
+        calls wrapped in <tool_call> tags), the tool messages that directly
+        follow a tool-calling assistant message are merged into one "tool" turn,
+        and user messages become "human" turns. Anything else is dropped.
+
+        Args:
+            messages (List[Dict]): Internal message history
+            user_query (str): Original user query
+            completed (bool): Whether the conversation completed successfully
+                (not used by the conversion itself)
+
+        Returns:
+            List[Dict]: Messages in trajectory format
+        """
+        # System message with tool definitions
+        system_msg = (
+            "You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. "
+            "You may call one or more functions to assist with the user query. If available tools are not relevant in assisting "
+            "with user query, just respond in natural conversational language. Don't make assumptions about what values to plug "
+            "into functions. After calling & executing the functions, you will be provided with function results within "
+            "<tool_response> </tool_response> XML tags. Here are the available tools:\n"
+            f"<tools>\n{self._format_tools_for_system_message()}\n</tools>\n"
+            "For each function call return a JSON object, with the following pydantic model json schema for each:\n"
+            "{'title': 'FunctionCall', 'type': 'object', 'properties': {'name': {'title': 'Name', 'type': 'string'}, "
+            "'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments']}\n"
+            "Each function call should be enclosed within <tool_call> </tool_call> XML tags.\n"
+            "Example:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
+        )
+        trajectory = [
+            {"from": "system", "value": system_msg},
+            {"from": "human", "value": user_query},
+        ]
+
+        # NOTE(review): messages[0] is assumed to be the turn carrying user_query.
+        # run_conversation appends the new user message after any
+        # conversation_history, so with history this skips a different turn - confirm.
+        i = 1
+        while i < len(messages):
+            msg = messages[i]
+            role = msg["role"]
+
+            if role == "assistant" and msg.get("tool_calls"):
+                tool_calls = msg["tool_calls"]
+                content = ""
+                if msg.get("content") and msg["content"].strip():
+                    content = msg["content"] + "\n"
+
+                # Add tool calls wrapped in XML tags
+                for tool_call in tool_calls:
+                    arguments = tool_call["function"]["arguments"]
+                    if isinstance(arguments, str):
+                        try:
+                            arguments = json.loads(arguments)
+                        except json.JSONDecodeError:
+                            # run_conversation tolerates malformed argument JSON;
+                            # keep the raw string rather than abort the export
+                            pass
+                    tool_call_json = {"name": tool_call["function"]["name"], "arguments": arguments}
+                    content += f"<tool_call>\n{json.dumps(tool_call_json)}\n</tool_call>\n"
+
+                trajectory.append({"from": "gpt", "value": content.rstrip()})
+
+                # Label responses by tool_call_id, falling back to call order
+                names_by_id = {tc.get("id"): tc["function"]["name"] for tc in tool_calls}
+
+                # Collect all subsequent tool responses
+                tool_responses = []
+                j = i + 1
+                while j < len(messages) and messages[j]["role"] == "tool":
+                    tool_msg = messages[j]
+                    call_id = tool_msg.get("tool_call_id", "")
+                    position = len(tool_responses)
+                    if call_id and call_id in names_by_id:
+                        tool_name = names_by_id[call_id]
+                    elif position < len(tool_calls):
+                        tool_name = tool_calls[position]["function"]["name"]
+                    else:
+                        tool_name = "unknown"
+
+                    # Try to parse tool content as JSON if it looks like JSON
+                    tool_content = tool_msg["content"]
+                    try:
+                        if tool_content.strip().startswith(("{", "[")):
+                            tool_content = json.loads(tool_content)
+                    except (json.JSONDecodeError, AttributeError):
+                        pass  # Keep as string if not valid JSON
+
+                    payload = json.dumps({
+                        "tool_call_id": call_id,
+                        "name": tool_name,
+                        "content": tool_content
+                    })
+                    tool_responses.append(f"<tool_response>\n{payload}\n</tool_response>")
+                    j += 1
+
+                # Add all tool responses as a single message
+                if tool_responses:
+                    trajectory.append({"from": "tool", "value": "\n".join(tool_responses)})
+                    i = j - 1  # Skip the tool messages we just processed
+
+            elif role == "assistant":
+                # Regular assistant message without tool calls
+                trajectory.append({"from": "gpt", "value": msg["content"] or ""})
+
+            elif role == "user":
+                trajectory.append({"from": "human", "value": msg["content"]})
+
+            i += 1
+
+        return trajectory
+    
+    def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool):
+        """
+        Save conversation trajectory to JSONL file.
+
+        Saving is best-effort: any failure while converting, serializing or
+        writing is reported on stdout and never propagates to the caller.
+
+        Args:
+            messages (List[Dict]): Complete message history
+            user_query (str): Original user query
+            completed (bool): Whether the conversation completed successfully
+        """
+        if not self.save_trajectories:
+            return
+
+        # Completed and failed conversations go to separate files
+        filename = "trajectory_samples.jsonl" if completed else "failed_trajectories.jsonl"
+
+        try:
+            # Convert and serialize inside the try so a bad message cannot crash the run
+            entry = {
+                "conversations": self._convert_to_trajectory_format(messages, user_query, completed),
+                "timestamp": datetime.now().isoformat(),
+                "model": self.model,
+                "completed": completed
+            }
+            line = json.dumps(entry, ensure_ascii=False) + "\n"
+            # Append to JSONL file
+            with open(filename, "a", encoding="utf-8") as f:
+                f.write(line)
+            print(f"💾 Trajectory saved to {filename}")
+        except Exception as e:
+            print(f"⚠️ Failed to save trajectory: {e}")
+    
+    def run_conversation(
+        self, 
+        user_message: str, 
+        system_message: str = None, 
+        conversation_history: List[Dict[str, Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Run a complete conversation with tool calling until completion.
+        
+        Args:
+            user_message (str): The user's message/question
+            system_message (str): Custom system message (optional)
+            conversation_history (List[Dict]): Previous conversation messages (optional)
+            
+        Returns:
+            Dict: Complete conversation result with final response and message history
+        """
+        # Initialize conversation
+        messages = conversation_history or []
+        
+        # Add user message
+        messages.append({
+            "role": "user",
+            "content": user_message
+        })
+        
+        print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
+        
+        # Main conversation loop
+        api_call_count = 0
+        final_response = None
+        
+        while api_call_count < self.max_iterations:
+            api_call_count += 1
+            print(f"\n🔄 Making API call #{api_call_count}...")
+            
+            try:
+                # Make API call with tools
+                response = self.client.chat.completions.create(
+                    model=self.model,
+                    messages=messages,
+                    tools=self.tools if self.tools else None
+                )
+                
+                assistant_message = response.choices[0].message
+                
+                # Handle assistant response
+                if assistant_message.content:
+                    print(f"🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
+                
+                # Check for tool calls
+                if assistant_message.tool_calls:
+                    print(f"🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
+                    
+                    # Add assistant message with tool calls to conversation
+                    messages.append({
+                        "role": "assistant",
+                        "content": assistant_message.content,
+                        "tool_calls": [
+                            {
+                                "id": tool_call.id,
+                                "type": tool_call.type,
+                                "function": {
+                                    "name": tool_call.function.name,
+                                    "arguments": tool_call.function.arguments
+                                }
+                            }
+                            for tool_call in assistant_message.tool_calls
+                        ]
+                    })
+                    
+                    # Execute each tool call
+                    for i, tool_call in enumerate(assistant_message.tool_calls, 1):
+                        function_name = tool_call.function.name
+                        
+                        try:
+                            function_args = json.loads(tool_call.function.arguments)
+                        except json.JSONDecodeError as e:
+                            print(f"❌ Invalid JSON in tool call arguments: {e}")
+                            function_args = {}
+                        
+                        print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
+                        
+                        # Execute the tool
+                        function_result = handle_function_call(function_name, function_args)
+                        
+                        # Add tool result to conversation
+                        messages.append({
+                            "role": "tool",
+                            "content": function_result,
+                            "tool_call_id": tool_call.id
+                        })
+                        
+                        print(f"  ✅ Tool {i} completed")
+                        
+                        # Delay between tool calls
+                        if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
+                            time.sleep(self.tool_delay)
+                    
+                    # Continue loop for next response
+                    continue
+                
+                else:
+                    # No tool calls - this is the final response
+                    final_response = assistant_message.content or ""
+                    
+                    # Add final assistant message
+                    messages.append({
+                        "role": "assistant", 
+                        "content": final_response
+                    })
+                    
+                    print(f"🎉 Conversation completed after {api_call_count} API call(s)")
+                    break
+                
+            except Exception as e:
+                error_msg = f"Error during API call #{api_call_count}: {str(e)}"
+                print(f"❌ {error_msg}")
+                
+                # Add error to conversation and try to continue
+                messages.append({
+                    "role": "assistant",
+                    "content": f"I encountered an error: {error_msg}. Let me try a different approach."
+                })
+                
+                # If we're near the limit, break to avoid infinite loops
+                if api_call_count >= self.max_iterations - 1:
+                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
+                    break
+        
+        # Handle max iterations reached
+        if api_call_count >= self.max_iterations:
+            print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Stopping to prevent infinite loop.")
+            if final_response is None:
+                final_response = "I've reached the maximum number of iterations. Here's what I found so far."
+        
+        # Determine if conversation completed successfully
+        completed = final_response is not None and api_call_count < self.max_iterations
+        
+        # Save trajectory if enabled
+        self._save_trajectory(messages, user_message, completed)
+        
+        return {
+            "final_response": final_response,
+            "messages": messages,
+            "api_calls": api_call_count,
+            "completed": completed
+        }
+    
+    def chat(self, message: str) -> str:
+        """
+        Convenience wrapper: run one conversation and hand back only the reply text.
+
+        Args:
+            message (str): The user's message, forwarded to run_conversation()
+
+        Returns:
+            str: The "final_response" entry of run_conversation()'s result dict
+        """
+        # The message history and call statistics in the result are discarded here.
+        return self.run_conversation(message)["final_response"]
+
+
+def main(
+    query: str = None,
+    model: str = "claude-opus-4-20250514",
+    api_key: str = None,
+    base_url: str = "https://api.anthropic.com/v1/",
+    max_turns: int = 10,
+    enabled_toolsets: str = None,
+    disabled_toolsets: str = None,
+    list_tools: bool = False,
+    save_trajectories: bool = False
+):
+    """
+    Command-line entry point: list the available tools, or run one agent conversation.
+
+    Args:
+        query (str): Natural language query for the agent. Defaults to Python 3.13 example.
+        model (str): Model name to use. Defaults to claude-opus-4-20250514.
+        api_key (str): API key for authentication, passed through to AIAgent unchanged.
+        base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
+        max_turns (int): Maximum number of API call iterations. Defaults to 10.
+        enabled_toolsets (str): Comma-separated list of toolsets to enable (e.g., "web,vision").
+                              Fire delivers "a,b" as a tuple rather than a str, so a
+                              tuple/list of names is accepted as well.
+        disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal")
+        list_tools (bool): Just list available tools and exit
+        save_trajectories (bool): Save conversation trajectories to JSONL files. Defaults to False.
+
+    Toolset Examples:
+        - "debugging": Terminal plus the web search, extract and crawl tools
+    """
+    print("🤖 AI Agent with Tool Calling")
+    print("=" * 50)
+
+    # Handle tool listing
+    if list_tools:
+        from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets
+        from toolsets import get_all_toolsets, get_toolset_info
+
+        print("📋 Available Tools & Toolsets:")
+        print("-" * 50)
+
+        # Show new toolsets system
+        print("\n🎯 Predefined Toolsets (New System):")
+        print("-" * 40)
+        all_toolsets = get_all_toolsets()
+
+        # Group by category; "image_gen" and "moa" are the names toolsets.py defines
+        basic_toolsets = []
+        composite_toolsets = []
+        scenario_toolsets = []
+
+        for name in all_toolsets:
+            info = get_toolset_info(name)
+            if info:
+                entry = (name, info)
+                if name in ["web", "terminal", "vision", "image_gen", "moa", "creative", "reasoning"]:
+                    basic_toolsets.append(entry)
+                elif name in ["research", "development", "analysis", "content_creation", "full_stack"]:
+                    composite_toolsets.append(entry)
+                else:
+                    scenario_toolsets.append(entry)
+
+        # Print basic toolsets
+        print("\n📌 Basic Toolsets:")
+        for name, info in basic_toolsets:
+            tools_str = ', '.join(info['resolved_tools']) if info['resolved_tools'] else 'none'
+            print(f"  • {name:15} - {info['description']}")
+            print(f"    Tools: {tools_str}")
+
+        # Print composite toolsets
+        print("\n📂 Composite Toolsets (built from other toolsets):")
+        for name, info in composite_toolsets:
+            includes_str = ', '.join(info['includes']) if info['includes'] else 'none'
+            print(f"  • {name:15} - {info['description']}")
+            print(f"    Includes: {includes_str}")
+            print(f"    Total tools: {info['tool_count']}")
+
+        # Print scenario-specific toolsets
+        print("\n🎭 Scenario-Specific Toolsets:")
+        for name, info in scenario_toolsets:
+            print(f"  • {name:20} - {info['description']}")
+            print(f"    Total tools: {info['tool_count']}")
+
+        # Show legacy toolset compatibility
+        print("\n📦 Legacy Toolsets (for backward compatibility):")
+        legacy_toolsets = get_available_toolsets()
+        for name, info in legacy_toolsets.items():
+            status = "✅" if info["available"] else "❌"
+            print(f"  {status} {name}: {info['description']}")
+            if not info["available"]:
+                print(f"    Requirements: {', '.join(info['requirements'])}")
+
+        # Show individual tools
+        all_tools = get_all_tool_names()
+        print(f"\n🔧 Individual Tools ({len(all_tools)} available):")
+        for tool_name in sorted(all_tools):
+            owner = get_toolset_for_tool(tool_name)
+            print(f"  📌 {tool_name} (from {owner})")
+
+        print("\n💡 Usage Examples:")
+        print("  # Use predefined toolsets")
+        print("  python run_agent.py --enabled_toolsets=web --query='search for Python news'")
+        print("  python run_agent.py --enabled_toolsets=debugging --query='debug this code'")
+        print("  python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'")
+        print("  ")
+        print("  # Combine multiple toolsets")
+        print("  python run_agent.py --enabled_toolsets=web,vision --query='analyze website'")
+        print("  ")
+        print("  # Disable toolsets")
+        print("  python run_agent.py --disabled_toolsets=terminal --query='no command execution'")
+        print("  ")
+        print("  # Run with trajectory saving enabled")
+        print("  python run_agent.py --save_trajectories --query='your question here'")
+        return
+
+    def _as_toolset_list(value):
+        # Fire turns "a,b" into a tuple but leaves "a" as a str; accept both forms.
+        names = value if isinstance(value, (list, tuple)) else str(value).split(",")
+        return [str(n).strip() for n in names if str(n).strip()]
+
+    # Parse toolset selection arguments
+    enabled_toolsets_list = disabled_toolsets_list = None
+
+    if enabled_toolsets:
+        enabled_toolsets_list = _as_toolset_list(enabled_toolsets)
+        print(f"🎯 Enabled toolsets: {enabled_toolsets_list}")
+
+    if disabled_toolsets:
+        disabled_toolsets_list = _as_toolset_list(disabled_toolsets)
+        print(f"🚫 Disabled toolsets: {disabled_toolsets_list}")
+
+    if save_trajectories:
+        print("💾 Trajectory saving: ENABLED")
+        print("   - Successful conversations → trajectory_samples.jsonl")
+        print("   - Failed conversations → failed_trajectories.jsonl")
+
+    # Initialize agent with provided parameters
+    try:
+        agent = AIAgent(
+            base_url=base_url,
+            model=model,
+            api_key=api_key,
+            max_iterations=max_turns,
+            enabled_toolsets=enabled_toolsets_list,
+            disabled_toolsets=disabled_toolsets_list,
+            save_trajectories=save_trajectories
+        )
+    except RuntimeError as e:
+        print(f"❌ Failed to initialize agent: {e}")
+        return
+
+    # Use provided query or default to Python 3.13 example
+    user_query = query if query is not None else (
+        "Tell me about the latest developments in Python 3.13 and what new features "
+        "developers should know about. Please search for current information and try it out."
+    )
+
+    print(f"\n📝 User Query: {user_query}")
+    print("\n" + "=" * 50)
+
+    # Run conversation
+    result = agent.run_conversation(user_query)
+
+    print("\n" + "=" * 50)
+    print("📋 CONVERSATION SUMMARY")
+    print("=" * 50)
+    print(f"✅ Completed: {result['completed']}")
+    print(f"📞 API Calls: {result['api_calls']}")
+    print(f"💬 Messages: {len(result['messages'])}")
+
+    if result['final_response']:
+        print("\n🎯 FINAL RESPONSE:")
+        print("-" * 30)
+        print(result['final_response'])
+
+    print("\n👋 Agent execution completed!")
+
+
+if __name__ == "__main__":
+    fire.Fire(main)
diff --git a/test_run.sh b/test_run.sh
index 54856eeb0..ff4ffc3c2 100644
--- a/test_run.sh
+++ b/test_run.sh
@@ -17,14 +17,14 @@ export WEB_TOOLS_DEBUG=true
 python run_agent.py \
   --query "$PROMPT" \
   --max_turns 30 \
-#  --model claude-sonnet-4-20250514 \
-#  --base_url https://api.anthropic.com/v1/ \
-  --model hermes-4-70B \
-  --base_url http://bore.pub:8292/v1 \
+  --model claude-sonnet-4-20250514 \
+  --base_url https://api.anthropic.com/v1/ \
   --api_key $ANTHROPIC_API_KEY \
-  --save_trajectories
-  #--enabled_toolsets=vision_tools
-
+  --save_trajectories \
+  --enabled_toolsets=web
+  
+#  --model claude-sonnet-4-20250514 \
+#  
 #Possible Toolsets:
 #web_tools
 #vision_tools
diff --git a/toolsets.py b/toolsets.py
new file mode 100644
index 000000000..4ed474dae
--- /dev/null
+++ b/toolsets.py
@@ -0,0 +1,326 @@
+#!/usr/bin/env python3
+"""
+Toolsets Module
+
+This module provides a flexible system for defining and managing tool aliases/toolsets.
+Toolsets allow you to group tools together for specific scenarios and can be composed
+from individual tools or other toolsets.
+
+Features:
+- Define custom toolsets with specific tools
+- Compose toolsets from other toolsets
+- Built-in common toolsets for typical use cases
+- Easy extension for new toolsets
+- Support for dynamic toolset resolution
+
+Usage:
+    from toolsets import get_toolset, resolve_toolset, get_all_toolsets
+    
+    # Get the definition (description, tools, includes) of a specific toolset
+    definition = get_toolset("debugging")
+    
+    # Resolve a toolset to get all tool names (including from composed toolsets)
+    all_tools = resolve_toolset("safe")
+"""
+
+from typing import List, Dict, Any, Set, Optional
+import json
+
+
+# Core toolset definitions
+# These can include individual tools or reference other toolsets
+TOOLSETS = {
+    # Basic toolsets - individual tool categories
+    "web": {
+        "description": "Web research and content extraction tools",
+        "tools": ["web_search", "web_extract", "web_crawl"],
+        "includes": []  # No other toolsets included
+    },
+
+    "vision": {
+        "description": "Image analysis and vision tools",
+        "tools": ["vision_analyze"],
+        "includes": []
+    },
+
+    "image_gen": {
+        "description": "Creative generation tools (images)",
+        "tools": ["image_generate"],
+        "includes": []
+    },
+
+    "terminal": {
+        "description": "Terminal/command execution tools",
+        "tools": ["terminal"],
+        "includes": []
+    },
+
+    "moa": {
+        "description": "Advanced reasoning and problem-solving tools",
+        "tools": ["mixture_of_agents"],
+        "includes": []
+    },
+
+    # Scenario-specific toolsets
+
+    "debugging": {
+        "description": "Debugging and troubleshooting toolkit",
+        "tools": ["terminal"],
+        "includes": ["web"]  # For searching error messages and solutions
+    },
+
+    "safe": {
+        "description": "Safe toolkit without terminal access",
+        "tools": ["mixture_of_agents"],
+        "includes": ["web", "vision", "image_gen"]  # each entry must be a key of TOOLSETS
+    }
+}
+
+
+
+def get_toolset(name: str) -> Optional[Dict[str, Any]]:
+    """
+    Look up the definition of a single toolset.
+
+    Args:
+        name (str): Toolset name, used as the key into TOOLSETS
+
+    Returns:
+        Dict: The definition stored in TOOLSETS (the same object, not a copy)
+        None: If no toolset is registered under that name
+    """
+    definition = TOOLSETS.get(name)
+    return definition
+
+
+def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]:
+    """
+    Expand a toolset into the flat list of tool names it provides.
+
+    The toolset's own "tools" are merged with the tools of every toolset named
+    in its "includes", each of which is expanded the same way.
+
+    Args:
+        name (str): Name of the toolset to resolve
+        visited (Set[str]): Toolsets already on the current resolution path; the
+                            set is mutated (name is added). Callers normally omit it.
+
+    Returns:
+        List[str]: De-duplicated tool names in no particular order. An unknown
+                   name, or a name already present in visited, yields []
+    """
+    seen = set() if visited is None else visited
+
+    # A name that is already on the current path means the includes form a loop.
+    if name in seen:
+        print(f"⚠️  Circular dependency detected in toolset '{name}'")
+        return []
+    seen.add(name)
+
+    definition = TOOLSETS.get(name)
+    if not definition:
+        return []
+
+    # Start from the tools listed directly on this toolset.
+    collected = set(definition.get("tools", []))
+
+    # Every include receives its own copy of the path, so a toolset reachable
+    # through two different branches is not mistaken for a cycle.
+    for child_name in definition.get("includes", []):
+        child_tools = resolve_toolset(child_name, seen.copy())
+        collected.update(child_tools)
+
+    return list(collected)
+
+
+def resolve_multiple_toolsets(toolset_names: List[str]) -> List[str]:
+    """
+    Resolve several toolsets and merge the results.
+
+    Args:
+        toolset_names (List[str]): Names of the toolsets to resolve
+
+    Returns:
+        List[str]: Union of the tool names of all listed toolsets, without
+                   duplicates and in no particular order
+    """
+    merged = set()
+    for toolset_name in toolset_names:
+        # Whatever resolve_toolset() returns for a name is folded into the union.
+        merged.update(resolve_toolset(toolset_name))
+
+    return list(merged)
+
+
+def get_all_toolsets() -> Dict[str, Dict[str, Any]]:
+    """
+    Return every registered toolset definition, keyed by name.
+
+    Returns:
+        Dict: Shallow copy of TOOLSETS; the nested definition dicts are shared
+    """
+    return dict(TOOLSETS)
+
+
+def get_toolset_names() -> List[str]:
+    """
+    List the names of all registered toolsets.
+
+    Returns:
+        List[str]: The keys of TOOLSETS
+    """
+    return [*TOOLSETS]
+
+
+
+
+def validate_toolset(name: str) -> bool:
+    """
+    Tell whether a toolset with the given name is registered.
+
+    Args:
+        name (str): Toolset name to check
+
+    Returns:
+        bool: True if name is a key of TOOLSETS, False otherwise
+    """
+    return name in TOOLSETS
+
+
+def create_custom_toolset(
+    name: str,
+    description: str,
+    tools: List[str] = None,
+    includes: List[str] = None
+) -> None:
+    """
+    Register a toolset in TOOLSETS at runtime, replacing any entry of the same name.
+
+    Args:
+        name (str): Name for the new toolset
+        description (str): Description of the toolset
+        tools (List[str]): Direct tools to include (copied; defaults to none)
+        includes (List[str]): Other toolsets to include (copied; defaults to none)
+    """
+    TOOLSETS[name] = {
+        "description": description,
+        "tools": list(tools) if tools else [],
+        "includes": list(includes) if includes else []
+    }
+
+
+
+
+def get_toolset_info(name: str) -> Optional[Dict[str, Any]]:
+    """
+    Get detailed information about a toolset including resolved tools.
+
+    Args:
+        name (str): Toolset name
+
+    Returns:
+        Dict: name, description, direct_tools, includes, resolved_tools, tool_count, is_composite
+        None: If the toolset does not exist
+    """
+    toolset = get_toolset(name)
+    if not toolset:
+        return None
+    # Missing keys are treated as empty, matching the .get() defaults in resolve_toolset().
+    resolved_tools = resolve_toolset(name)
+    return {
+        "name": name,
+        "description": toolset.get("description", ""),
+        "direct_tools": toolset.get("tools", []),
+        "includes": toolset.get("includes", []),
+        "resolved_tools": resolved_tools,
+        "tool_count": len(resolved_tools),
+        "is_composite": bool(toolset.get("includes"))
+    }
+
+
+def print_toolset_tree(name: str, indent: int = 0, _ancestors: tuple = ()) -> None:
+    """
+    Print a tree view of a toolset and its composition.
+
+    Args:
+        name (str): Toolset name
+        indent (int): Current indentation level
+        _ancestors (tuple): Internal; toolsets on the current path, used to stop cycles
+    """
+    prefix = "  " * indent
+    toolset = get_toolset(name)
+    if not toolset:
+        print(f"{prefix}❌ Unknown toolset: {name}")
+        return
+    if name in _ancestors:
+        # A circular "includes" chain would otherwise recurse until RecursionError.
+        print(f"{prefix}⚠️  Circular dependency detected in toolset '{name}'")
+        return
+
+    # Header line first, then direct tools, then each include one level deeper.
+    print(f"{prefix}📦 {name}: {toolset['description']}")
+    if toolset.get("tools"):
+        print(f"{prefix}  🔧 Tools: {', '.join(toolset['tools'])}")
+    if toolset.get("includes"):
+        print(f"{prefix}  📂 Includes:")
+        for included in toolset["includes"]:
+            print_toolset_tree(included, indent + 2, _ancestors + (name,))
+
+
+if __name__ == "__main__":
+    # Demo of the toolsets system. Every name used below is defined in TOOLSETS
+    # (or registered by the create_custom_toolset() call at the end), so the
+    # examples are not run against toolset names that do not exist.
+    print("🎯 Toolsets System Demo")
+    print("=" * 60)
+
+    # Show all available toolsets
+    print("\n📦 Available Toolsets:")
+    print("-" * 40)
+    for name, toolset in get_all_toolsets().items():
+        info = get_toolset_info(name)
+        composite = "📂" if info["is_composite"] else "🔧"
+        print(f"{composite} {name:20} - {toolset['description']}")
+        print(f"   Tools: {len(info['resolved_tools'])} total")
+
+
+    # Demo toolset resolution
+    print("\n🔍 Toolset Resolution Examples:")
+    print("-" * 40)
+
+    examples = ["web", "terminal", "debugging", "safe"]
+    for name in examples:
+        tools = resolve_toolset(name)
+        print(f"\n{name}:")
+        print(f"  Resolved to {len(tools)} tools: {', '.join(sorted(tools))}")
+
+    # Show toolset composition tree
+    print("\n🌳 Toolset Composition Tree:")
+    print("-" * 40)
+    print("\nExample: 'debugging' toolset:")
+    print_toolset_tree("debugging")
+
+    print("\nExample: 'safe' toolset:")
+    print_toolset_tree("safe")
+
+    # Demo multiple toolset resolution
+    print("\n🔗 Multiple Toolset Resolution:")
+    print("-" * 40)
+    combined = resolve_multiple_toolsets(["web", "vision", "moa"])
+    print("Combining ['web', 'vision', 'moa']:")
+    print(f"  Result: {', '.join(sorted(combined))}")
+
+    # Demo custom toolset creation
+    print("\n➕ Custom Toolset Creation:")
+    print("-" * 40)
+    create_custom_toolset(
+        name="my_custom",
+        description="My custom toolset for specific tasks",
+        tools=["web_search"],
+        includes=["terminal", "vision"]
+    )
+
+    custom_info = get_toolset_info("my_custom")
+    print("Created 'my_custom' toolset:")
+    print(f"  Description: {custom_info['description']}")
+    print(f"  Resolved tools: {', '.join(custom_info['resolved_tools'])}")