initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
/venv/
|
||||||
|
/__pycache__/
|
||||||
BIN
__pycache__/model_tools.cpython-310.pyc
Normal file
BIN
__pycache__/model_tools.cpython-310.pyc
Normal file
Binary file not shown.
BIN
__pycache__/web_tools.cpython-310.pyc
Normal file
BIN
__pycache__/web_tools.cpython-310.pyc
Normal file
Binary file not shown.
272
model_tools.py
Normal file
272
model_tools.py
Normal file
@@ -0,0 +1,272 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Model Tools Module
|
||||||
|
|
||||||
|
This module constructs tool schemas and handlers for AI model API calls.
|
||||||
|
It imports tools from various toolset modules and provides a unified interface
|
||||||
|
for defining tools and executing function calls.
|
||||||
|
|
||||||
|
Currently supports:
|
||||||
|
- Web tools (search, extract, crawl) from web_tools.py
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from model_tools import get_tool_definitions, handle_function_call
|
||||||
|
|
||||||
|
# Get tool definitions for model API
|
||||||
|
tools = get_tool_definitions()
|
||||||
|
|
||||||
|
# Handle function calls from model
|
||||||
|
result = handle_function_call("web_search_tool", {"query": "Python", "limit": 3})
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
|
# Import toolsets
|
||||||
|
from web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_tavily_api_key
|
||||||
|
|
||||||
|
def get_web_tool_definitions() -> List[Dict[str, Any]]:
    """
    Build the OpenAI-format tool schemas for the web toolset.

    Each entry is a function-calling schema describing one tool exported by
    ``web_tools.py`` (search, extract, crawl).

    Returns:
        List[Dict]: Web tool definitions compatible with the OpenAI API.
    """
    # Schema for web_search_tool: free-text query plus an optional,
    # range-bounded result limit.
    search_definition = {
        "type": "function",
        "function": {
            "name": "web_search_tool",
            "description": "Search the web for information on any topic. Returns relevant results with titles, URLs, content snippets, and answers. Uses advanced search depth for comprehensive results.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query to look up on the web"
                    },
                    "limit": {
                        "type": "integer",
                        "description": "Maximum number of results to return (default: 5, max: 10)",
                        "default": 5,
                        "minimum": 1,
                        "maximum": 10
                    }
                },
                "required": ["query"]
            }
        }
    }

    # Schema for web_extract_tool: a capped list of URLs and an optional
    # output format.
    extract_definition = {
        "type": "function",
        "function": {
            "name": "web_extract_tool",
            "description": "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search.",
            "parameters": {
                "type": "object",
                "properties": {
                    "urls": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of URLs to extract content from (max 5 URLs per call)",
                        "maxItems": 5
                    },
                    "format": {
                        "type": "string",
                        "enum": ["markdown", "html"],
                        "description": "Desired output format for extracted content (optional)"
                    }
                },
                "required": ["urls"]
            }
        }
    }

    # Schema for web_crawl_tool: base URL, optional natural-language
    # instructions, and an extraction-depth selector.
    crawl_definition = {
        "type": "function",
        "function": {
            "name": "web_crawl_tool",
            "description": "Crawl a website with specific instructions to find and extract targeted content. Uses AI to intelligently navigate and extract relevant information from across the site.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "The base URL to crawl (can include or exclude https://)"
                    },
                    "instructions": {
                        "type": "string",
                        "description": "Specific instructions for what to crawl/extract using AI intelligence (e.g., 'Find pricing information', 'Get documentation pages', 'Extract contact details')"
                    },
                    "depth": {
                        "type": "string",
                        "enum": ["basic", "advanced"],
                        "description": "Depth of extraction - 'basic' for surface content, 'advanced' for deeper analysis (default: basic)",
                        "default": "basic"
                    }
                },
                "required": ["url"]
            }
        }
    }

    return [search_definition, extract_definition, crawl_definition]
|
||||||
|
|
||||||
|
def get_tool_definitions() -> List[Dict[str, Any]]:
    """
    Aggregate tool definitions from every available toolset.

    Currently only the web toolset contributes definitions; additional
    toolsets can append theirs here without callers changing.

    Returns:
        List[Dict]: Complete list of all available tool definitions.
    """
    all_tools: List[Dict[str, Any]] = []

    # Web toolset (search / extract / crawl).
    all_tools += get_web_tool_definitions()

    # Future toolsets can be added here:
    # all_tools += get_file_tool_definitions()
    # all_tools += get_code_tool_definitions()
    # all_tools += get_database_tool_definitions()

    return all_tools
|
||||||
|
|
||||||
|
def handle_web_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
    """
    Execute a web-toolset function call.

    Sanitises the model-supplied arguments (clamps the search limit, caps the
    URL list) before dispatching to the matching tool in ``web_tools``.

    Args:
        function_name (str): Name of the web function to call.
        function_args (Dict): Arguments for the function.

    Returns:
        str: Function result as JSON string.
    """
    if function_name == "web_search_tool":
        # Clamp the result count to the advertised 1..10 range.
        requested_limit = function_args.get("limit", 5)
        bounded_limit = min(10, max(1, requested_limit))
        return web_search_tool(function_args.get("query", ""), bounded_limit)

    if function_name == "web_extract_tool":
        raw_urls = function_args.get("urls", [])
        # Cap at five URLs (and drop non-list input) to prevent abuse.
        safe_urls = raw_urls[:5] if isinstance(raw_urls, list) else []
        return web_extract_tool(safe_urls, function_args.get("format"))

    if function_name == "web_crawl_tool":
        return web_crawl_tool(
            function_args.get("url", ""),
            function_args.get("instructions"),
            function_args.get("depth", "basic"),
        )

    # Name did not match any web tool.
    return json.dumps({"error": f"Unknown web function: {function_name}"})
|
||||||
|
|
||||||
|
def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
    """
    Main function-call dispatcher that routes calls to the right toolset.

    Determines which toolset owns ``function_name`` and hands the call to
    that toolset's handler, so new toolsets can be added without changing
    the calling interface.

    Args:
        function_name (str): Name of the function to call.
        function_args (Dict): Arguments for the function.

    Returns:
        str: Function result as JSON string.

    Raises:
        None: Errors are returned as JSON strings instead of raised.
    """
    web_tool_names = ("web_search_tool", "web_extract_tool", "web_crawl_tool")
    try:
        # Route web tools.
        if function_name in web_tool_names:
            return handle_web_function_call(function_name, function_args)

        # Future toolsets can be routed here:
        # elif function_name in ["file_read_tool", "file_write_tool"]:
        #     return handle_file_function_call(function_name, function_args)
        # elif function_name in ["code_execute_tool", "code_analyze_tool"]:
        #     return handle_code_function_call(function_name, function_args)

        # No toolset claimed this name.
        error_msg = f"Unknown function: {function_name}"
        print(f"❌ {error_msg}")
        return json.dumps({"error": error_msg})

    except Exception as e:
        # Any tool failure is reported back to the model as a JSON error.
        error_msg = f"Error executing {function_name}: {str(e)}"
        print(f"❌ {error_msg}")
        return json.dumps({"error": error_msg})
|
||||||
|
|
||||||
|
def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
    """
    Describe every known toolset and its current availability.

    Returns:
        Dict: Per-toolset info: ``available`` flag, tool names, a short
        description, and the requirements needed to enable it.
    """
    # Web toolset availability is gated on the Tavily API key being present.
    web_toolset_info = {
        "available": check_tavily_api_key(),
        "tools": ["web_search_tool", "web_extract_tool", "web_crawl_tool"],
        "description": "Web search, content extraction, and website crawling tools",
        "requirements": ["TAVILY_API_KEY environment variable"],
    }

    # Future toolsets can be added here
    return {"web_tools": web_toolset_info}
|
||||||
|
|
||||||
|
def check_toolset_requirements() -> Dict[str, bool]:
    """
    Check whether each toolset's external requirements are satisfied.

    Returns:
        Dict: Mapping of toolset name to a boolean "requirements met" flag.
    """
    # Web tools need only the Tavily API key.
    requirement_status = {"web_tools": check_tavily_api_key()}
    return requirement_status
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    """
    Simple test/demo when run directly
    """
    print("🛠️ Model Tools Module")
    print("=" * 40)

    # Check toolset requirements and report each one's availability.
    requirements = check_toolset_requirements()
    print("📋 Toolset Requirements:")
    for toolset, available in requirements.items():
        status = "✅" if available else "❌"
        print(f" {status} {toolset}: {'Available' if available else 'Missing requirements'}")

    # Show available tools with a truncated (80-char) description each.
    tools = get_tool_definitions()
    print(f"\n🔧 Available Tools ({len(tools)} total):")
    for tool in tools:
        func_name = tool["function"]["name"]
        desc = tool["function"]["description"]
        print(f" 📌 {func_name}: {desc[:80]}{'...' if len(desc) > 80 else ''}")

    # Show toolset info, including missing requirements for unavailable sets.
    toolsets = get_available_toolsets()
    print(f"\n📦 Toolset Information:")
    for name, info in toolsets.items():
        status = "✅" if info["available"] else "❌"
        print(f" {status} {name}: {info['description']}")
        if not info["available"]:
            print(f" Requirements: {', '.join(info['requirements'])}")

    # Minimal copy-paste usage example for library consumers.
    print("\n💡 Usage Example:")
    print(" from model_tools import get_tool_definitions, handle_function_call")
    print(" tools = get_tool_definitions()")
    print(" result = handle_function_call('web_search_tool', {'query': 'Python'})")
|
||||||
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
tavily-python
|
||||||
|
openai
|
||||||
324
run_agent.py
Normal file
324
run_agent.py
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
AI Agent Runner with Tool Calling
|
||||||
|
|
||||||
|
This module provides a clean, standalone agent that can execute AI models
|
||||||
|
with tool calling capabilities. It handles the conversation loop, tool execution,
|
||||||
|
and response management.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Automatic tool calling loop until completion
|
||||||
|
- Configurable model parameters
|
||||||
|
- Error handling and recovery
|
||||||
|
- Message history management
|
||||||
|
- Support for multiple model providers
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from run_agent import AIAgent
|
||||||
|
|
||||||
|
agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
|
||||||
|
response = agent.run_conversation("Tell me about the latest Python updates")
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Import our tool system
|
||||||
|
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
|
||||||
|
|
||||||
|
|
||||||
|
class AIAgent:
    """
    AI Agent with tool calling capabilities.

    This class manages the conversation flow, tool execution, and response handling
    for AI models that support function calling.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        model: str = "gpt-4",
        max_iterations: int = 10,
        tool_delay: float = 1.0
    ):
        """
        Initialize the AI Agent.

        Args:
            base_url (str): Base URL for the model API (optional)
            api_key (str): API key for authentication (optional, uses env var if not provided)
            model (str): Model name to use (default: "gpt-4")
            max_iterations (int): Maximum number of tool calling iterations (default: 10)
            tool_delay (float): Delay between tool calls in seconds (default: 1.0)

        Raises:
            RuntimeError: If the OpenAI client cannot be constructed.
        """
        self.model = model
        self.max_iterations = max_iterations
        self.tool_delay = tool_delay

        # Initialize OpenAI client; only pass kwargs that were actually given.
        client_kwargs = {}
        if base_url:
            client_kwargs["base_url"] = base_url
        if api_key:
            client_kwargs["api_key"] = api_key
        else:
            # Fall back to the env var; "dummy-key" keeps the client usable
            # against local servers that don't check authentication.
            client_kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "dummy-key")

        try:
            self.client = OpenAI(**client_kwargs)
            print(f"🤖 AI Agent initialized with model: {self.model}")
            if base_url:
                print(f"🔗 Using custom base URL: {base_url}")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize OpenAI client: {e}")

        # Get available tools from the model_tools registry.
        self.tools = get_tool_definitions()
        print(f"🛠️ Loaded {len(self.tools)} tools")

        # Check tool requirements up front and warn (but don't fail) on gaps.
        requirements = check_toolset_requirements()
        missing_reqs = [name for name, available in requirements.items() if not available]
        if missing_reqs:
            print(f"⚠️ Some tools may not work due to missing requirements: {missing_reqs}")

    def create_system_message(self, custom_system: Optional[str] = None) -> str:
        """
        Create the system message for the agent.

        Args:
            custom_system (str): Custom system message (optional); if given it
                is returned verbatim, replacing the built-in default.

        Returns:
            str: System message content
        """
        if custom_system:
            return custom_system

        # Default prompt: encourages <thinking>-tagged reasoning and describes
        # the web research tools this agent loads.
        return (
            "You are an AI assistant that provides helpful responses. You may use extremely long chains of thought "
            "to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help "
            "come to a correct solution prior to answering. You should enclose your thoughts and internal monologue "
            "inside <thinking> tags.\n\n"
            "You are equipped with web research tools that allow you to search the web, extract content from web pages, "
            "and crawl websites. Use these tools to gather current information and provide accurate, well-researched responses. "
            "You can call multiple tools in parallel if they are not reliant on each other's results. You can also use "
            "sequential tool calls to build on data you've collected from previous tool calls. Continue using tools until "
            "you feel confident you have enough information to provide a comprehensive answer."
        )

    def run_conversation(
        self,
        user_message: str,
        system_message: Optional[str] = None,
        conversation_history: Optional[List[Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Run a complete conversation with tool calling until completion.

        Args:
            user_message (str): The user's message/question
            system_message (str): Custom system message (optional)
            conversation_history (List[Dict]): Previous conversation messages (optional)

        Returns:
            Dict: Complete conversation result with final response and message history

        NOTE(review): when ``conversation_history`` is provided, this method
        appends to that same list in place — callers see their list mutated.
        Confirm this aliasing is intended before passing a shared history.
        """
        # Initialize conversation from prior history (or fresh).
        messages = conversation_history or []

        # Add system message if not already present at position 0.
        if not messages or messages[0]["role"] != "system":
            messages.insert(0, {
                "role": "system",
                "content": self.create_system_message(system_message)
            })

        # Add the new user message.
        messages.append({
            "role": "user",
            "content": user_message
        })

        print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")

        # Main conversation loop: keep calling the model until it answers
        # without tool calls or the iteration budget runs out.
        api_call_count = 0
        final_response = None

        while api_call_count < self.max_iterations:
            api_call_count += 1
            print(f"\n🔄 Making API call #{api_call_count}...")

            try:
                # Make API call with tools (omit tools kwarg content if none loaded).
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    tools=self.tools if self.tools else None
                )

                assistant_message = response.choices[0].message

                # Handle assistant response (preview only; full text kept in messages).
                if assistant_message.content:
                    print(f"🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")

                # Check for tool calls requested by the model.
                if assistant_message.tool_calls:
                    print(f"🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")

                    # Add assistant message with tool calls to conversation,
                    # re-serialised as plain dicts so the history stays JSON-able.
                    messages.append({
                        "role": "assistant",
                        "content": assistant_message.content,
                        "tool_calls": [
                            {
                                "id": tool_call.id,
                                "type": tool_call.type,
                                "function": {
                                    "name": tool_call.function.name,
                                    "arguments": tool_call.function.arguments
                                }
                            }
                            for tool_call in assistant_message.tool_calls
                        ]
                    })

                    # Execute each tool call in order.
                    for i, tool_call in enumerate(assistant_message.tool_calls, 1):
                        function_name = tool_call.function.name

                        try:
                            function_args = json.loads(tool_call.function.arguments)
                        except json.JSONDecodeError as e:
                            # Malformed arguments from the model: run the tool
                            # with empty args rather than aborting the turn.
                            print(f"❌ Invalid JSON in tool call arguments: {e}")
                            function_args = {}

                        print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")

                        # Execute the tool via the model_tools dispatcher.
                        function_result = handle_function_call(function_name, function_args)

                        # Add tool result to conversation, linked by call id.
                        messages.append({
                            "role": "tool",
                            "content": function_result,
                            "tool_call_id": tool_call.id
                        })

                        print(f" ✅ Tool {i} completed")

                        # Delay between tool calls (skipped after the last one).
                        if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
                            time.sleep(self.tool_delay)

                    # Continue loop so the model can react to the tool results.
                    continue

                else:
                    # No tool calls - this is the final response.
                    final_response = assistant_message.content or ""

                    # Add final assistant message to the history.
                    messages.append({
                        "role": "assistant",
                        "content": final_response
                    })

                    print(f"🎉 Conversation completed after {api_call_count} API call(s)")
                    break

            except Exception as e:
                error_msg = f"Error during API call #{api_call_count}: {str(e)}"
                print(f"❌ {error_msg}")

                # Add error to conversation and try to continue.
                messages.append({
                    "role": "assistant",
                    "content": f"I encountered an error: {error_msg}. Let me try a different approach."
                })

                # If we're near the limit, break to avoid infinite loops.
                if api_call_count >= self.max_iterations - 1:
                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
                    break

        # Handle max iterations reached without a natural completion.
        if api_call_count >= self.max_iterations:
            print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Stopping to prevent infinite loop.")
            if final_response is None:
                final_response = "I've reached the maximum number of iterations. Here's what I found so far."

        return {
            "final_response": final_response,
            "messages": messages,
            "api_calls": api_call_count,
            "completed": final_response is not None
        }

    def chat(self, message: str) -> str:
        """
        Simple chat interface that returns just the final response.

        Args:
            message (str): User message

        Returns:
            str: Final assistant response
        """
        result = self.run_conversation(message)
        return result["final_response"]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Main function for running the agent directly.

    Builds an AIAgent, runs one example research conversation, and prints a
    summary of the run. Exits early if client initialisation fails.
    """
    print("🤖 AI Agent with Tool Calling")
    print("=" * 50)

    # Initialize agent with local SGLang server (modify as needed)
    # NOTE(review): the comment above says "local SGLang server" but the URL
    # below points at the Anthropic API — confirm which endpoint is intended.
    try:
        agent = AIAgent(
            base_url="https://api.anthropic.com/v1/",
            model="claude-opus-4-20250514"
        )
    except RuntimeError as e:
        print(f"❌ Failed to initialize agent: {e}")
        return

    # Example conversation prompt that should trigger the web search tool.
    user_query = (
        "Tell me about the latest developments in Python 3.12 and what new features "
        "developers should know about. Please search for current information."
    )

    print(f"\n📝 User Query: {user_query}")
    print("\n" + "=" * 50)

    # Run conversation to completion (tool loop handled inside the agent).
    result = agent.run_conversation(user_query)

    # Print a summary of the run: completion flag, call count, message count.
    print("\n" + "=" * 50)
    print("📋 CONVERSATION SUMMARY")
    print("=" * 50)
    print(f"✅ Completed: {result['completed']}")
    print(f"📞 API Calls: {result['api_calls']}")
    print(f"💬 Messages: {len(result['messages'])}")

    if result['final_response']:
        print(f"\n🎯 FINAL RESPONSE:")
        print("-" * 30)
        print(result['final_response'])

    print("\n👋 Agent execution completed!")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||||
0
terminal_tool.py
Normal file
0
terminal_tool.py
Normal file
265
web_tools.py
Normal file
265
web_tools.py
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Standalone Web Tools Module
|
||||||
|
|
||||||
|
This module provides generic web tools that work with multiple backend providers.
|
||||||
|
Currently uses Tavily as the backend, but the interface makes it easy to swap
|
||||||
|
to other providers like Firecrawl without changing the function signatures.
|
||||||
|
|
||||||
|
Available tools:
|
||||||
|
- web_search_tool: Search the web for information
|
||||||
|
- web_extract_tool: Extract content from specific web pages
|
||||||
|
- web_crawl_tool: Crawl websites with specific instructions
|
||||||
|
|
||||||
|
Backend compatibility:
|
||||||
|
- Tavily: https://docs.tavily.com/
|
||||||
|
- Firecrawl: https://docs.firecrawl.dev/features/search
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from web_tools import web_search_tool, web_extract_tool, web_crawl_tool
|
||||||
|
|
||||||
|
# Search the web
|
||||||
|
results = web_search_tool("Python machine learning libraries", limit=3)
|
||||||
|
|
||||||
|
# Extract content from URLs
|
||||||
|
content = web_extract_tool(["https://example.com"], format="markdown")
|
||||||
|
|
||||||
|
# Crawl a website
|
||||||
|
crawl_data = web_crawl_tool("example.com", "Find contact information")
|
||||||
|
"""
|
||||||
|
|
||||||
|
#TODO: Search Capabilities over the scraped pages
|
||||||
|
#TODO: Store the pages in something
|
||||||
|
#TODO: Tool to see what pages are available/saved to search over
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import List
|
||||||
|
from tavily import TavilyClient
|
||||||
|
|
||||||
|
# Initialize Tavily client once at module level
|
||||||
|
tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
|
||||||
|
|
||||||
|
|
||||||
|
def clean_base64_images(text: str) -> str:
    """
    Replace inline base64-encoded images in *text* with a short placeholder.

    Handles both the parenthesised markdown-style form
    ``(data:image/<type>;base64,<payload>)`` and the bare form
    ``data:image/<type>;base64,<payload>``, keeping tool output small.

    Args:
        text: The text content to clean.

    Returns:
        The text with every matched image replaced by
        ``[BASE64_IMAGE_REMOVED]``.
    """
    placeholder = '[BASE64_IMAGE_REMOVED]'

    # Order matters: strip the parenthesised form first so the fallback
    # pattern does not leave orphan parentheses around the placeholder.
    patterns = (
        r'\(data:image/[^;]+;base64,[A-Za-z0-9+/=]+\)',
        r'data:image/[^;]+;base64,[A-Za-z0-9+/=]+',
    )

    scrubbed = text
    for pattern in patterns:
        scrubbed = re.sub(pattern, placeholder, scrubbed)
    return scrubbed
|
||||||
|
|
||||||
|
|
||||||
|
def web_search_tool(query: str, limit: int = 5) -> str:
    """
    Search the web via the configured backend (currently Tavily).

    The generic signature lets the backend be swapped (e.g. for Firecrawl)
    without touching callers.

    Args:
        query (str): The search query to look up.
        limit (int): Maximum number of results to return (default: 5).

    Returns:
        str: JSON string with the backend's ``query``/``results`` payload, or
        ``{"error": ...}`` if the search failed.
    """
    try:
        print(f"🔍 Searching the web for: '{query}' (limit: {limit})")

        # Delegate to Tavily; "advanced" depth yields richer snippets.
        response = tavily_client.search(query=query, max_results=limit, search_depth="advanced")

        found = response.get('results', [])
        print(f"✅ Found {len(found)} results")

        # Serialise, then strip any inline base64 images before returning.
        serialized = json.dumps(response, indent=2)
        return clean_base64_images(serialized)

    except Exception as e:
        # All failures (network, auth, bad args) are reported as JSON errors.
        error_msg = f"Error searching web: {str(e)}"
        print(f"❌ {error_msg}")
        return json.dumps({"error": error_msg})
|
||||||
|
|
||||||
|
|
||||||
|
def web_extract_tool(urls: List[str], format: str = None) -> str:
    """
    Extract page content from specific URLs via the configured backend.

    The generic signature lets the backend (currently Tavily) be swapped
    without changing callers.

    Args:
        urls (List[str]): URLs to extract content from.
        format (str): Desired output format ("markdown" or "html", optional).

    Returns:
        str: JSON string with the backend's ``results`` payload, or
        ``{"error": ...}`` if extraction failed.
    """
    try:
        print(f"📄 Extracting content from {len(urls)} URL(s)")

        # Delegate to Tavily's extract endpoint.
        response = tavily_client.extract(urls=urls, format=format)

        pages = response.get('results', [])
        print(f"✅ Extracted content from {len(pages)} pages")

        # Per-page summary (URL + raw content length) for debugging.
        for page in pages:
            page_url = page.get('url', 'Unknown URL')
            print(f" 📝 {page_url} ({len(page.get('raw_content', ''))} characters)")

        # Serialise, then strip any inline base64 images before returning.
        serialized = json.dumps(response, indent=2)
        return clean_base64_images(serialized)

    except Exception as e:
        # All failures are reported as JSON errors rather than raised.
        error_msg = f"Error extracting content: {str(e)}"
        print(f"❌ {error_msg}")
        return json.dumps({"error": error_msg})
|
||||||
|
|
||||||
|
|
||||||
|
def web_crawl_tool(url: str, instructions: str = None, depth: str = "basic") -> str:
    """
    Crawl a website with optional instructions via the configured backend.

    The generic signature lets the backend (currently Tavily) be swapped
    without changing callers.

    Args:
        url (str): The base URL to crawl (with or without https://).
        instructions (str): What to crawl/extract, in natural language (optional).
        depth (str): Extraction depth, "basic" or "advanced" (default: "basic").

    Returns:
        str: JSON string with the backend's ``results`` payload, or
        ``{"error": ...}`` if crawling failed.
    """
    try:
        instructions_text = f" with instructions: '{instructions}'" if instructions else ""
        print(f"🕷️ Crawling {url}{instructions_text}")

        # Delegate to Tavily's crawl endpoint.
        response = tavily_client.crawl(
            url=url,
            limit=20,  # Reasonable limit for most use cases
            instructions=instructions or "Get all available content",
            extract_depth=depth
        )

        pages = response.get('results', [])
        print(f"✅ Crawled {len(pages)} pages")

        # Per-page summary (URL + content length) for debugging.
        for page in pages:
            crawled_url = page.get('url', 'Unknown URL')
            print(f" 🌐 {crawled_url} ({len(page.get('content', ''))} characters)")

        # Serialise, then strip any inline base64 images before returning.
        serialized = json.dumps(response, indent=2)
        return clean_base64_images(serialized)

    except Exception as e:
        # All failures are reported as JSON errors rather than raised.
        error_msg = f"Error crawling website: {str(e)}"
        print(f"❌ {error_msg}")
        return json.dumps({"error": error_msg})
|
||||||
|
|
||||||
|
|
||||||
|
# Convenience function to check if API key is available
|
||||||
|
def check_tavily_api_key() -> bool:
    """
    Report whether the TAVILY_API_KEY environment variable is set and non-empty.

    Returns:
        bool: True if the key is available, False otherwise.
    """
    key = os.getenv("TAVILY_API_KEY")
    # Unset (None) and empty string both count as "no key".
    return key is not None and key != ""
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    """
    Simple test/demo when run directly
    """
    print("🌐 Standalone Web Tools Module")
    print("=" * 40)

    # Check if API key is available; bail out with setup instructions if not.
    if not check_tavily_api_key():
        print("❌ TAVILY_API_KEY environment variable not set")
        print("Please set your API key: export TAVILY_API_KEY='your-key-here'")
        print("Get API key at: https://tavily.com/")
        exit(1)

    # Key present: print a ready message and copy-paste usage examples.
    print("✅ Tavily API key found")
    print("🛠️ Web tools ready for use!")
    print("\nExample usage:")
    print(" from web_tools import web_search_tool, web_extract_tool, web_crawl_tool")
    print(" results = web_search_tool('Python tutorials')")
    print(" content = web_extract_tool(['https://example.com'])")
    print(" crawl_data = web_crawl_tool('example.com', 'Find documentation')")
|
||||||
Reference in New Issue
Block a user