initial commit

This commit is contained in:
Teknium
2025-07-22 18:32:44 -07:00
commit 21d80ca683
8 changed files with 865 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
/venv/
/__pycache__/

Binary file not shown.

Binary file not shown.

272
model_tools.py Normal file
View File

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
"""
Model Tools Module
This module constructs tool schemas and handlers for AI model API calls.
It imports tools from various toolset modules and provides a unified interface
for defining tools and executing function calls.
Currently supports:
- Web tools (search, extract, crawl) from web_tools.py
Usage:
from model_tools import get_tool_definitions, handle_function_call
# Get tool definitions for model API
tools = get_tool_definitions()
# Handle function calls from model
result = handle_function_call("web_search_tool", {"query": "Python", "limit": 3})
"""
import json
from typing import Dict, Any, List
# Import toolsets
from web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_tavily_api_key
def get_web_tool_definitions() -> List[Dict[str, Any]]:
    """
    Get tool definitions for web tools in OpenAI's expected format.

    Each definition pairs a tool name/description with its JSON-schema
    parameters; the schemas are built separately for readability and then
    wrapped in OpenAI's function-tool envelope.

    Returns:
        List[Dict]: List of web tool definitions compatible with OpenAI API
    """
    def as_tool(name: str, description: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
        # Wrap a parameter schema in the {"type": "function", ...} envelope.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": parameters,
            },
        }

    search_params = {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The search query to look up on the web",
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of results to return (default: 5, max: 10)",
                "default": 5,
                "minimum": 1,
                "maximum": 10,
            },
        },
        "required": ["query"],
    }

    extract_params = {
        "type": "object",
        "properties": {
            "urls": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of URLs to extract content from (max 5 URLs per call)",
                "maxItems": 5,
            },
            "format": {
                "type": "string",
                "enum": ["markdown", "html"],
                "description": "Desired output format for extracted content (optional)",
            },
        },
        "required": ["urls"],
    }

    crawl_params = {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "The base URL to crawl (can include or exclude https://)",
            },
            "instructions": {
                "type": "string",
                "description": "Specific instructions for what to crawl/extract using AI intelligence (e.g., 'Find pricing information', 'Get documentation pages', 'Extract contact details')",
            },
            "depth": {
                "type": "string",
                "enum": ["basic", "advanced"],
                "description": "Depth of extraction - 'basic' for surface content, 'advanced' for deeper analysis (default: basic)",
                "default": "basic",
            },
        },
        "required": ["url"],
    }

    return [
        as_tool(
            "web_search_tool",
            "Search the web for information on any topic. Returns relevant results with titles, URLs, content snippets, and answers. Uses advanced search depth for comprehensive results.",
            search_params,
        ),
        as_tool(
            "web_extract_tool",
            "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search.",
            extract_params,
        ),
        as_tool(
            "web_crawl_tool",
            "Crawl a website with specific instructions to find and extract targeted content. Uses AI to intelligently navigate and extract relevant information from across the site.",
            crawl_params,
        ),
    ]
def get_tool_definitions() -> List[Dict[str, Any]]:
    """
    Get all available tool definitions for model API calls.

    Aggregates tool definitions from every registered toolset provider.
    Currently only the web toolset is registered; add new providers to the
    list below as more toolsets become available.

    Returns:
        List[Dict]: Complete list of all available tool definitions
    """
    # Each provider is a zero-argument callable returning a list of
    # OpenAI-format tool schemas.
    providers = [
        get_web_tool_definitions,
        # Future toolsets can be added here:
        # get_file_tool_definitions,
        # get_code_tool_definitions,
        # get_database_tool_definitions,
    ]
    definitions: List[Dict[str, Any]] = []
    for provider in providers:
        definitions.extend(provider())
    return definitions
def handle_web_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
    """
    Handle function calls for web tools.

    Args:
        function_name (str): Name of the web function to call
        function_args (Dict): Arguments for the function

    Returns:
        str: Function result as JSON string; unknown names yield a JSON
            error object instead of raising.
    """
    if function_name == "web_search_tool":
        query = function_args.get("query", "")
        limit = function_args.get("limit", 5)
        # Models sometimes send "limit" as a string (or junk); coerce to int
        # with a safe fallback so max/min below cannot raise TypeError.
        try:
            limit = int(limit)
        except (TypeError, ValueError):
            limit = 5
        # Clamp to the 1-10 range advertised in the tool schema.
        limit = max(1, min(10, limit))
        return web_search_tool(query, limit)
    elif function_name == "web_extract_tool":
        urls = function_args.get("urls", [])
        # Limit URLs to prevent abuse; non-list input is treated as empty.
        urls = urls[:5] if isinstance(urls, list) else []
        # Local named output_format to avoid shadowing the builtin `format`.
        output_format = function_args.get("format")
        return web_extract_tool(urls, output_format)
    elif function_name == "web_crawl_tool":
        url = function_args.get("url", "")
        instructions = function_args.get("instructions")
        depth = function_args.get("depth", "basic")
        return web_crawl_tool(url, instructions, depth)
    else:
        return json.dumps({"error": f"Unknown web function: {function_name}"})
def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
    """
    Main function call dispatcher that routes calls to appropriate toolsets.

    Looks up which toolset owns the requested function and forwards the call
    to that toolset's handler, so new toolsets can be wired in without
    changing the public calling interface.

    Args:
        function_name (str): Name of the function to call
        function_args (Dict): Arguments for the function

    Returns:
        str: Function result as JSON string. Errors (including unknown
            function names) are returned as a JSON error object rather than
            raised.
    """
    web_tool_names = {"web_search_tool", "web_extract_tool", "web_crawl_tool"}
    try:
        if function_name in web_tool_names:
            return handle_web_function_call(function_name, function_args)
        # Future toolsets can be routed here, e.g.:
        #   file tools -> handle_file_function_call(function_name, function_args)
        #   code tools -> handle_code_function_call(function_name, function_args)
        error_msg = f"Unknown function: {function_name}"
        print(error_msg)
        return json.dumps({"error": error_msg})
    except Exception as e:
        error_msg = f"Error executing {function_name}: {str(e)}"
        print(error_msg)
        return json.dumps({"error": error_msg})
def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
    """
    Get information about all available toolsets and their status.

    Returns:
        Dict: Per-toolset metadata: availability flag, tool names,
            human-readable description, and external requirements.
    """
    web_toolset = {
        "available": check_tavily_api_key(),
        "tools": ["web_search_tool", "web_extract_tool", "web_crawl_tool"],
        "description": "Web search, content extraction, and website crawling tools",
        "requirements": ["TAVILY_API_KEY environment variable"],
    }
    # Future toolsets can be registered here.
    return {"web_tools": web_toolset}
def check_toolset_requirements() -> Dict[str, bool]:
    """
    Check if all requirements for available toolsets are met.

    Returns:
        Dict: Toolset name mapped to True when its requirements are satisfied.
    """
    status: Dict[str, bool] = {}
    # Web tools need a Tavily API key present in the environment.
    status["web_tools"] = check_tavily_api_key()
    return status
if __name__ == "__main__":
    """
    Simple test/demo when run directly
    """
    print("🛠️ Model Tools Module")
    print("=" * 40)

    # Report whether each toolset's external requirements are satisfied.
    requirements = check_toolset_requirements()
    print("📋 Toolset Requirements:")
    for toolset_name, is_available in requirements.items():
        # NOTE(review): both branches are empty strings — a status emoji was
        # probably intended here; confirm against the original commit.
        marker = "" if is_available else ""
        label = 'Available' if is_available else 'Missing requirements'
        print(f" {marker} {toolset_name}: {label}")

    # List every tool definition with its description truncated to 80 chars.
    tools = get_tool_definitions()
    print(f"\n🔧 Available Tools ({len(tools)} total):")
    for tool in tools:
        fn = tool["function"]
        summary = fn["description"]
        suffix = '...' if len(summary) > 80 else ''
        print(f" 📌 {fn['name']}: {summary[:80]}{suffix}")

    # Show per-toolset metadata, including unmet requirements.
    toolsets = get_available_toolsets()
    print(f"\n📦 Toolset Information:")
    for toolset_name, info in toolsets.items():
        # NOTE(review): both branches empty — see note above on lost emoji.
        marker = "" if info["available"] else ""
        print(f" {marker} {toolset_name}: {info['description']}")
        if not info["available"]:
            print(f" Requirements: {', '.join(info['requirements'])}")

    print("\n💡 Usage Example:")
    print(" from model_tools import get_tool_definitions, handle_function_call")
    print(" tools = get_tool_definitions()")
    print(" result = handle_function_call('web_search_tool', {'query': 'Python'})")

2
requirements.txt Normal file
View File

@@ -0,0 +1,2 @@
tavily-python
openai

324
run_agent.py Normal file
View File

@@ -0,0 +1,324 @@
#!/usr/bin/env python3
"""
AI Agent Runner with Tool Calling
This module provides a clean, standalone agent that can execute AI models
with tool calling capabilities. It handles the conversation loop, tool execution,
and response management.
Features:
- Automatic tool calling loop until completion
- Configurable model parameters
- Error handling and recovery
- Message history management
- Support for multiple model providers
Usage:
from run_agent import AIAgent
agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
response = agent.run_conversation("Tell me about the latest Python updates")
"""
import json
import os
import time
from typing import List, Dict, Any, Optional
from openai import OpenAI
# Import our tool system
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
class AIAgent:
    """
    AI Agent with tool calling capabilities.

    This class manages the conversation flow, tool execution, and response handling
    for AI models that support function calling.
    """
    def __init__(
        self,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        model: str = "gpt-4",
        max_iterations: int = 10,
        tool_delay: float = 1.0
    ):
        """
        Initialize the AI Agent.

        Args:
            base_url (str): Base URL for the model API (optional)
            api_key (str): API key for authentication (optional, uses env var if not provided)
            model (str): Model name to use (default: "gpt-4")
            max_iterations (int): Maximum number of tool calling iterations (default: 10)
            tool_delay (float): Delay between tool calls in seconds (default: 1.0)

        Raises:
            RuntimeError: If the OpenAI client cannot be constructed.
        """
        self.model = model
        self.max_iterations = max_iterations
        self.tool_delay = tool_delay
        # Initialize OpenAI client: only forward base_url/api_key when given
        # so the library's own defaults apply otherwise.
        client_kwargs = {}
        if base_url:
            client_kwargs["base_url"] = base_url
        if api_key:
            client_kwargs["api_key"] = api_key
        else:
            # "dummy-key" keeps the client constructable when OPENAI_API_KEY
            # is unset (useful for local servers that ignore authentication).
            client_kwargs["api_key"] = os.getenv("OPENAI_API_KEY", "dummy-key")
        try:
            self.client = OpenAI(**client_kwargs)
            print(f"🤖 AI Agent initialized with model: {self.model}")
            if base_url:
                print(f"🔗 Using custom base URL: {base_url}")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
        # Get available tools (OpenAI-format schemas from model_tools).
        self.tools = get_tool_definitions()
        print(f"🛠️ Loaded {len(self.tools)} tools")
        # Check tool requirements; warn (but do not fail) when some are missing.
        requirements = check_toolset_requirements()
        missing_reqs = [name for name, available in requirements.items() if not available]
        if missing_reqs:
            print(f"⚠️ Some tools may not work due to missing requirements: {missing_reqs}")

    def create_system_message(self, custom_system: Optional[str] = None) -> str:
        """
        Create the system message for the agent.

        Args:
            custom_system (str): Custom system message (optional); when given,
                it is returned verbatim instead of the default prompt.

        Returns:
            str: System message content
        """
        if custom_system:
            return custom_system
        return (
            "You are an AI assistant that provides helpful responses. You may use extremely long chains of thought "
            "to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help "
            "come to a correct solution prior to answering. You should enclose your thoughts and internal monologue "
            "inside <thinking> tags.\n\n"
            "You are equipped with web research tools that allow you to search the web, extract content from web pages, "
            "and crawl websites. Use these tools to gather current information and provide accurate, well-researched responses. "
            "You can call multiple tools in parallel if they are not reliant on each other's results. You can also use "
            "sequential tool calls to build on data you've collected from previous tool calls. Continue using tools until "
            "you feel confident you have enough information to provide a comprehensive answer."
        )

    def run_conversation(
        self,
        user_message: str,
        system_message: Optional[str] = None,
        conversation_history: Optional[List[Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Run a complete conversation with tool calling until completion.

        Args:
            user_message (str): The user's message/question
            system_message (str): Custom system message (optional)
            conversation_history (List[Dict]): Previous conversation messages (optional)

        Returns:
            Dict: Complete conversation result with keys "final_response",
                "messages", "api_calls", and "completed".

        NOTE(review): when conversation_history is provided, it is extended
        in place (not copied) — confirm callers expect that mutation.
        """
        # Initialize conversation
        messages = conversation_history or []
        # Add system message if not already present
        if not messages or messages[0]["role"] != "system":
            messages.insert(0, {
                "role": "system",
                "content": self.create_system_message(system_message)
            })
        # Add user message
        messages.append({
            "role": "user",
            "content": user_message
        })
        print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
        # Main conversation loop: keep calling the model until it answers
        # without requesting tools, or max_iterations is reached.
        api_call_count = 0
        final_response = None
        while api_call_count < self.max_iterations:
            api_call_count += 1
            print(f"\n🔄 Making API call #{api_call_count}...")
            try:
                # Make API call with tools
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    tools=self.tools if self.tools else None
                )
                assistant_message = response.choices[0].message
                # Handle assistant response
                if assistant_message.content:
                    print(f"🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
                # Check for tool calls
                if assistant_message.tool_calls:
                    print(f"🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
                    # Add assistant message with tool calls to conversation,
                    # converted to plain dicts so the history stays serializable.
                    messages.append({
                        "role": "assistant",
                        "content": assistant_message.content,
                        "tool_calls": [
                            {
                                "id": tool_call.id,
                                "type": tool_call.type,
                                "function": {
                                    "name": tool_call.function.name,
                                    "arguments": tool_call.function.arguments
                                }
                            }
                            for tool_call in assistant_message.tool_calls
                        ]
                    })
                    # Execute each tool call
                    for i, tool_call in enumerate(assistant_message.tool_calls, 1):
                        function_name = tool_call.function.name
                        try:
                            function_args = json.loads(tool_call.function.arguments)
                        except json.JSONDecodeError as e:
                            # Malformed arguments from the model: run the tool
                            # with empty args rather than aborting the loop.
                            print(f"❌ Invalid JSON in tool call arguments: {e}")
                            function_args = {}
                        print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
                        # Execute the tool
                        function_result = handle_function_call(function_name, function_args)
                        # Add tool result to conversation
                        messages.append({
                            "role": "tool",
                            "content": function_result,
                            "tool_call_id": tool_call.id
                        })
                        print(f" ✅ Tool {i} completed")
                        # Delay between tool calls (rate-limit friendliness);
                        # skipped after the last call of the batch.
                        if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
                            time.sleep(self.tool_delay)
                    # Continue loop for next response
                    continue
                else:
                    # No tool calls - this is the final response
                    final_response = assistant_message.content or ""
                    # Add final assistant message
                    messages.append({
                        "role": "assistant",
                        "content": final_response
                    })
                    print(f"🎉 Conversation completed after {api_call_count} API call(s)")
                    break
            except Exception as e:
                error_msg = f"Error during API call #{api_call_count}: {str(e)}"
                print(f"{error_msg}")
                # Add error to conversation and try to continue
                messages.append({
                    "role": "assistant",
                    "content": f"I encountered an error: {error_msg}. Let me try a different approach."
                })
                # If we're near the limit, break to avoid infinite loops
                if api_call_count >= self.max_iterations - 1:
                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
                    break
        # Handle max iterations reached
        if api_call_count >= self.max_iterations:
            print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Stopping to prevent infinite loop.")
            if final_response is None:
                final_response = "I've reached the maximum number of iterations. Here's what I found so far."
        return {
            "final_response": final_response,
            "messages": messages,
            "api_calls": api_call_count,
            # "completed" is True whenever any final_response string exists,
            # including the fallback strings assigned above.
            "completed": final_response is not None
        }

    def chat(self, message: str) -> str:
        """
        Simple chat interface that returns just the final response.

        Args:
            message (str): User message

        Returns:
            str: Final assistant response
        """
        result = self.run_conversation(message)
        return result["final_response"]
def main():
    """
    Entry point for running the agent directly: builds an AIAgent, runs one
    example research conversation, and prints a summary.
    """
    print("🤖 AI Agent with Tool Calling")
    print("=" * 50)

    # Initialize agent with local SGLang server (modify as needed)
    try:
        agent = AIAgent(
            base_url="https://api.anthropic.com/v1/",
            model="claude-opus-4-20250514"
        )
    except RuntimeError as exc:
        print(f"❌ Failed to initialize agent: {exc}")
        return

    # Example conversation
    user_query = (
        "Tell me about the latest developments in Python 3.12 and what new features "
        "developers should know about. Please search for current information."
    )
    print(f"\n📝 User Query: {user_query}")
    print("\n" + "=" * 50)

    # Run conversation and summarize the outcome.
    outcome = agent.run_conversation(user_query)
    print("\n" + "=" * 50)
    print("📋 CONVERSATION SUMMARY")
    print("=" * 50)
    print(f"✅ Completed: {outcome['completed']}")
    print(f"📞 API Calls: {outcome['api_calls']}")
    print(f"💬 Messages: {len(outcome['messages'])}")
    if outcome['final_response']:
        print(f"\n🎯 FINAL RESPONSE:")
        print("-" * 30)
        print(outcome['final_response'])
    print("\n👋 Agent execution completed!")
if __name__ == "__main__":
    # Run the demo conversation defined in main() when executed as a script.
    main()

0
terminal_tool.py Normal file
View File

265
web_tools.py Normal file
View File

@@ -0,0 +1,265 @@
#!/usr/bin/env python3
"""
Standalone Web Tools Module
This module provides generic web tools that work with multiple backend providers.
Currently uses Tavily as the backend, but the interface makes it easy to swap
to other providers like Firecrawl without changing the function signatures.
Available tools:
- web_search_tool: Search the web for information
- web_extract_tool: Extract content from specific web pages
- web_crawl_tool: Crawl websites with specific instructions
Backend compatibility:
- Tavily: https://docs.tavily.com/
- Firecrawl: https://docs.firecrawl.dev/features/search
Usage:
from web_tools import web_search_tool, web_extract_tool, web_crawl_tool
# Search the web
results = web_search_tool("Python machine learning libraries", limit=3)
# Extract content from URLs
content = web_extract_tool(["https://example.com"], format="markdown")
# Crawl a website
crawl_data = web_crawl_tool("example.com", "Find contact information")
"""
#TODO: Search Capabilities over the scraped pages
#TODO: Store the pages in something
#TODO: Tool to see what pages are available/saved to search over
import json
import os
import re
from typing import List
from tavily import TavilyClient
# Initialize Tavily client once at module level
# NOTE(review): when TAVILY_API_KEY is unset this passes api_key=None, so
# failures surface at request time rather than at import — confirm intended.
tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
def clean_base64_images(text: str) -> str:
    """
    Replace inline base64-encoded images with short placeholders.

    Handles both parenthesized markdown-style payloads, e.g.
    (data:image/png;base64,...), and bare data:image/[type];base64,...
    sequences. Stripping them keeps tool output small for the model.

    Args:
        text: The text content to clean

    Returns:
        The text with every base64 image replaced by [BASE64_IMAGE_REMOVED]
    """
    placeholder = '[BASE64_IMAGE_REMOVED]'
    # Two passes on purpose: parenthesized images first, so the surrounding
    # parens are removed together with the payload; then any bare data URIs.
    wrapped_image = re.compile(r'\(data:image/[^;]+;base64,[A-Za-z0-9+/=]+\)')
    bare_image = re.compile(r'data:image/[^;]+;base64,[A-Za-z0-9+/=]+')
    without_wrapped = wrapped_image.sub(placeholder, text)
    return bare_image.sub(placeholder, without_wrapped)
def web_search_tool(query: str, limit: int = 5) -> str:
    """
    Search the web through the configured backend (currently Tavily).

    The generic signature lets the backend be swapped (e.g. for Firecrawl)
    without changing callers.

    Args:
        query (str): The search query to look up
        limit (int): Maximum number of results to return (default: 5)

    Returns:
        str: JSON string containing the backend's search response (query,
            results with title/url/content/score), with base64 images
            replaced by placeholders. On failure a JSON object with an
            "error" field is returned instead of raising.
    """
    try:
        print(f"🔍 Searching the web for: '{query}' (limit: {limit})")
        # "advanced" depth trades a little latency for more thorough results.
        response = tavily_client.search(query=query, max_results=limit, search_depth="advanced")
        print(f"✅ Found {len(response.get('results', []))} results")
        # Strip inline base64 images so the payload stays compact for the model.
        return clean_base64_images(json.dumps(response, indent=2))
    except Exception as e:
        error_msg = f"Error searching web: {str(e)}"
        print(error_msg)
        return json.dumps({"error": error_msg})
def web_extract_tool(urls: List[str], format: str = None) -> str:
    """
    Extract page content from specific URLs via the configured backend.

    Args:
        urls (List[str]): List of URLs to extract content from
        format (str): Desired output format ("markdown" or "html", optional).
            Parameter name kept for interface compatibility even though it
            shadows the builtin.

    Returns:
        str: JSON string containing the backend's extraction response
            (results with url/title/raw_content/content), with base64 images
            replaced by placeholders; a JSON "error" object on failure.
    """
    try:
        print(f"📄 Extracting content from {len(urls)} URL(s)")
        # Use Tavily's extract functionality
        response = tavily_client.extract(urls=urls, format=format)
        pages = response.get('results', [])
        print(f"✅ Extracted content from {len(pages)} pages")
        # Log a per-page summary for debugging.
        for page in pages:
            page_url = page.get('url', 'Unknown URL')
            size = len(page.get('raw_content', ''))
            print(f" 📝 {page_url} ({size} characters)")
        # Strip inline base64 images before handing the JSON to the model.
        return clean_base64_images(json.dumps(response, indent=2))
    except Exception as e:
        error_msg = f"Error extracting content: {str(e)}"
        print(error_msg)
        return json.dumps({"error": error_msg})
def web_crawl_tool(url: str, instructions: str = None, depth: str = "basic") -> str:
    """
    Crawl a website via the configured backend (currently Tavily).

    Args:
        url (str): The base URL to crawl (can include or exclude https://)
        instructions (str): Instructions for what to crawl/extract using LLM
            intelligence (optional)
        depth (str): Depth of extraction ("basic" or "advanced", default: "basic")

    Returns:
        str: JSON string containing the crawl response (results with
            url/title/content), with base64 images replaced by placeholders;
            a JSON "error" object on failure.
    """
    try:
        suffix = f" with instructions: '{instructions}'" if instructions else ""
        print(f"🕷️ Crawling {url}{suffix}")
        # Use Tavily's crawl functionality
        response = tavily_client.crawl(
            url=url,
            limit=20,  # Reasonable limit for most use cases
            instructions=instructions or "Get all available content",
            extract_depth=depth
        )
        pages = response.get('results', [])
        print(f"✅ Crawled {len(pages)} pages")
        # Log a per-page summary for debugging.
        for page in pages:
            page_url = page.get('url', 'Unknown URL')
            size = len(page.get('content', ''))
            print(f" 🌐 {page_url} ({size} characters)")
        # Strip inline base64 images before handing the JSON to the model.
        return clean_base64_images(json.dumps(response, indent=2))
    except Exception as e:
        error_msg = f"Error crawling website: {str(e)}"
        print(error_msg)
        return json.dumps({"error": error_msg})
# Convenience function to check if API key is available
def check_tavily_api_key() -> bool:
    """
    Check if the Tavily API key is available in environment variables.

    Returns:
        bool: True when TAVILY_API_KEY is set to a non-empty value, False otherwise
    """
    key = os.environ.get("TAVILY_API_KEY")
    return key is not None and key != ""
if __name__ == "__main__":
    """
    Simple test/demo when run directly
    """
    print("🌐 Standalone Web Tools Module")
    print("=" * 40)
    # Check if API key is available; without it every tool call would fail,
    # so bail out with setup instructions instead of continuing.
    if not check_tavily_api_key():
        print("❌ TAVILY_API_KEY environment variable not set")
        print("Please set your API key: export TAVILY_API_KEY='your-key-here'")
        print("Get API key at: https://tavily.com/")
        exit(1)
    print("✅ Tavily API key found")
    print("🛠️ Web tools ready for use!")
    # Print copy-pasteable usage examples.
    print("\nExample usage:")
    print(" from web_tools import web_search_tool, web_extract_tool, web_crawl_tool")
    print(" results = web_search_tool('Python tutorials')")
    print(" content = web_extract_tool(['https://example.com'])")
    print(" crawl_data = web_crawl_tool('example.com', 'Find documentation')")