A bit of restructuring for simplicity and organization

This commit is contained in:
teknium
2025-10-01 23:29:25 +00:00
parent 0411ca1880
commit a7ff4d49e9
8 changed files with 2005 additions and 1945 deletions

28
pyproject.toml Normal file
View File

@@ -0,0 +1,28 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "hermes-agent"
version = "0.1.0"
description = "AI agent with advanced tool-calling and toolsets"
readme = "README.md"
requires-python = ">=3.10"
authors = [{ name = "Hermes Agent" }]
license = { text = "MIT" }
dependencies = [
"firecrawl-py",
"openai",
"fal-client",
"python-dotenv",
"fire"
]
[project.scripts]
hermes-agent = "run_agent:main"
[tool.setuptools]
py-modules = ["run_agent", "model_tools", "toolsets"]
[tool.setuptools.packages.find]
include = ["tools"]

View File

@@ -13,6 +13,16 @@ PROMPT="$1"
# Set debug mode for web tools
export WEB_TOOLS_DEBUG=true
# Resolve repository root relative to this script and run from there
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$REPO_ROOT"
# Prefer local venv if present
if [ -f "venv/bin/activate" ]; then
source venv/bin/activate
fi
# Run the agent with the provided prompt
python run_agent.py \
--query "$PROMPT" \

0
tests/__init__.py Normal file
View File

View File

@@ -23,8 +23,8 @@ import argparse
from datetime import datetime
from typing import List, Dict, Any
# Import the web tools to test
from web_tools import (
# Import the web tools to test (updated path after moving tools/)
from tools.web_tools import (
web_search_tool,
web_extract_tool,
web_crawl_tool,

File diff suppressed because it is too large Load Diff

View File

@@ -22,8 +22,6 @@ Usage:
import json
import os
from typing import Optional, Dict, Any
from hecate import run_tool_with_lifecycle_management
from morphcloud._llm import ToolCall
# Detailed description for the terminal tool based on Hermes Terminal system prompt
TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure, persistent Linux VM environment with full interactive application support.
@@ -114,6 +112,22 @@ def terminal_tool(
>>> result = terminal_tool(command="sleep 60", background=True)
"""
try:
# Import hecate and ToolCall lazily so this module can be imported
# even when hecate is not installed. If unavailable, gracefully
# indicate that the terminal tool is disabled.
try:
from hecate import run_tool_with_lifecycle_management
from morphcloud._llm import ToolCall
except ImportError:
return json.dumps({
"output": "",
"screen": "",
"session_id": None,
"exit_code": -1,
"error": "Terminal tool is disabled: 'hecate' is not installed. Install with: pip install hecate",
"status": "disabled"
})
# Build tool input based on provided parameters
tool_input = {}

View File

@@ -1,346 +1,346 @@
#!/usr/bin/env python3
"""
Vision Tools Module
This module provides vision analysis tools that work with image URLs.
Uses Gemini Flash via Nous Research API for intelligent image understanding.
Available tools:
- vision_analyze_tool: Analyze images from URLs with custom prompts
Features:
- Comprehensive image description
- Context-aware analysis based on user queries
- Proper error handling and validation
- Debug logging support
Usage:
from vision_tools import vision_analyze_tool
import asyncio
# Analyze an image
result = await vision_analyze_tool(
image_url="https://example.com/image.jpg",
user_prompt="What architectural style is this building?"
)
"""
import json
import os
import asyncio
import uuid
import datetime
from pathlib import Path
from typing import Dict, Any, Optional
from openai import AsyncOpenAI
# Initialize Nous Research API client for vision processing
nous_client = AsyncOpenAI(
api_key=os.getenv("NOUS_API_KEY"),
base_url="https://inference-api.nousresearch.com/v1"
)
# Configuration for vision processing
DEFAULT_VISION_MODEL = "gemini-2.5-flash"
# Debug mode configuration
DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
DEBUG_SESSION_ID = str(uuid.uuid4())
DEBUG_LOG_PATH = Path("./logs")
DEBUG_DATA = {
"session_id": DEBUG_SESSION_ID,
"start_time": datetime.datetime.now().isoformat(),
"debug_enabled": DEBUG_MODE,
"tool_calls": []
} if DEBUG_MODE else None
# Create logs directory if debug mode is enabled
if DEBUG_MODE:
DEBUG_LOG_PATH.mkdir(exist_ok=True)
print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
"""
Log a debug call entry to the global debug data structure.
Args:
tool_name (str): Name of the tool being called
call_data (Dict[str, Any]): Data about the call including parameters and results
"""
if not DEBUG_MODE or not DEBUG_DATA:
return
call_entry = {
"timestamp": datetime.datetime.now().isoformat(),
"tool_name": tool_name,
**call_data
}
DEBUG_DATA["tool_calls"].append(call_entry)
def _save_debug_log() -> None:
"""
Save the current debug data to a JSON file in the logs directory.
"""
if not DEBUG_MODE or not DEBUG_DATA:
return
try:
debug_filename = f"vision_tools_debug_{DEBUG_SESSION_ID}.json"
debug_filepath = DEBUG_LOG_PATH / debug_filename
# Update end time
DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])
with open(debug_filepath, 'w', encoding='utf-8') as f:
json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
print(f"🐛 Vision debug log saved: {debug_filepath}")
except Exception as e:
print(f"❌ Error saving vision debug log: {str(e)}")
def _validate_image_url(url: str) -> bool:
"""
Basic validation of image URL format.
Args:
url (str): The URL to validate
Returns:
bool: True if URL appears to be valid, False otherwise
"""
if not url or not isinstance(url, str):
return False
# Check if it's a valid URL format
if not (url.startswith('http://') or url.startswith('https://')):
return False
# Check for common image extensions (optional, as URLs may not have extensions)
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
return True # Allow all HTTP/HTTPS URLs for flexibility
async def vision_analyze_tool(
image_url: str,
user_prompt: str,
model: str = DEFAULT_VISION_MODEL
) -> str:
"""
Analyze an image from a URL using vision AI.
This tool processes images using Gemini Flash via Nous Research API.
The user_prompt parameter is expected to be pre-formatted by the calling
function (typically model_tools.py) to include both full description
requests and specific questions.
Args:
image_url (str): The URL of the image to analyze
user_prompt (str): The pre-formatted prompt for the vision model
model (str): The vision model to use (default: gemini-2.5-flash)
Returns:
str: JSON string containing the analysis results with the following structure:
{
"success": bool,
"analysis": str (defaults to error message if None)
}
Raises:
Exception: If analysis fails or API key is not set
"""
debug_call_data = {
"parameters": {
"image_url": image_url,
"user_prompt": user_prompt,
"model": model
},
"error": None,
"success": False,
"analysis_length": 0,
"model_used": model
}
try:
print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
# Validate image URL
if not _validate_image_url(image_url):
raise ValueError("Invalid image URL format. Must start with http:// or https://")
# Check API key availability
if not os.getenv("NOUS_API_KEY"):
raise ValueError("NOUS_API_KEY environment variable not set")
# Use the prompt as provided (model_tools.py now handles full description formatting)
comprehensive_prompt = user_prompt
# Prepare the message with image URL format
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": comprehensive_prompt
},
{
"type": "image_url",
"image_url": {
"url": image_url
}
}
]
}
]
print(f"🧠 Processing image with {model}...")
# Call the vision API
response = await nous_client.chat.completions.create(
model=model,
messages=messages,
temperature=0.1, # Low temperature for consistent analysis
max_tokens=2000 # Generous limit for detailed analysis
)
# Extract the analysis
analysis = response.choices[0].message.content.strip()
analysis_length = len(analysis)
print(f"✅ Image analysis completed ({analysis_length} characters)")
# Prepare successful response
result = {
"success": True,
"analysis": analysis or "There was a problem with the request and the image could not be analyzed."
}
debug_call_data["success"] = True
debug_call_data["analysis_length"] = analysis_length
# Log debug information
_log_debug_call("vision_analyze_tool", debug_call_data)
_save_debug_log()
return json.dumps(result, indent=2)
except Exception as e:
error_msg = f"Error analyzing image: {str(e)}"
print(f"{error_msg}")
# Prepare error response
result = {
"success": False,
"analysis": "There was a problem with the request and the image could not be analyzed."
}
debug_call_data["error"] = error_msg
_log_debug_call("vision_analyze_tool", debug_call_data)
_save_debug_log()
return json.dumps(result, indent=2)
def check_nous_api_key() -> bool:
"""
Check if the Nous Research API key is available in environment variables.
Returns:
bool: True if API key is set, False otherwise
"""
return bool(os.getenv("NOUS_API_KEY"))
def check_vision_requirements() -> bool:
"""
Check if all requirements for vision tools are met.
Returns:
bool: True if requirements are met, False otherwise
"""
return check_nous_api_key()
def get_debug_session_info() -> Dict[str, Any]:
"""
Get information about the current debug session.
Returns:
Dict[str, Any]: Dictionary containing debug session information
"""
if not DEBUG_MODE or not DEBUG_DATA:
return {
"enabled": False,
"session_id": None,
"log_path": None,
"total_calls": 0
}
return {
"enabled": True,
"session_id": DEBUG_SESSION_ID,
"log_path": str(DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"),
"total_calls": len(DEBUG_DATA["tool_calls"])
}
if __name__ == "__main__":
"""
Simple test/demo when run directly
"""
print("👁️ Vision Tools Module")
print("=" * 40)
# Check if API key is available
api_available = check_nous_api_key()
if not api_available:
print("❌ NOUS_API_KEY environment variable not set")
print("Please set your API key: export NOUS_API_KEY='your-key-here'")
print("Get API key at: https://inference-api.nousresearch.com/")
exit(1)
else:
print("✅ Nous Research API key found")
print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
# Show debug mode status
if DEBUG_MODE:
print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
print(f" Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")
else:
print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")
print("\nBasic usage:")
print(" from vision_tools import vision_analyze_tool")
print(" import asyncio")
print("")
print(" async def main():")
print(" result = await vision_analyze_tool(")
print(" image_url='https://example.com/image.jpg',")
print(" user_prompt='What do you see in this image?'")
print(" )")
print(" print(result)")
print(" asyncio.run(main())")
print("\nExample prompts:")
print(" - 'What architectural style is this building?'")
print(" - 'Describe the emotions and mood in this image'")
print(" - 'What text can you read in this image?'")
print(" - 'Identify any safety hazards visible'")
print(" - 'What products or brands are shown?'")
print("\nDebug mode:")
print(" # Enable debug logging")
print(" export VISION_TOOLS_DEBUG=true")
print(" # Debug logs capture all vision analysis calls and results")
print(" # Logs saved to: ./logs/vision_tools_debug_UUID.json")
#!/usr/bin/env python3
"""
Vision Tools Module
This module provides vision analysis tools that work with image URLs.
Uses Gemini Flash via Nous Research API for intelligent image understanding.
Available tools:
- vision_analyze_tool: Analyze images from URLs with custom prompts
Features:
- Comprehensive image description
- Context-aware analysis based on user queries
- Proper error handling and validation
- Debug logging support
Usage:
from vision_tools import vision_analyze_tool
import asyncio
# Analyze an image
result = await vision_analyze_tool(
image_url="https://example.com/image.jpg",
user_prompt="What architectural style is this building?"
)
"""
import json
import os
import asyncio
import uuid
import datetime
from pathlib import Path
from typing import Dict, Any, Optional
from openai import AsyncOpenAI
# Initialize Nous Research API client for vision processing
nous_client = AsyncOpenAI(
api_key=os.getenv("NOUS_API_KEY"),
base_url="https://inference-api.nousresearch.com/v1"
)
# Configuration for vision processing
DEFAULT_VISION_MODEL = "gemini-2.5-flash"
# Debug mode configuration
DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
DEBUG_SESSION_ID = str(uuid.uuid4())
DEBUG_LOG_PATH = Path("./logs")
DEBUG_DATA = {
"session_id": DEBUG_SESSION_ID,
"start_time": datetime.datetime.now().isoformat(),
"debug_enabled": DEBUG_MODE,
"tool_calls": []
} if DEBUG_MODE else None
# Create logs directory if debug mode is enabled
if DEBUG_MODE:
DEBUG_LOG_PATH.mkdir(exist_ok=True)
print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
"""
Log a debug call entry to the global debug data structure.
Args:
tool_name (str): Name of the tool being called
call_data (Dict[str, Any]): Data about the call including parameters and results
"""
if not DEBUG_MODE or not DEBUG_DATA:
return
call_entry = {
"timestamp": datetime.datetime.now().isoformat(),
"tool_name": tool_name,
**call_data
}
DEBUG_DATA["tool_calls"].append(call_entry)
def _save_debug_log() -> None:
"""
Save the current debug data to a JSON file in the logs directory.
"""
if not DEBUG_MODE or not DEBUG_DATA:
return
try:
debug_filename = f"vision_tools_debug_{DEBUG_SESSION_ID}.json"
debug_filepath = DEBUG_LOG_PATH / debug_filename
# Update end time
DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])
with open(debug_filepath, 'w', encoding='utf-8') as f:
json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
print(f"🐛 Vision debug log saved: {debug_filepath}")
except Exception as e:
print(f"❌ Error saving vision debug log: {str(e)}")
def _validate_image_url(url: str) -> bool:
"""
Basic validation of image URL format.
Args:
url (str): The URL to validate
Returns:
bool: True if URL appears to be valid, False otherwise
"""
if not url or not isinstance(url, str):
return False
# Check if it's a valid URL format
if not (url.startswith('http://') or url.startswith('https://')):
return False
# Check for common image extensions (optional, as URLs may not have extensions)
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
return True # Allow all HTTP/HTTPS URLs for flexibility
async def vision_analyze_tool(
image_url: str,
user_prompt: str,
model: str = DEFAULT_VISION_MODEL
) -> str:
"""
Analyze an image from a URL using vision AI.
This tool processes images using Gemini Flash via Nous Research API.
The user_prompt parameter is expected to be pre-formatted by the calling
function (typically model_tools.py) to include both full description
requests and specific questions.
Args:
image_url (str): The URL of the image to analyze
user_prompt (str): The pre-formatted prompt for the vision model
model (str): The vision model to use (default: gemini-2.5-flash)
Returns:
str: JSON string containing the analysis results with the following structure:
{
"success": bool,
"analysis": str (defaults to error message if None)
}
Raises:
Exception: If analysis fails or API key is not set
"""
debug_call_data = {
"parameters": {
"image_url": image_url,
"user_prompt": user_prompt,
"model": model
},
"error": None,
"success": False,
"analysis_length": 0,
"model_used": model
}
try:
print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
# Validate image URL
if not _validate_image_url(image_url):
raise ValueError("Invalid image URL format. Must start with http:// or https://")
# Check API key availability
if not os.getenv("NOUS_API_KEY"):
raise ValueError("NOUS_API_KEY environment variable not set")
# Use the prompt as provided (model_tools.py now handles full description formatting)
comprehensive_prompt = user_prompt
# Prepare the message with image URL format
messages = [
{
"role": "user",
"content": [
{
"type": "text",
"text": comprehensive_prompt
},
{
"type": "image_url",
"image_url": {
"url": image_url
}
}
]
}
]
print(f"🧠 Processing image with {model}...")
# Call the vision API
response = await nous_client.chat.completions.create(
model=model,
messages=messages,
temperature=0.1, # Low temperature for consistent analysis
max_tokens=2000 # Generous limit for detailed analysis
)
# Extract the analysis
analysis = response.choices[0].message.content.strip()
analysis_length = len(analysis)
print(f"✅ Image analysis completed ({analysis_length} characters)")
# Prepare successful response
result = {
"success": True,
"analysis": analysis or "There was a problem with the request and the image could not be analyzed."
}
debug_call_data["success"] = True
debug_call_data["analysis_length"] = analysis_length
# Log debug information
_log_debug_call("vision_analyze_tool", debug_call_data)
_save_debug_log()
return json.dumps(result, indent=2)
except Exception as e:
error_msg = f"Error analyzing image: {str(e)}"
print(f"{error_msg}")
# Prepare error response
result = {
"success": False,
"analysis": "There was a problem with the request and the image could not be analyzed."
}
debug_call_data["error"] = error_msg
_log_debug_call("vision_analyze_tool", debug_call_data)
_save_debug_log()
return json.dumps(result, indent=2)
def check_nous_api_key() -> bool:
"""
Check if the Nous Research API key is available in environment variables.
Returns:
bool: True if API key is set, False otherwise
"""
return bool(os.getenv("NOUS_API_KEY"))
def check_vision_requirements() -> bool:
"""
Check if all requirements for vision tools are met.
Returns:
bool: True if requirements are met, False otherwise
"""
return check_nous_api_key()
def get_debug_session_info() -> Dict[str, Any]:
"""
Get information about the current debug session.
Returns:
Dict[str, Any]: Dictionary containing debug session information
"""
if not DEBUG_MODE or not DEBUG_DATA:
return {
"enabled": False,
"session_id": None,
"log_path": None,
"total_calls": 0
}
return {
"enabled": True,
"session_id": DEBUG_SESSION_ID,
"log_path": str(DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"),
"total_calls": len(DEBUG_DATA["tool_calls"])
}
if __name__ == "__main__":
"""
Simple test/demo when run directly
"""
print("👁️ Vision Tools Module")
print("=" * 40)
# Check if API key is available
api_available = check_nous_api_key()
if not api_available:
print("❌ NOUS_API_KEY environment variable not set")
print("Please set your API key: export NOUS_API_KEY='your-key-here'")
print("Get API key at: https://inference-api.nousresearch.com/")
exit(1)
else:
print("✅ Nous Research API key found")
print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
# Show debug mode status
if DEBUG_MODE:
print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
print(f" Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")
else:
print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")
print("\nBasic usage:")
print(" from vision_tools import vision_analyze_tool")
print(" import asyncio")
print("")
print(" async def main():")
print(" result = await vision_analyze_tool(")
print(" image_url='https://example.com/image.jpg',")
print(" user_prompt='What do you see in this image?'")
print(" )")
print(" print(result)")
print(" asyncio.run(main())")
print("\nExample prompts:")
print(" - 'What architectural style is this building?'")
print(" - 'Describe the emotions and mood in this image'")
print(" - 'What text can you read in this image?'")
print(" - 'Identify any safety hazards visible'")
print(" - 'What products or brands are shown?'")
print("\nDebug mode:")
print(" # Enable debug logging")
print(" export VISION_TOOLS_DEBUG=true")
print(" # Debug logs capture all vision analysis calls and results")
print(" # Logs saved to: ./logs/vision_tools_debug_UUID.json")

File diff suppressed because it is too large Load Diff