A bit of restructuring for simplicity and organization

This commit is contained in:
teknium
2025-10-01 23:29:25 +00:00
parent 0411ca1880
commit a7ff4d49e9
8 changed files with 2005 additions and 1945 deletions

28
pyproject.toml Normal file
View File

@@ -0,0 +1,28 @@
# Build backend: setuptools (>=61.0) builds the project from this file.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
# Core package metadata.
[project]
name = "hermes-agent"
version = "0.1.0"
description = "AI agent with advanced tool-calling and toolsets"
readme = "README.md"
requires-python = ">=3.10"
authors = [{ name = "Hermes Agent" }]
license = { text = "MIT" }
# Runtime dependencies (unpinned).
dependencies = [
"firecrawl-py",
"openai",
"fal-client",
"python-dotenv",
"fire"
]
# Console entry point: the `hermes-agent` command calls `main` in the run_agent module.
[project.scripts]
hermes-agent = "run_agent:main"
# Top-level single-file modules shipped alongside the package(s) found below.
[tool.setuptools]
py-modules = ["run_agent", "model_tools", "toolsets"]
# Package discovery is limited to names matching "tools".
# NOTE(review): a bare "tools" pattern presumably does not match sub-packages
# (that would need "tools*") — confirm if tools/ ever gains nested packages.
[tool.setuptools.packages.find]
include = ["tools"]

View File

@@ -13,6 +13,16 @@ PROMPT="$1"
# Set debug mode for web tools # Set debug mode for web tools
export WEB_TOOLS_DEBUG=true export WEB_TOOLS_DEBUG=true
# Resolve repository root relative to this script and run from there
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$REPO_ROOT"
# Prefer local venv if present
if [ -f "venv/bin/activate" ]; then
source venv/bin/activate
fi
# Run the agent with the provided prompt # Run the agent with the provided prompt
python run_agent.py \ python run_agent.py \
--query "$PROMPT" \ --query "$PROMPT" \

0
tests/__init__.py Normal file
View File

View File

@@ -23,8 +23,8 @@ import argparse
from datetime import datetime from datetime import datetime
from typing import List, Dict, Any from typing import List, Dict, Any
# Import the web tools to test # Import the web tools to test (updated path after moving tools/)
from web_tools import ( from tools.web_tools import (
web_search_tool, web_search_tool,
web_extract_tool, web_extract_tool,
web_crawl_tool, web_crawl_tool,

File diff suppressed because it is too large Load Diff

View File

@@ -22,8 +22,6 @@ Usage:
import json import json
import os import os
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
from hecate import run_tool_with_lifecycle_management
from morphcloud._llm import ToolCall
# Detailed description for the terminal tool based on Hermes Terminal system prompt # Detailed description for the terminal tool based on Hermes Terminal system prompt
TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure, persistent Linux VM environment with full interactive application support. TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure, persistent Linux VM environment with full interactive application support.
@@ -114,6 +112,22 @@ def terminal_tool(
>>> result = terminal_tool(command="sleep 60", background=True) >>> result = terminal_tool(command="sleep 60", background=True)
""" """
try: try:
# Import hecate and ToolCall lazily so this module can be imported
# even when hecate is not installed. If unavailable, gracefully
# indicate that the terminal tool is disabled.
try:
from hecate import run_tool_with_lifecycle_management
from morphcloud._llm import ToolCall
except ImportError:
return json.dumps({
"output": "",
"screen": "",
"session_id": None,
"exit_code": -1,
"error": "Terminal tool is disabled: 'hecate' is not installed. Install with: pip install hecate",
"status": "disabled"
})
# Build tool input based on provided parameters # Build tool input based on provided parameters
tool_input = {} tool_input = {}

View File

@@ -1,346 +1,346 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Vision Tools Module Vision Tools Module
This module provides vision analysis tools that work with image URLs. This module provides vision analysis tools that work with image URLs.
Uses Gemini Flash via Nous Research API for intelligent image understanding. Uses Gemini Flash via Nous Research API for intelligent image understanding.
Available tools: Available tools:
- vision_analyze_tool: Analyze images from URLs with custom prompts - vision_analyze_tool: Analyze images from URLs with custom prompts
Features: Features:
- Comprehensive image description - Comprehensive image description
- Context-aware analysis based on user queries - Context-aware analysis based on user queries
- Proper error handling and validation - Proper error handling and validation
- Debug logging support - Debug logging support
Usage: Usage:
from vision_tools import vision_analyze_tool from vision_tools import vision_analyze_tool
import asyncio import asyncio
# Analyze an image # Analyze an image
result = await vision_analyze_tool( result = await vision_analyze_tool(
image_url="https://example.com/image.jpg", image_url="https://example.com/image.jpg",
user_prompt="What architectural style is this building?" user_prompt="What architectural style is this building?"
) )
""" """
import json import json
import os import os
import asyncio import asyncio
import uuid import uuid
import datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from openai import AsyncOpenAI from openai import AsyncOpenAI
# Initialize Nous Research API client for vision processing.
# The client is created once at import time using whatever NOUS_API_KEY holds
# at that moment (os.getenv returns None when the variable is unset).
# NOTE(review): confirm how AsyncOpenAI behaves when api_key is None — if it
# raises, merely importing this module would fail without a key. TODO verify.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Configuration for vision processing: model used when callers pass none.
DEFAULT_VISION_MODEL = "gemini-2.5-flash"

# Debug mode configuration.
# Enabled only when VISION_TOOLS_DEBUG equals "true" (case-insensitive).
DEBUG_MODE = os.getenv("VISION_TOOLS_DEBUG", "false").lower() == "true"
# Random identifier generated per import; used in the debug log file name.
DEBUG_SESSION_ID = str(uuid.uuid4())
# Directory (relative to the current working directory) for debug logs.
DEBUG_LOG_PATH = Path("./logs")
# In-memory record of tool calls; None when debug mode is off.
DEBUG_DATA = {
    "session_id": DEBUG_SESSION_ID,
    "start_time": datetime.datetime.now().isoformat(),
    "debug_enabled": DEBUG_MODE,
    "tool_calls": []
} if DEBUG_MODE else None

# Create logs directory if debug mode is enabled (import-time side effect).
if DEBUG_MODE:
    DEBUG_LOG_PATH.mkdir(exist_ok=True)
    print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
    """
    Append one timestamped entry describing a tool call to the debug record.

    Does nothing when debug mode is off or no debug record exists.

    Args:
        tool_name (str): Name of the tool being called
        call_data (Dict[str, Any]): Call details (parameters, results, ...);
            its keys override the generated "timestamp"/"tool_name" keys
    """
    if DEBUG_MODE and DEBUG_DATA:
        entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "tool_name": tool_name,
        }
        # Merged last so caller-supplied keys win, as with ** unpacking.
        entry.update(call_data)
        DEBUG_DATA["tool_calls"].append(entry)
def _save_debug_log() -> None:
    """
    Write the in-memory debug record to a JSON file in the logs directory.

    Best-effort: any failure is printed and swallowed so that debug logging
    never interrupts the calling tool. Does nothing when debug mode is off.
    """
    if not (DEBUG_MODE and DEBUG_DATA):
        return

    try:
        log_file = DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"

        # Refresh the summary fields before every write.
        DEBUG_DATA["end_time"] = datetime.datetime.now().isoformat()
        DEBUG_DATA["total_calls"] = len(DEBUG_DATA["tool_calls"])

        with log_file.open('w', encoding='utf-8') as handle:
            json.dump(DEBUG_DATA, handle, indent=2, ensure_ascii=False)

        print(f"🐛 Vision debug log saved: {log_file}")
    except Exception as exc:
        print(f"❌ Error saving vision debug log: {str(exc)}")
def _validate_image_url(url: str) -> bool: def _validate_image_url(url: str) -> bool:
""" """
Basic validation of image URL format. Basic validation of image URL format.
Args: Args:
url (str): The URL to validate url (str): The URL to validate
Returns: Returns:
bool: True if URL appears to be valid, False otherwise bool: True if URL appears to be valid, False otherwise
""" """
if not url or not isinstance(url, str): if not url or not isinstance(url, str):
return False return False
# Check if it's a valid URL format # Check if it's a valid URL format
if not (url.startswith('http://') or url.startswith('https://')): if not (url.startswith('http://') or url.startswith('https://')):
return False return False
# Check for common image extensions (optional, as URLs may not have extensions) # Check for common image extensions (optional, as URLs may not have extensions)
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'] image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
return True # Allow all HTTP/HTTPS URLs for flexibility return True # Allow all HTTP/HTTPS URLs for flexibility
async def vision_analyze_tool(
    image_url: str,
    user_prompt: str,
    model: str = DEFAULT_VISION_MODEL
) -> str:
    """
    Analyze an image from a URL using vision AI.

    This tool processes images using Gemini Flash via Nous Research API.
    The user_prompt parameter is expected to be pre-formatted by the calling
    function (typically model_tools.py) to include both full description
    requests and specific questions.

    Args:
        image_url (str): The URL of the image to analyze
        user_prompt (str): The pre-formatted prompt for the vision model
        model (str): The vision model to use (default: gemini-2.5-flash)

    Returns:
        str: JSON string containing the analysis results with the following structure:
            {
                "success": bool,
                "analysis": str (a generic failure message when success is False)
            }

    Note:
        Errors are never propagated to the caller. An invalid URL, a missing
        NOUS_API_KEY, an API failure, or an empty model response all produce
        a ``"success": false`` payload; the detailed error text is printed
        and recorded in the debug log only.
    """
    failure_message = "There was a problem with the request and the image could not be analyzed."

    debug_call_data = {
        "parameters": {
            "image_url": image_url,
            "user_prompt": user_prompt,
            "model": model
        },
        "error": None,
        "success": False,
        "analysis_length": 0,
        "model_used": model
    }

    try:
        print(f"🔍 Analyzing image from URL: {image_url[:60]}{'...' if len(image_url) > 60 else ''}")
        print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")

        # Validate image URL
        if not _validate_image_url(image_url):
            raise ValueError("Invalid image URL format. Must start with http:// or https://")

        # Check API key availability
        if not os.getenv("NOUS_API_KEY"):
            raise ValueError("NOUS_API_KEY environment variable not set")

        # The prompt is used as provided (model_tools.py handles formatting).
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": user_prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_url
                        }
                    }
                ]
            }
        ]

        print(f"🧠 Processing image with {model}...")

        # Call the vision API
        response = await nous_client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0.1,  # Low temperature for consistent analysis
            max_tokens=2000   # Generous limit for detailed analysis
        )

        # The content may be None or blank; treat that as a failure with a
        # clear error instead of an AttributeError from None.strip() or a
        # "success" payload that carries a failure message.
        content = response.choices[0].message.content
        analysis = content.strip() if content else ""
        if not analysis:
            raise ValueError("Vision model returned an empty response")

        analysis_length = len(analysis)
        print(f"✅ Image analysis completed ({analysis_length} characters)")

        result = {
            "success": True,
            "analysis": analysis
        }

        debug_call_data["success"] = True
        debug_call_data["analysis_length"] = analysis_length

    except Exception as e:
        # Tool boundary: report failure as data rather than raising.
        error_msg = f"Error analyzing image: {str(e)}"
        print(error_msg)

        result = {
            "success": False,
            "analysis": failure_message
        }

        debug_call_data["error"] = error_msg

    # Record the outcome (no-ops unless debug mode is enabled).
    _log_debug_call("vision_analyze_tool", debug_call_data)
    _save_debug_log()

    return json.dumps(result, indent=2)
def check_nous_api_key() -> bool:
    """
    Report whether NOUS_API_KEY is present and non-empty in the environment.

    Returns:
        bool: True if API key is set, False otherwise
    """
    api_key = os.environ.get("NOUS_API_KEY")
    if api_key:
        return True
    return False
def check_vision_requirements() -> bool:
    """
    Report whether the vision tools can be used.

    The only requirement checked is the presence of the Nous API key.

    Returns:
        bool: True if requirements are met, False otherwise
    """
    requirements_met = check_nous_api_key()
    return requirements_met
def get_debug_session_info() -> Dict[str, Any]:
    """
    Describe the current debug session.

    Returns:
        Dict[str, Any]: Keys "enabled", "session_id", "log_path" and
        "total_calls"; when debug mode is off the values are False, None,
        None and 0 respectively
    """
    if DEBUG_MODE and DEBUG_DATA:
        log_file = DEBUG_LOG_PATH / f"vision_tools_debug_{DEBUG_SESSION_ID}.json"
        return {
            "enabled": True,
            "session_id": DEBUG_SESSION_ID,
            "log_path": str(log_file),
            "total_calls": len(DEBUG_DATA["tool_calls"])
        }

    return {
        "enabled": False,
        "session_id": None,
        "log_path": None,
        "total_calls": 0
    }
if __name__ == "__main__":
    # Simple test/demo when run directly: reports configuration status and
    # prints usage help. No vision API call is made here.
    print("👁️ Vision Tools Module")
    print("=" * 40)

    # Check if API key is available
    api_available = check_nous_api_key()

    if not api_available:
        print("❌ NOUS_API_KEY environment variable not set")
        print("Please set your API key: export NOUS_API_KEY='your-key-here'")
        print("Get API key at: https://inference-api.nousresearch.com/")
        # SystemExit instead of the site-provided exit() helper, which is not
        # guaranteed to exist (e.g. under `python -S` or in frozen builds).
        raise SystemExit(1)

    print("✅ Nous Research API key found")

    print("🛠️ Vision tools ready for use!")
    print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")

    # Show debug mode status
    if DEBUG_MODE:
        print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
        print(f" Debug logs will be saved to: ./logs/vision_tools_debug_{DEBUG_SESSION_ID}.json")
    else:
        print("🐛 Debug mode disabled (set VISION_TOOLS_DEBUG=true to enable)")

    print("\nBasic usage:")
    print(" from vision_tools import vision_analyze_tool")
    print(" import asyncio")
    print("")
    print(" async def main():")
    print(" result = await vision_analyze_tool(")
    print(" image_url='https://example.com/image.jpg',")
    print(" user_prompt='What do you see in this image?'")
    print(" )")
    print(" print(result)")
    print(" asyncio.run(main())")

    print("\nExample prompts:")
    print(" - 'What architectural style is this building?'")
    print(" - 'Describe the emotions and mood in this image'")
    print(" - 'What text can you read in this image?'")
    print(" - 'Identify any safety hazards visible'")
    print(" - 'What products or brands are shown?'")

    print("\nDebug mode:")
    print(" # Enable debug logging")
    print(" export VISION_TOOLS_DEBUG=true")
    print(" # Debug logs capture all vision analysis calls and results")
    print(" # Logs saved to: ./logs/vision_tools_debug_UUID.json")

File diff suppressed because it is too large Load Diff