refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.
This commit is contained in:
teknium1
2026-02-21 03:11:11 -08:00
parent b6247b71b5
commit a885d2f240
14 changed files with 303 additions and 303 deletions

7
cli.py
View File

@@ -12,6 +12,7 @@ Usage:
python cli.py --list-tools # List available tools and exit
"""
import logging
import os
import sys
import json
@@ -21,6 +22,8 @@ from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any, Optional
logger = logging.getLogger(__name__)
# Suppress startup messages for clean CLI experience
os.environ["MSWEA_SILENT_STARTUP"] = "1" # mini-swe-agent
os.environ["HERMES_QUIET"] = "1" # Our own modules
@@ -183,7 +186,7 @@ def load_cli_config() -> Dict[str, Any]:
if "max_turns" in file_config and "agent" not in file_config:
defaults["agent"]["max_turns"] = file_config["max_turns"]
except Exception as e:
print(f"[Warning] Failed to load cli-config.yaml: {e}")
logger.warning("Failed to load cli-config.yaml: %s", e)
# Apply terminal config to environment variables (so terminal_tool picks them up)
terminal_config = defaults.get("terminal", {})
@@ -646,7 +649,7 @@ def save_config_value(key_path: str, value: any) -> bool:
return True
except Exception as e:
print(f"(x_x) Failed to save config: {e}")
logger.error("Failed to save config: %s", e)
return False

View File

@@ -6,6 +6,7 @@ This module provides:
- run_daemon(): Run continuously, checking every 60 seconds
"""
import logging
import os
import sys
import time
@@ -14,6 +15,8 @@ from datetime import datetime
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
@@ -33,8 +36,8 @@ def run_job(job: dict) -> tuple[bool, str, Optional[str]]:
job_name = job["name"]
prompt = job["prompt"]
print(f"[cron] Running job '{job_name}' (ID: {job_id})")
print(f"[cron] Prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
try:
# Create agent with default settings
@@ -69,12 +72,12 @@ def run_job(job: dict) -> tuple[bool, str, Optional[str]]:
{final_response}
"""
print(f"[cron] Job '{job_name}' completed successfully")
logger.info("Job '%s' completed successfully", job_name)
return True, output, None
except Exception as e:
error_msg = f"{type(e).__name__}: {str(e)}"
print(f"[cron] Job '{job_name}' failed: {error_msg}")
logger.error("Job '%s' failed: %s", job_name, error_msg)
# Build error output
output = f"""# Cron Job: {job_name} (FAILED)
@@ -114,11 +117,11 @@ def tick(verbose: bool = True) -> int:
due_jobs = get_due_jobs()
if verbose and not due_jobs:
print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - No jobs due")
logger.info("%s - No jobs due", datetime.now().strftime('%H:%M:%S'))
return 0
if verbose:
print(f"[cron] {datetime.now().strftime('%H:%M:%S')} - {len(due_jobs)} job(s) due")
logger.info("%s - %s job(s) due", datetime.now().strftime('%H:%M:%S'), len(due_jobs))
executed = 0
for job in due_jobs:
@@ -128,14 +131,14 @@ def tick(verbose: bool = True) -> int:
# Save output to file
output_file = save_job_output(job["id"], output)
if verbose:
print(f"[cron] Output saved to: {output_file}")
logger.info("Output saved to: %s", output_file)
# Mark job as run (handles repeat counting, next_run computation)
mark_job_run(job["id"], success, error)
executed += 1
except Exception as e:
print(f"[cron] Error processing job {job['id']}: {e}")
logger.error("Error processing job %s: %s", job['id'], e)
mark_job_run(job["id"], False, str(e))
return executed
@@ -151,21 +154,20 @@ def run_daemon(check_interval: int = 60, verbose: bool = True):
check_interval: Seconds between checks (default: 60)
verbose: Whether to print status messages
"""
print(f"[cron] Starting daemon (checking every {check_interval}s)")
print(f"[cron] Press Ctrl+C to stop")
print()
logger.info("Starting daemon (checking every %ss)", check_interval)
logger.info("Press Ctrl+C to stop")
try:
while True:
try:
tick(verbose=verbose)
except Exception as e:
print(f"[cron] Tick error: {e}")
logger.error("Tick error: %s", e)
time.sleep(check_interval)
except KeyboardInterrupt:
print("\n[cron] Daemon stopped")
logger.info("Daemon stopped")
if __name__ == "__main__":

View File

@@ -14,6 +14,7 @@ Usage:
"""
import asyncio
import logging
import os
import re
import sys
@@ -60,6 +61,8 @@ from gateway.session import (
from gateway.delivery import DeliveryRouter, DeliveryTarget
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
logger = logging.getLogger(__name__)
class GatewayRunner:
"""
@@ -106,8 +109,8 @@ class GatewayRunner:
Returns True if at least one adapter connected successfully.
"""
print("[gateway] Starting Hermes Gateway...")
print(f"[gateway] Session storage: {self.config.sessions_dir}")
logger.info("Starting Hermes Gateway...")
logger.info("Session storage: %s", self.config.sessions_dir)
# Discover and load event hooks
self.hooks.discover_and_load()
@@ -117,9 +120,9 @@ class GatewayRunner:
from tools.process_registry import process_registry
recovered = process_registry.recover_from_checkpoint()
if recovered:
print(f"[gateway] Recovered {recovered} background process(es) from previous run")
logger.info("Recovered %s background process(es) from previous run", recovered)
except Exception as e:
print(f"[gateway] Process checkpoint recovery: {e}")
logger.warning("Process checkpoint recovery: %s", e)
connected_count = 0
@@ -130,27 +133,27 @@ class GatewayRunner:
adapter = self._create_adapter(platform, platform_config)
if not adapter:
print(f"[gateway] No adapter available for {platform.value}")
logger.warning("No adapter available for %s", platform.value)
continue
# Set up message handler
adapter.set_message_handler(self._handle_message)
# Try to connect
print(f"[gateway] Connecting to {platform.value}...")
logger.info("Connecting to %s...", platform.value)
try:
success = await adapter.connect()
if success:
self.adapters[platform] = adapter
connected_count += 1
print(f"[gateway] ✓ {platform.value} connected")
logger.info("%s connected", platform.value)
else:
print(f"[gateway] ✗ {platform.value} failed to connect")
logger.warning("%s failed to connect", platform.value)
except Exception as e:
print(f"[gateway] ✗ {platform.value} error: {e}")
logger.error("%s error: %s", platform.value, e)
if connected_count == 0:
print("[gateway] No platforms connected. Check your configuration.")
logger.warning("No platforms connected. Check your configuration.")
return False
# Update delivery router with adapters
@@ -161,31 +164,31 @@ class GatewayRunner:
# Emit gateway:startup hook
hook_count = len(self.hooks.loaded_hooks)
if hook_count:
print(f"[gateway] {hook_count} hook(s) loaded")
logger.info("%s hook(s) loaded", hook_count)
await self.hooks.emit("gateway:startup", {
"platforms": [p.value for p in self.adapters.keys()],
})
print(f"[gateway] Gateway running with {connected_count} platform(s)")
print("[gateway] Press Ctrl+C to stop")
logger.info("Gateway running with %s platform(s)", connected_count)
logger.info("Press Ctrl+C to stop")
return True
async def stop(self) -> None:
"""Stop the gateway and disconnect all adapters."""
print("[gateway] Stopping gateway...")
logger.info("Stopping gateway...")
self._running = False
for platform, adapter in self.adapters.items():
try:
await adapter.disconnect()
print(f"[gateway] ✓ {platform.value} disconnected")
logger.info("%s disconnected", platform.value)
except Exception as e:
print(f"[gateway] ✗ {platform.value} disconnect error: {e}")
logger.error("%s disconnect error: %s", platform.value, e)
self.adapters.clear()
self._shutdown_event.set()
print("[gateway] Gateway stopped")
logger.info("Gateway stopped")
async def wait_for_shutdown(self) -> None:
"""Wait for shutdown signal."""
@@ -200,28 +203,28 @@ class GatewayRunner:
if platform == Platform.TELEGRAM:
from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
if not check_telegram_requirements():
print(f"[gateway] Telegram: python-telegram-bot not installed")
logger.warning("Telegram: python-telegram-bot not installed")
return None
return TelegramAdapter(config)
elif platform == Platform.DISCORD:
from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
if not check_discord_requirements():
print(f"[gateway] Discord: discord.py not installed")
logger.warning("Discord: discord.py not installed")
return None
return DiscordAdapter(config)
elif platform == Platform.WHATSAPP:
from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
if not check_whatsapp_requirements():
print(f"[gateway] WhatsApp: Node.js not installed or bridge not configured")
logger.warning("WhatsApp: Node.js not installed or bridge not configured")
return None
return WhatsAppAdapter(config)
elif platform == Platform.SLACK:
from gateway.platforms.slack import SlackAdapter, check_slack_requirements
if not check_slack_requirements():
print(f"[gateway] Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
return None
return SlackAdapter(config)
@@ -286,7 +289,7 @@ class GatewayRunner:
# Check if user is authorized
if not self._is_user_authorized(source):
print(f"[gateway] Unauthorized user: {source.user_id} ({source.user_name}) on {source.platform.value}")
logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
# In DMs: offer pairing code. In groups: silently ignore.
if source.chat_type == "dm":
platform_name = source.platform.value if source.platform else "unknown"
@@ -347,7 +350,7 @@ class GatewayRunner:
approval = self._pending_approvals.pop(session_key_preview)
cmd = approval["command"]
pattern_key = approval.get("pattern_key", "")
print(f"[gateway] ✅ User approved dangerous command: {cmd[:60]}...")
logger.info("User approved dangerous command: %s...", cmd[:60])
# Approve for session and re-run via terminal_tool with force=True
from tools.terminal_tool import terminal_tool, _session_approved_patterns
_session_approved_patterns.add(pattern_key)
@@ -365,7 +368,7 @@ class GatewayRunner:
# Check if there's already a running agent for this session
if session_key in self._running_agents:
running_agent = self._running_agents[session_key]
print(f"[gateway] ⚡ Interrupting running agent for session {session_key[:20]}...")
logger.debug("Interrupting running agent for session %s...", session_key[:20])
running_agent.interrupt(event.text)
# Store the new message to be processed after current agent finishes
self._pending_messages[session_key] = event.text
@@ -466,7 +469,7 @@ class GatewayRunner:
watcher = process_registry.pending_watchers.pop(0)
asyncio.create_task(self._run_process_watcher(watcher))
except Exception as e:
print(f"[gateway] Process watcher setup error: {e}", flush=True)
logger.error("Process watcher setup error: %s", e)
# Check if the agent encountered a dangerous command needing approval
# The terminal tool stores the last pending approval globally
@@ -533,7 +536,7 @@ class GatewayRunner:
return response
except Exception as e:
print(f"[gateway] Agent error: {e}")
logger.error("Agent error: %s", e)
return f"Sorry, I encountered an error: {str(e)}"
finally:
# Clear session env
@@ -766,7 +769,7 @@ class GatewayRunner:
enriched_parts = []
for path in image_paths:
try:
print(f"[gateway] Auto-analyzing user image: {path}", flush=True)
logger.debug("Auto-analyzing user image: %s", path)
result_json = await vision_analyze_tool(
image_url=path,
user_prompt=analysis_prompt,
@@ -786,7 +789,7 @@ class GatewayRunner:
f"with vision_analyze using image_url: {path}]"
)
except Exception as e:
print(f"[gateway] Vision auto-analysis error: {e}", flush=True)
logger.error("Vision auto-analysis error: %s", e)
enriched_parts.append(
f"[The user sent an image but something went wrong when I "
f"tried to look at it~ You can try examining it yourself "
@@ -823,7 +826,7 @@ class GatewayRunner:
enriched_parts = []
for path in audio_paths:
try:
print(f"[gateway] Transcribing user voice: {path}", flush=True)
logger.debug("Transcribing user voice: %s", path)
result = await asyncio.to_thread(transcribe_audio, path)
if result["success"]:
transcript = result["transcript"]
@@ -845,7 +848,7 @@ class GatewayRunner:
f"transcribing it~ ({error})]"
)
except Exception as e:
print(f"[gateway] Transcription error: {e}", flush=True)
logger.error("Transcription error: %s", e)
enriched_parts.append(
"[The user sent a voice message but something went wrong "
"when I tried to listen to it~ Let them know!]"
@@ -873,7 +876,7 @@ class GatewayRunner:
platform_name = watcher.get("platform", "")
chat_id = watcher.get("chat_id", "")
print(f"[gateway] Process watcher started: {session_id} (every {interval}s)", flush=True)
logger.debug("Process watcher started: %s (every %ss)", session_id, interval)
last_output_len = 0
while True:
@@ -904,7 +907,7 @@ class GatewayRunner:
try:
await adapter.send(chat_id, message_text)
except Exception as e:
print(f"[gateway] Watcher delivery error: {e}", flush=True)
logger.error("Watcher delivery error: %s", e)
break
elif has_new_output:
@@ -923,9 +926,9 @@ class GatewayRunner:
try:
await adapter.send(chat_id, message_text)
except Exception as e:
print(f"[gateway] Watcher delivery error: {e}", flush=True)
logger.error("Watcher delivery error: %s", e)
print(f"[gateway] Process watcher ended: {session_id}", flush=True)
logger.debug("Process watcher ended: %s", session_id)
async def _run_agent(
self,
@@ -1085,7 +1088,7 @@ class GatewayRunner:
break
return
except Exception as e:
print(f"[Gateway] Progress message error: {e}")
logger.error("Progress message error: %s", e)
await asyncio.sleep(1)
# We need to share the agent instance for interrupt support
@@ -1243,7 +1246,7 @@ class GatewayRunner:
if agent:
pending_event = adapter.get_pending_message(chat_id)
pending_text = pending_event.text if pending_event else None
print(f"[gateway] ⚡ Interrupt detected from adapter, signaling agent...")
logger.debug("Interrupt detected from adapter, signaling agent...")
agent.interrupt(pending_text)
break
@@ -1268,7 +1271,7 @@ class GatewayRunner:
pending = result.get("interrupt_message")
if pending:
print(f"[gateway] 📨 Processing interrupted message: '{pending[:40]}...'")
logger.debug("Processing interrupted message: '%s...'", pending[:40])
# Clear the adapter's interrupt event so the next _run_agent call
# doesn't immediately re-trigger the interrupt before the new agent

View File

@@ -23,6 +23,7 @@ Usage:
import copy
import json
import logging
logger = logging.getLogger(__name__)
import os
import random
import re
@@ -43,10 +44,9 @@ from dotenv import load_dotenv
env_path = Path(__file__).parent / '.env'
if env_path.exists():
load_dotenv(dotenv_path=env_path)
if not os.getenv("HERMES_QUIET"):
print(f"✅ Loaded environment variables from {env_path}")
elif not os.getenv("HERMES_QUIET"):
print(f" No .env file found at {env_path}. Using system environment variables.")
logger.info("Loaded environment variables from %s", env_path)
else:
logger.info("No .env file found at %s. Using system environment variables.", env_path)
# Import our tool system
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
@@ -160,8 +160,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
_model_metadata_cache = cache
_model_metadata_cache_time = time.time()
if not os.getenv("HERMES_QUIET"):
logging.debug(f"Fetched metadata for {len(cache)} models from OpenRouter")
logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
return cache
@@ -1195,8 +1194,7 @@ class AIAgent:
logging.getLogger('grpc').setLevel(logging.WARNING)
logging.getLogger('modal').setLevel(logging.WARNING)
logging.getLogger('rex-deploy').setLevel(logging.INFO) # Keep INFO for sandbox status
if not self.quiet_mode:
print("🔍 Verbose logging enabled (third-party library logs suppressed)")
logger.info("Verbose logging enabled (third-party library logs suppressed)")
else:
# Set logging to INFO level for important messages only
logging.basicConfig(
@@ -2017,9 +2015,9 @@ class AIAgent:
try:
with open(filename, "a", encoding="utf-8") as f:
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
print(f"💾 Trajectory saved to {filename}")
logger.info("Trajectory saved to %s", filename)
except Exception as e:
print(f"⚠️ Failed to save trajectory: {e}")
logger.warning("Failed to save trajectory: %s", e)
def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
if not key:

View File

@@ -45,6 +45,7 @@ Usage:
import atexit
import json
import logging
import os
import signal
import subprocess
@@ -59,6 +60,8 @@ from typing import Dict, Any, Optional, List
from pathlib import Path
from hermes_constants import OPENROUTER_CHAT_URL
logger = logging.getLogger(__name__)
# Try to import httpx for async LLM calls
try:
import httpx
@@ -123,14 +126,14 @@ def _emergency_cleanup_all_sessions():
if not _active_sessions:
return
print(f"\n[browser_tool] Emergency cleanup: closing {len(_active_sessions)} active session(s)...", file=sys.stderr)
logger.info("Emergency cleanup: closing %s active session(s)...", len(_active_sessions))
try:
api_key = os.environ.get("BROWSERBASE_API_KEY")
project_id = os.environ.get("BROWSERBASE_PROJECT_ID")
if not api_key or not project_id:
print("[browser_tool] WARNING: Cannot cleanup - missing BROWSERBASE credentials", file=sys.stderr)
logger.warning("Cannot cleanup - missing BROWSERBASE credentials")
return
for task_id, session_info in list(_active_sessions.items()):
@@ -150,20 +153,20 @@ def _emergency_cleanup_all_sessions():
timeout=5 # Short timeout for cleanup
)
if response.status_code in (200, 201, 204):
print(f"[browser_tool] Closed session {bb_session_id}", file=sys.stderr)
logger.info("Closed session %s", bb_session_id)
else:
print(f"[browser_tool] Failed to close session {bb_session_id}: HTTP {response.status_code}", file=sys.stderr)
logger.warning("Failed to close session %s: HTTP %s", bb_session_id, response.status_code)
except Exception as e:
print(f"[browser_tool] Error closing session {bb_session_id}: {e}", file=sys.stderr)
logger.error("Error closing session %s: %s", bb_session_id, e)
_active_sessions.clear()
except Exception as e:
print(f"[browser_tool] Emergency cleanup error: {e}", file=sys.stderr)
logger.error("Emergency cleanup error: %s", e)
def _signal_handler(signum, frame):
"""Handle interrupt signals to cleanup sessions before exit."""
print(f"\n[browser_tool] Received signal {signum}, cleaning up...", file=sys.stderr)
logger.warning("Received signal %s, cleaning up...", signum)
_emergency_cleanup_all_sessions()
sys.exit(128 + signum)
@@ -202,17 +205,14 @@ def _cleanup_inactive_browser_sessions():
for task_id in sessions_to_cleanup:
try:
if not os.getenv("HERMES_QUIET"):
elapsed = int(current_time - _session_last_activity.get(task_id, current_time))
print(f"[browser_tool] Cleaning up inactive session for task: {task_id} "
f"(inactive for {elapsed}s)", file=sys.stderr)
elapsed = int(current_time - _session_last_activity.get(task_id, current_time))
logger.info("Cleaning up inactive session for task: %s (inactive for %ss)", task_id, elapsed)
cleanup_browser(task_id)
with _cleanup_lock:
if task_id in _session_last_activity:
del _session_last_activity[task_id]
except Exception as e:
if not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] Error cleaning up inactive session {task_id}: {e}", file=sys.stderr)
logger.warning("Error cleaning up inactive session %s: %s", task_id, e)
def _browser_cleanup_thread_worker():
@@ -228,8 +228,7 @@ def _browser_cleanup_thread_worker():
try:
_cleanup_inactive_browser_sessions()
except Exception as e:
if not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] Cleanup thread error: {e}", file=sys.stderr)
logger.warning("Cleanup thread error: %s", e)
# Sleep in 1-second intervals so we can stop quickly if needed
for _ in range(30):
@@ -251,9 +250,7 @@ def _start_browser_cleanup_thread():
name="browser-cleanup"
)
_cleanup_thread.start()
if not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] Started inactivity cleanup thread "
f"(timeout: {BROWSER_SESSION_INACTIVITY_TIMEOUT}s)", file=sys.stderr)
logger.info("Started inactivity cleanup thread (timeout: %ss)", BROWSER_SESSION_INACTIVITY_TIMEOUT)
def _stop_browser_cleanup_thread():
@@ -480,8 +477,7 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
if timeout_val > 0:
session_config["timeout"] = timeout_val
except ValueError:
print(f"[browser_tool] WARNING: Invalid BROWSERBASE_SESSION_TIMEOUT value: {custom_timeout_ms}",
file=sys.stderr)
logger.warning("Invalid BROWSERBASE_SESSION_TIMEOUT value: %s", custom_timeout_ms)
# Enable proxies for better CAPTCHA solving (default: true)
# Routes traffic through residential IPs for more reliable access
@@ -516,8 +512,8 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
# First try without keepAlive (most likely culprit for paid plan requirement)
if enable_keep_alive:
keepalive_fallback = True
print(f"[browser_tool] WARNING: keepAlive may require paid plan (402), retrying without it. "
f"Sessions may timeout during long operations.", file=sys.stderr)
logger.warning("keepAlive may require paid plan (402), retrying without it. "
"Sessions may timeout during long operations.")
session_config.pop("keepAlive", None)
response = requests.post(
"https://api.browserbase.com/v1/sessions",
@@ -532,8 +528,8 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
# If still 402, try without proxies too
if response.status_code == 402 and enable_proxies:
proxies_fallback = True
print(f"[browser_tool] WARNING: Proxies unavailable (402), retrying without proxies. "
f"Bot detection may be less effective.", file=sys.stderr)
logger.warning("Proxies unavailable (402), retrying without proxies. "
"Bot detection may be less effective.")
session_config.pop("proxies", None)
response = requests.post(
"https://api.browserbase.com/v1/sessions",
@@ -563,7 +559,7 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:
# Log session info for debugging
feature_str = ", ".join(k for k, v in features_enabled.items() if v)
print(f"[browser_tool] Created session {session_name} with features: {feature_str}", file=sys.stderr)
logger.info("Created session %s with features: %s", session_name, feature_str)
return {
"session_name": session_name,
@@ -751,8 +747,8 @@ def _run_browser_command(
)
# Log stderr for diagnostics (agent-browser may emit warnings there)
if result.stderr and result.stderr.strip() and not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] stderr from '{command}': {result.stderr.strip()[:200]}", file=sys.stderr)
if result.stderr and result.stderr.strip():
logger.debug("stderr from '%s': %s", command, result.stderr.strip()[:200])
# Parse JSON output
if result.stdout.strip():
@@ -762,9 +758,9 @@ def _run_browser_command(
if command == "snapshot" and parsed.get("success"):
snap_data = parsed.get("data", {})
if not snap_data.get("snapshot") and not snap_data.get("refs"):
print(f"[browser_tool] WARNING: snapshot returned empty content. "
f"Possible stale daemon or CDP connection issue. "
f"returncode={result.returncode}", file=sys.stderr)
logger.warning("snapshot returned empty content. "
"Possible stale daemon or CDP connection issue. "
"returncode=%s", result.returncode)
return parsed
except json.JSONDecodeError:
# If not valid JSON, return as raw output
@@ -1196,7 +1192,7 @@ def browser_close(task_id: Optional[str] = None) -> str:
config = _get_browserbase_config()
_close_browserbase_session(bb_session_id, config["api_key"], config["project_id"])
except Exception as e:
print(f"[browser_tool] Warning: Could not close BrowserBase session: {e}", file=sys.stderr)
logger.warning("Could not close BrowserBase session: %s", e)
del _active_sessions[session_key]
if result.get("success"):
@@ -1484,14 +1480,14 @@ def _close_browserbase_session(session_id: str, api_key: str, project_id: str) -
)
if response.status_code in (200, 201, 204):
print(f"[browser_tool] Successfully closed BrowserBase session {session_id}", file=sys.stderr)
logger.debug("Successfully closed BrowserBase session %s", session_id)
return True
else:
print(f"[browser_tool] Failed to close session {session_id}: HTTP {response.status_code} - {response.text[:200]}", file=sys.stderr)
logger.warning("Failed to close session %s: HTTP %s - %s", session_id, response.status_code, response.text[:200])
return False
except Exception as e:
print(f"[browser_tool] Exception closing session {session_id}: {e}", file=sys.stderr)
logger.error("Exception closing session %s: %s", session_id, e)
return False
@@ -1508,9 +1504,8 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
if task_id is None:
task_id = "default"
if not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] cleanup_browser called for task_id: {task_id}", file=sys.stderr)
print(f"[browser_tool] Active sessions: {list(_active_sessions.keys())}", file=sys.stderr)
logger.debug("cleanup_browser called for task_id: %s", task_id)
logger.debug("Active sessions: %s", list(_active_sessions.keys()))
# Check if session exists (under lock), but don't remove yet -
# _run_browser_command needs it to build the close command.
@@ -1519,14 +1514,14 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
if session_info:
bb_session_id = session_info.get("bb_session_id", "unknown")
print(f"[browser_tool] Found session for task {task_id}: bb_session_id={bb_session_id}", file=sys.stderr)
logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id)
# Try to close via agent-browser first (needs session in _active_sessions)
try:
_run_browser_command(task_id, "close", [], timeout=10)
print(f"[browser_tool] agent-browser close command completed for task {task_id}", file=sys.stderr)
logger.debug("agent-browser close command completed for task %s", task_id)
except Exception as e:
print(f"[browser_tool] agent-browser close failed for task {task_id}: {e}", file=sys.stderr)
logger.warning("agent-browser close failed for task %s: %s", task_id, e)
# Now remove from tracking under lock
with _cleanup_lock:
@@ -1538,9 +1533,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
config = _get_browserbase_config()
success = _close_browserbase_session(bb_session_id, config["api_key"], config["project_id"])
if not success:
print(f"[browser_tool] WARNING: Could not close BrowserBase session {bb_session_id}", file=sys.stderr)
logger.warning("Could not close BrowserBase session %s", bb_session_id)
except Exception as e:
print(f"[browser_tool] Exception during BrowserBase session close: {e}", file=sys.stderr)
logger.error("Exception during BrowserBase session close: %s", e)
# Kill the daemon process and clean up socket directory
session_name = session_info.get("session_name", "")
@@ -1553,16 +1548,14 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
try:
daemon_pid = int(open(pid_file).read().strip())
os.kill(daemon_pid, signal.SIGTERM)
if not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] Killed daemon pid {daemon_pid} for {session_name}", file=sys.stderr)
logger.debug("Killed daemon pid %s for %s", daemon_pid, session_name)
except (ProcessLookupError, ValueError, PermissionError, OSError):
pass
shutil.rmtree(socket_dir, ignore_errors=True)
if not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] Removed task {task_id} from active sessions", file=sys.stderr)
elif not os.getenv("HERMES_QUIET"):
print(f"[browser_tool] No active session found for task_id: {task_id}", file=sys.stderr)
logger.debug("Removed task %s from active sessions", task_id)
else:
logger.debug("No active session found for task_id: %s", task_id)
def cleanup_all_browsers() -> None:

View File

@@ -2,11 +2,14 @@
"""File Tools Module - LLM agent file manipulation tools."""
import json
import logging
import os
import threading
from typing import Optional
from tools.file_operations import ShellFileOperations
logger = logging.getLogger(__name__)
_file_ops_lock = threading.Lock()
_file_ops_cache: dict = {}
@@ -76,8 +79,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
image = ""
cwd = overrides.get("cwd") or config["cwd"]
if not os.getenv("HERMES_QUIET"):
print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True)
logger.info("Creating new %s environment for task %s...", env_type, task_id[:8])
terminal_env = _create_environment(
env_type=env_type,
@@ -91,8 +93,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
_last_activity[task_id] = time.time()
_start_cleanup_thread()
if not os.getenv("HERMES_QUIET"):
print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True)
logger.info("%s environment ready for task %s", env_type, task_id[:8])
# Build file_ops from the (guaranteed live) environment and cache it
file_ops = ShellFileOperations(terminal_env)

View File

@@ -29,6 +29,7 @@ Usage:
"""
import json
import logging
import os
import asyncio
import uuid
@@ -37,6 +38,8 @@ from pathlib import Path
from typing import Dict, Any, Optional, Union
import fal_client
logger = logging.getLogger(__name__)
# Configuration for image generation
DEFAULT_MODEL = "fal-ai/flux-2-pro"
DEFAULT_ASPECT_RATIO = "landscape"
@@ -89,7 +92,7 @@ DEBUG_DATA = {
# Create logs directory if debug mode is enabled
if DEBUG_MODE:
DEBUG_LOG_PATH.mkdir(exist_ok=True)
print(f"🐛 Image generation debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
logger.debug("Image generation debug mode enabled - Session ID: %s", DEBUG_SESSION_ID)
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
@@ -130,10 +133,10 @@ def _save_debug_log() -> None:
with open(debug_filepath, 'w', encoding='utf-8') as f:
json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
print(f"🐛 Image generation debug log saved: {debug_filepath}")
logger.debug("Image generation debug log saved: %s", debug_filepath)
except Exception as e:
print(f"Error saving image generation debug log: {str(e)}")
logger.error("Error saving image generation debug log: %s", e)
def _validate_parameters(
@@ -221,7 +224,7 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]
Dict[str, Any]: Upscaled image data or None if upscaling fails
"""
try:
print(f"🔍 Upscaling image with Clarity Upscaler...")
logger.info("Upscaling image with Clarity Upscaler...")
# Prepare arguments for upscaler
upscaler_arguments = {
@@ -247,7 +250,7 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]
if result and "image" in result:
upscaled_image = result["image"]
print(f"Image upscaled successfully to {upscaled_image.get('width', 'unknown')}x{upscaled_image.get('height', 'unknown')}")
logger.info("Image upscaled successfully to %sx%s", upscaled_image.get('width', 'unknown'), upscaled_image.get('height', 'unknown'))
return {
"url": upscaled_image["url"],
"width": upscaled_image.get("width", 0),
@@ -256,11 +259,11 @@ async def _upscale_image(image_url: str, original_prompt: str) -> Dict[str, Any]
"upscale_factor": UPSCALER_FACTOR
}
else:
print("Upscaler returned invalid response")
logger.error("Upscaler returned invalid response")
return None
except Exception as e:
print(f"Error upscaling image: {str(e)}")
logger.error("Error upscaling image: %s", e)
return None
@@ -300,7 +303,7 @@ async def image_generate_tool(
# Validate and map aspect_ratio to actual image_size
aspect_ratio_lower = aspect_ratio.lower().strip() if aspect_ratio else DEFAULT_ASPECT_RATIO
if aspect_ratio_lower not in ASPECT_RATIO_MAP:
print(f"⚠️ Invalid aspect_ratio '{aspect_ratio}', defaulting to '{DEFAULT_ASPECT_RATIO}'")
logger.warning("Invalid aspect_ratio '%s', defaulting to '%s'", aspect_ratio, DEFAULT_ASPECT_RATIO)
aspect_ratio_lower = DEFAULT_ASPECT_RATIO
image_size = ASPECT_RATIO_MAP[aspect_ratio_lower]
@@ -324,7 +327,7 @@ async def image_generate_tool(
start_time = datetime.datetime.now()
try:
print(f"🎨 Generating {num_images} image(s) with FLUX 2 Pro: {prompt[:80]}{'...' if len(prompt) > 80 else ''}")
logger.info("Generating %s image(s) with FLUX 2 Pro: %s", num_images, prompt[:80])
# Validate prompt
if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0:
@@ -356,11 +359,11 @@ async def image_generate_tool(
if seed is not None and isinstance(seed, int):
arguments["seed"] = seed
print(f"🚀 Submitting generation request to FAL.ai FLUX 2 Pro...")
print(f" Model: {DEFAULT_MODEL}")
print(f" Aspect Ratio: {aspect_ratio_lower}{image_size}")
print(f" Steps: {validated_params['num_inference_steps']}")
print(f" Guidance: {validated_params['guidance_scale']}")
logger.info("Submitting generation request to FAL.ai FLUX 2 Pro...")
logger.info(" Model: %s", DEFAULT_MODEL)
logger.info(" Aspect Ratio: %s -> %s", aspect_ratio_lower, image_size)
logger.info(" Steps: %s", validated_params['num_inference_steps'])
logger.info(" Guidance: %s", validated_params['guidance_scale'])
# Submit request to FAL.ai
handler = await fal_client.submit_async(
@@ -399,7 +402,7 @@ async def image_generate_tool(
formatted_images.append(upscaled_image)
else:
# Fall back to original image if upscaling fails
print(f"⚠️ Using original image as fallback")
logger.warning("Using original image as fallback")
original_image["upscaled"] = False
formatted_images.append(original_image)
@@ -407,7 +410,7 @@ async def image_generate_tool(
raise ValueError("No valid image URLs returned from API")
upscaled_count = sum(1 for img in formatted_images if img.get("upscaled", False))
print(f"Generated {len(formatted_images)} image(s) in {generation_time:.1f}s ({upscaled_count} upscaled)")
logger.info("Generated %s image(s) in %.1fs (%s upscaled)", len(formatted_images), generation_time, upscaled_count)
# Prepare successful response - minimal format
response_data = {
@@ -428,7 +431,7 @@ async def image_generate_tool(
except Exception as e:
generation_time = (datetime.datetime.now() - start_time).total_seconds()
error_msg = f"Error generating image: {str(e)}"
print(f"{error_msg}")
logger.error("%s", error_msg)
# Prepare error response - minimal format
response_data = {

View File

@@ -46,6 +46,7 @@ Usage:
"""
import json
import logging
import os
import asyncio
import uuid
@@ -55,6 +56,8 @@ from typing import Dict, Any, List, Optional
from openai import AsyncOpenAI
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
_openrouter_client = None
def _get_openrouter_client():
@@ -108,7 +111,7 @@ DEBUG_DATA = {
# Create logs directory if debug mode is enabled
if DEBUG_MODE:
DEBUG_LOG_PATH.mkdir(exist_ok=True)
print(f"🐛 MoA debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
logger.debug("MoA debug mode enabled - Session ID: %s", DEBUG_SESSION_ID)
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
@@ -149,10 +152,10 @@ def _save_debug_log() -> None:
with open(debug_filepath, 'w', encoding='utf-8') as f:
json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
print(f"🐛 MoA debug log saved: {debug_filepath}")
logger.debug("MoA debug log saved: %s", debug_filepath)
except Exception as e:
print(f"Error saving MoA debug log: {str(e)}")
logger.error("Error saving MoA debug log: %s", e)
def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> str:
@@ -192,7 +195,7 @@ async def _run_reference_model_safe(
"""
for attempt in range(max_retries):
try:
print(f"🤖 Querying {model} (attempt {attempt + 1}/{max_retries})")
logger.info("Querying %s (attempt %s/%s)", model, attempt + 1, max_retries)
# Build parameters for the API call
api_params = {
@@ -214,27 +217,27 @@ async def _run_reference_model_safe(
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = response.choices[0].message.content.strip()
print(f"{model} responded ({len(content)} characters)")
logger.info("%s responded (%s characters)", model, len(content))
return model, content, True
except Exception as e:
error_str = str(e)
# Log more detailed error information for debugging
if "invalid" in error_str.lower():
print(f"⚠️ {model} invalid request error (attempt {attempt + 1}): {error_str}")
logger.warning("%s invalid request error (attempt %s): %s", model, attempt + 1, error_str)
elif "rate" in error_str.lower() or "limit" in error_str.lower():
print(f"⚠️ {model} rate limit error (attempt {attempt + 1}): {error_str}")
logger.warning("%s rate limit error (attempt %s): %s", model, attempt + 1, error_str)
else:
print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")
logger.warning("%s unknown error (attempt %s): %s", model, attempt + 1, error_str)
if attempt < max_retries - 1:
# Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, 60s
sleep_time = min(2 ** (attempt + 1), 60)
print(f" Retrying in {sleep_time}s...")
logger.info("Retrying in %ss...", sleep_time)
await asyncio.sleep(sleep_time)
else:
error_msg = f"{model} failed after {max_retries} attempts: {error_str}"
print(f"{error_msg}")
logger.error("%s", error_msg)
return model, error_msg, False
@@ -256,7 +259,7 @@ async def _run_aggregator_model(
Returns:
str: Synthesized final response
"""
print(f"🧠 Running aggregator model: {AGGREGATOR_MODEL}")
logger.info("Running aggregator model: %s", AGGREGATOR_MODEL)
# Build parameters for the API call
api_params = {
@@ -281,7 +284,7 @@ async def _run_aggregator_model(
response = await _get_openrouter_client().chat.completions.create(**api_params)
content = response.choices[0].message.content.strip()
print(f"Aggregation complete ({len(content)} characters)")
logger.info("Aggregation complete (%s characters)", len(content))
return content
@@ -348,8 +351,8 @@ async def mixture_of_agents_tool(
}
try:
print(f"🚀 Starting Mixture-of-Agents processing...")
print(f"📝 Query: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
logger.info("Starting Mixture-of-Agents processing...")
logger.info("Query: %s", user_prompt[:100])
# Validate API key availability
if not os.getenv("OPENROUTER_API_KEY"):
@@ -359,10 +362,10 @@ async def mixture_of_agents_tool(
ref_models = reference_models or REFERENCE_MODELS
agg_model = aggregator_model or AGGREGATOR_MODEL
print(f"🔄 Using {len(ref_models)} reference models in 2-layer MoA architecture")
logger.info("Using %s reference models in 2-layer MoA architecture", len(ref_models))
# Layer 1: Generate diverse responses from reference models (with failure handling)
print("📡 Layer 1: Generating reference responses...")
logger.info("Layer 1: Generating reference responses...")
model_results = await asyncio.gather(*[
_run_reference_model_safe(model, user_prompt, REFERENCE_TEMPERATURE)
for model in ref_models
@@ -381,10 +384,10 @@ async def mixture_of_agents_tool(
successful_count = len(successful_responses)
failed_count = len(failed_models)
print(f"📊 Reference model results: {successful_count} successful, {failed_count} failed")
logger.info("Reference model results: %s successful, %s failed", successful_count, failed_count)
if failed_models:
print(f"⚠️ Failed models: {', '.join(failed_models)}")
logger.warning("Failed models: %s", ', '.join(failed_models))
# Check if we have enough successful responses to proceed
if successful_count < MIN_SUCCESSFUL_REFERENCES:
@@ -395,7 +398,7 @@ async def mixture_of_agents_tool(
debug_call_data["failed_models"] = failed_models
# Layer 2: Aggregate responses using the aggregator model
print("🧠 Layer 2: Synthesizing final response...")
logger.info("Layer 2: Synthesizing final response...")
aggregator_system_prompt = _construct_aggregator_prompt(
AGGREGATOR_SYSTEM_PROMPT,
successful_responses
@@ -411,7 +414,7 @@ async def mixture_of_agents_tool(
end_time = datetime.datetime.now()
processing_time = (end_time - start_time).total_seconds()
print(f"MoA processing completed in {processing_time:.2f} seconds")
logger.info("MoA processing completed in %.2f seconds", processing_time)
# Prepare successful response (only final aggregated result, minimal fields)
result = {
@@ -436,7 +439,7 @@ async def mixture_of_agents_tool(
except Exception as e:
error_msg = f"Error in MoA processing: {str(e)}"
print(f"{error_msg}")
logger.error("%s", error_msg)
# Calculate processing time even for errors
end_time = datetime.datetime.now()

View File

@@ -30,6 +30,7 @@ Usage:
"""
import json
import logging
import os
import signal
import subprocess
@@ -40,6 +41,8 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# Checkpoint file for crash recovery (gateway only)
CHECKPOINT_PATH = Path(os.path.expanduser("~/.hermes/processes.json"))
@@ -152,10 +155,9 @@ class ProcessRegistry:
return session
except ImportError:
# ptyprocess not installed -- fall back to Popen
print(f"[ProcessRegistry] ptyprocess not installed, falling back to pipe mode", flush=True)
logger.warning("ptyprocess not installed, falling back to pipe mode")
except Exception as e:
print(f"[ProcessRegistry] PTY spawn failed ({e}), falling back to pipe mode", flush=True)
logger.warning("PTY spawn failed (%s), falling back to pipe mode", e)
# Standard Popen path (non-PTY or PTY fallback)
proc = subprocess.Popen(
@@ -712,7 +714,7 @@ class ProcessRegistry:
with self._lock:
self._running[session.id] = session
recovered += 1
print(f"[ProcessRegistry] Recovered detached process: {session.command[:60]} (pid={pid})", flush=True)
logger.info("Recovered detached process: %s (pid=%d)", session.command[:60], pid)
# Clear the checkpoint (will be rewritten as processes finish)
try:

View File

@@ -27,6 +27,7 @@ Usage:
"""
import json
import logging
import os
import signal
import sys
@@ -40,6 +41,8 @@ import uuid
from pathlib import Path
from typing import Optional, Dict, Any
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Global interrupt event: set by the agent when a user interrupt arrives.
@@ -87,13 +90,11 @@ def _get_scratch_dir() -> Path:
# Create user-specific subdirectory
user_scratch = scratch / os.getenv("USER", "hermes") / "hermes-agent"
user_scratch.mkdir(parents=True, exist_ok=True)
if not os.getenv("HERMES_QUIET"):
print(f"[Terminal] Using /scratch for sandboxes: {user_scratch}")
logger.info("Using /scratch for sandboxes: %s", user_scratch)
return user_scratch
# Fall back to /tmp
if not os.getenv("HERMES_QUIET"):
print("[Terminal] Warning: /scratch not available, using /tmp (limited space)")
logger.warning("/scratch not available, using /tmp (limited space)")
return Path(tempfile.gettempdir())
@@ -155,9 +156,9 @@ def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
if sif_path.exists():
return str(sif_path)
print(f"[Terminal] Building SIF image (one-time setup)...")
print(f"[Terminal] Source: {image}")
print(f"[Terminal] Target: {sif_path}")
logger.info("Building SIF image (one-time setup)...")
logger.info(" Source: %s", image)
logger.info(" Target: %s", sif_path)
# Ensure tmp directory exists for build
tmp_dir = cache_dir / "tmp"
@@ -177,21 +178,21 @@ def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
env=env
)
if result.returncode != 0:
print(f"[Terminal] ⚠️ SIF build failed, falling back to docker:// URL")
print(f"[Terminal] Error: {result.stderr[:500]}")
logger.warning("SIF build failed, falling back to docker:// URL")
logger.warning(" Error: %s", result.stderr[:500])
return image
print(f"[Terminal] ✅ SIF image built successfully")
logger.info("SIF image built successfully")
return str(sif_path)
except subprocess.TimeoutExpired:
print(f"[Terminal] ⚠️ SIF build timed out, falling back to docker:// URL")
logger.warning("SIF build timed out, falling back to docker:// URL")
# Clean up partial file
if sif_path.exists():
sif_path.unlink()
return image
except Exception as e:
print(f"[Terminal] ⚠️ SIF build error: {e}, falling back to docker:// URL")
logger.warning("SIF build error: %s, falling back to docker:// URL", e)
return image
@@ -218,8 +219,8 @@ def _check_disk_usage_warning():
total_gb = total_bytes / (1024 ** 3)
if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB:
print(f"⚠️ [Terminal] WARNING: Disk usage ({total_gb:.1f}GB) exceeds threshold ({DISK_USAGE_WARNING_THRESHOLD_GB}GB)")
print(f" Consider running cleanup_all_environments() or reducing parallel workers")
logger.warning("Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().",
total_gb, DISK_USAGE_WARNING_THRESHOLD_GB)
return True
return False
@@ -280,7 +281,7 @@ def _save_permanent_allowlist(patterns: set):
config["command_allowlist"] = list(patterns)
save_config(config)
except Exception as e:
print(f" ⚠️ Could not save allowlist: {e}")
logger.warning("Could not save allowlist: %s", e)
def _detect_dangerous_command(command: str) -> tuple:
@@ -822,7 +823,7 @@ class _SingularityEnvironment:
raise RuntimeError(f"Failed to start instance: {result.stderr}")
self._instance_started = True
print(f"[Singularity] Instance {self.instance_id} started (persistent container)", flush=True)
logger.info("Singularity instance %s started (persistent container)", self.instance_id)
except subprocess.TimeoutExpired:
raise RuntimeError("Instance start timed out")
@@ -881,9 +882,9 @@ class _SingularityEnvironment:
text=True,
timeout=30,
)
print(f"[Singularity] Instance {self.instance_id} stopped", flush=True)
logger.info("Singularity instance %s stopped", self.instance_id)
except Exception as e:
print(f"[Singularity] Warning: failed to stop instance {self.instance_id}: {e}", flush=True)
logger.warning("Failed to stop Singularity instance %s: %s", self.instance_id, e)
self._instance_started = False
def stop(self):
@@ -1282,11 +1283,9 @@ def _get_env_config() -> Dict[str, Any]:
# inside a container. Also catch Windows-style paths (C:\...).
host_prefixes = ("/Users/", "/home/", "C:\\", "C:/")
if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
if not os.getenv("HERMES_QUIET"):
print(
f"[Terminal] Ignoring TERMINAL_CWD={cwd!r} for {env_type} backend "
f"(host path won't exist in sandbox). Using {default_cwd!r} instead."
)
logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
"(host path won't exist in sandbox). Using %r instead.",
cwd, env_type, default_cwd)
cwd = default_cwd
return {
@@ -1406,16 +1405,14 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
elif hasattr(env, 'terminate'):
env.terminate()
if not os.getenv("HERMES_QUIET"):
print(f"[Terminal Cleanup] Cleaned up inactive environment for task: {task_id}")
logger.info("Cleaned up inactive environment for task: %s", task_id)
except Exception as e:
error_str = str(e)
if not os.getenv("HERMES_QUIET"):
if "404" in error_str or "not found" in error_str.lower():
print(f"[Terminal Cleanup] Environment for task {task_id} already cleaned up")
else:
print(f"[Terminal Cleanup] Error cleaning up environment for task {task_id}: {e}")
if "404" in error_str or "not found" in error_str.lower():
logger.info("Environment for task %s already cleaned up", task_id)
else:
logger.warning("Error cleaning up environment for task %s: %s", task_id, e)
def _cleanup_thread_worker():
@@ -1427,8 +1424,7 @@ def _cleanup_thread_worker():
config = _get_env_config()
_cleanup_inactive_envs(config["lifetime_seconds"])
except Exception as e:
if not os.getenv("HERMES_QUIET"):
print(f"[Terminal Cleanup] Error in cleanup thread: {e}")
logger.warning("Error in cleanup thread: %s", e)
for _ in range(60):
if not _cleanup_running:
@@ -1493,7 +1489,7 @@ def cleanup_all_environments():
cleanup_vm(task_id)
cleaned += 1
except Exception as e:
print(f"[Terminal Cleanup] Error cleaning {task_id}: {e}")
logger.error("Error cleaning %s: %s", task_id, e)
# Also clean any orphaned directories
scratch_dir = _get_scratch_dir()
@@ -1501,12 +1497,12 @@ def cleanup_all_environments():
for path in glob.glob(str(scratch_dir / "hermes-*")):
try:
shutil.rmtree(path, ignore_errors=True)
print(f"[Terminal Cleanup] Removed orphaned: {path}")
logger.info("Removed orphaned: %s", path)
except OSError:
pass
if not os.getenv("HERMES_QUIET") and cleaned > 0:
print(f"[Terminal Cleanup] Cleaned {cleaned} environments")
if cleaned > 0:
logger.info("Cleaned %d environments", cleaned)
return cleaned
@@ -1545,16 +1541,14 @@ def cleanup_vm(task_id: str):
elif hasattr(env, 'terminate'):
env.terminate()
if not os.getenv("HERMES_QUIET"):
print(f"[Terminal Cleanup] Manually cleaned up environment for task: {task_id}")
logger.info("Manually cleaned up environment for task: %s", task_id)
except Exception as e:
if not os.getenv("HERMES_QUIET"):
error_str = str(e)
if "404" in error_str or "not found" in error_str.lower():
print(f"[Terminal Cleanup] Environment for task {task_id} already cleaned up")
else:
print(f"[Terminal Cleanup] Error cleaning up environment for task {task_id}: {e}")
error_str = str(e)
if "404" in error_str or "not found" in error_str.lower():
logger.info("Environment for task %s already cleaned up", task_id)
else:
logger.warning("Error cleaning up environment for task %s: %s", task_id, e)
def _atexit_cleanup():
@@ -1562,7 +1556,7 @@ def _atexit_cleanup():
_stop_cleanup_thread()
if _active_environments:
count = len(_active_environments)
print(f"\n[Terminal Cleanup] Shutting down {count} remaining sandbox(es)...")
logger.info("Shutting down %d remaining sandbox(es)...", count)
cleanup_all_environments()
atexit.register(_atexit_cleanup)
@@ -1679,8 +1673,7 @@ def terminal_tool(
if needs_creation:
if env_type in ("singularity", "local"):
_check_disk_usage_warning()
if not os.getenv("HERMES_QUIET"):
print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True)
logger.info("Creating new %s environment for task %s...", env_type, effective_task_id[:8])
try:
ssh_config = None
if env_type == "ssh":
@@ -1710,8 +1703,7 @@ def terminal_tool(
_active_environments[effective_task_id] = new_env
_last_activity[effective_task_id] = time.time()
env = new_env
if not os.getenv("HERMES_QUIET"):
print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True)
logger.info("%s environment ready for task %s", env_type, effective_task_id[:8])
# Check for dangerous commands (only for local/ssh in interactive modes)
# Skip check if force=True (user has confirmed they want to run it)
@@ -1828,17 +1820,13 @@ def terminal_tool(
if retry_count < max_retries:
retry_count += 1
wait_time = 2 ** retry_count
print(f"⚠️ Terminal: execution error, retrying in {wait_time}s (attempt {retry_count}/{max_retries})")
print(f" Command: {command[:200]}")
print(f" Error: {type(e).__name__}: {e}")
print(f" Task ID: {effective_task_id}, Backend: {env_type}")
logger.warning("Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
wait_time, retry_count, max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
time.sleep(wait_time)
continue
print(f"❌ Terminal: execution failed after {max_retries} retries")
print(f" Command: {command[:200]}")
print(f" Error: {type(e).__name__}: {e}")
print(f" Task ID: {effective_task_id}, Backend: {env_type}")
logger.error("Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
max_retries, command[:200], type(e).__name__, e, effective_task_id, env_type)
return json.dumps({
"output": "",
"exit_code": -1,
@@ -1908,7 +1896,7 @@ def check_terminal_requirements() -> bool:
else:
return False
except Exception as e:
print(f"Terminal requirements check failed: {e}")
logger.error("Terminal requirements check failed: %s", e)
return False

View File

@@ -21,10 +21,13 @@ Usage:
print(result["transcript"])
"""
import logging
import os
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# Default STT model -- cheapest and widely available
DEFAULT_STT_MODEL = "whisper-1"
@@ -85,7 +88,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:
# The response is a plain string when response_format="text"
transcript_text = str(transcription).strip()
print(f"[STT] Transcribed {audio_path.name} ({len(transcript_text)} chars)", flush=True)
logger.info("Transcribed %s (%d chars)", audio_path.name, len(transcript_text))
return {
"success": True,
@@ -93,7 +96,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> dict:
}
except Exception as e:
print(f"[STT] Transcription error: {e}", flush=True)
logger.error("Transcription error: %s", e)
return {
"success": False,
"transcript": "",

View File

@@ -23,6 +23,7 @@ Usage:
import asyncio
import datetime
import json
import logging
import os
import shutil
import subprocess
@@ -30,6 +31,8 @@ import tempfile
from pathlib import Path
from typing import Dict, Any, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Optional imports -- providers degrade gracefully if not installed
# ---------------------------------------------------------------------------
@@ -262,7 +265,7 @@ def text_to_speech_tool(
# Truncate very long text with a warning
if len(text) > MAX_TEXT_LENGTH:
print(f"⚠️ TTS text too long ({len(text)} chars), truncating to {MAX_TEXT_LENGTH}")
logger.warning("TTS text too long (%d chars), truncating to %d", len(text), MAX_TEXT_LENGTH)
text = text[:MAX_TEXT_LENGTH]
tts_config = _load_tts_config()
@@ -301,7 +304,7 @@ def text_to_speech_tool(
"success": False,
"error": "ElevenLabs provider selected but 'elevenlabs' package not installed. Run: pip install elevenlabs"
}, ensure_ascii=False)
print(f"🔊 Generating speech with ElevenLabs...")
logger.info("Generating speech with ElevenLabs...")
_generate_elevenlabs(text, file_str, tts_config)
elif provider == "openai":
@@ -310,7 +313,7 @@ def text_to_speech_tool(
"success": False,
"error": "OpenAI provider selected but 'openai' package not installed."
}, ensure_ascii=False)
print(f"🔊 Generating speech with OpenAI TTS...")
logger.info("Generating speech with OpenAI TTS...")
_generate_openai_tts(text, file_str, tts_config)
else:
@@ -320,7 +323,7 @@ def text_to_speech_tool(
"success": False,
"error": "Edge TTS not available. Run: pip install edge-tts"
}, ensure_ascii=False)
print(f"🔊 Generating speech with Edge TTS...")
logger.info("Generating speech with Edge TTS...")
# Edge TTS is async, run it
try:
loop = asyncio.get_running_loop()
@@ -351,7 +354,7 @@ def text_to_speech_tool(
voice_compatible = file_str.endswith(".ogg")
file_size = os.path.getsize(file_str)
print(f"TTS audio saved: {file_str} ({file_size:,} bytes, provider: {provider})")
logger.info("TTS audio saved: %s (%s bytes, provider: %s)", file_str, f"{file_size:,}", provider)
# Build response with MEDIA tag for platform delivery
media_tag = f"MEDIA:{file_str}"
@@ -368,7 +371,7 @@ def text_to_speech_tool(
except Exception as e:
error_msg = f"TTS generation failed ({provider}): {e}"
print(f"{error_msg}")
logger.error("%s", error_msg)
return json.dumps({"success": False, "error": error_msg}, ensure_ascii=False)

View File

@@ -28,6 +28,7 @@ Usage:
"""
import json
import logging
import os
import asyncio
import uuid
@@ -39,6 +40,8 @@ from openai import AsyncOpenAI
import httpx
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
_openrouter_client = None
def _get_openrouter_client():
@@ -71,7 +74,7 @@ DEBUG_DATA = {
# Create logs directory if debug mode is enabled
if DEBUG_MODE:
DEBUG_LOG_PATH.mkdir(exist_ok=True)
print(f"🐛 Vision debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
logger.debug("Vision debug mode enabled - Session ID: %s", DEBUG_SESSION_ID)
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
@@ -112,10 +115,10 @@ def _save_debug_log() -> None:
with open(debug_filepath, 'w', encoding='utf-8') as f:
json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
print(f"🐛 Vision debug log saved: {debug_filepath}")
logger.debug("Vision debug log saved: %s", debug_filepath)
except Exception as e:
print(f"Error saving vision debug log: {str(e)}")
logger.error("Error saving vision debug log: %s", e)
def _validate_image_url(url: str) -> bool:
@@ -184,11 +187,11 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
last_error = e
if attempt < max_retries - 1:
wait_time = 2 ** (attempt + 1) # 2s, 4s, 8s
print(f"⚠️ Image download failed (attempt {attempt + 1}/{max_retries}): {str(e)[:50]}")
print(f" Retrying in {wait_time}s...")
logger.warning("Image download failed (attempt %s/%s): %s", attempt + 1, max_retries, str(e)[:50])
logger.warning("Retrying in %ss...", wait_time)
await asyncio.sleep(wait_time)
else:
print(f"Image download failed after {max_retries} attempts: {str(e)[:100]}")
logger.error("Image download failed after %s attempts: %s", max_retries, str(e)[:100])
raise last_error
@@ -298,8 +301,8 @@ async def vision_analyze_tool(
should_cleanup = True
try:
print(f"🔍 Analyzing image: {image_url[:60]}{'...' if len(image_url) > 60 else ''}", flush=True)
print(f"📝 User prompt: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}", flush=True)
logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100])
# Check API key availability
if not os.getenv("OPENROUTER_API_KEY"):
@@ -309,12 +312,12 @@ async def vision_analyze_tool(
local_path = Path(image_url)
if local_path.is_file():
# Local file path (e.g. from platform image cache) -- skip download
print(f"📁 Using local image file: {image_url}", flush=True)
logger.info("Using local image file: %s", image_url)
temp_image_path = local_path
should_cleanup = False # Don't delete cached/local files
elif _validate_image_url(image_url):
# Remote URL -- download to a temporary location
print(f"⬇️ Downloading image from URL...", flush=True)
logger.info("Downloading image from URL...")
temp_dir = Path("./temp_vision_images")
temp_image_path = temp_dir / f"temp_image_{uuid.uuid4()}.jpg"
await _download_image(image_url, temp_image_path)
@@ -327,14 +330,14 @@ async def vision_analyze_tool(
# Get image file size for logging
image_size_bytes = temp_image_path.stat().st_size
image_size_kb = image_size_bytes / 1024
print(f"Image ready ({image_size_kb:.1f} KB)", flush=True)
logger.info("Image ready (%.1f KB)", image_size_kb)
# Convert image to base64 data URL
print(f"🔄 Converting image to base64...", flush=True)
logger.info("Converting image to base64...")
image_data_url = _image_to_base64_data_url(temp_image_path)
# Calculate size in KB for better readability
data_size_kb = len(image_data_url) / 1024
print(f"Image converted to base64 ({data_size_kb:.1f} KB)", flush=True)
logger.info("Image converted to base64 (%.1f KB)", data_size_kb)
debug_call_data["image_size_bytes"] = image_size_bytes
@@ -360,7 +363,7 @@ async def vision_analyze_tool(
}
]
print(f"🧠 Processing image with {model}...", flush=True)
logger.info("Processing image with %s...", model)
# Call the vision API with reasoning enabled
response = await _get_openrouter_client().chat.completions.create(
@@ -380,7 +383,7 @@ async def vision_analyze_tool(
analysis = response.choices[0].message.content.strip()
analysis_length = len(analysis)
print(f"Image analysis completed ({analysis_length} characters)", flush=True)
logger.info("Image analysis completed (%s characters)", analysis_length)
# Prepare successful response
result = {
@@ -399,7 +402,7 @@ async def vision_analyze_tool(
except Exception as e:
error_msg = f"Error analyzing image: {str(e)}"
print(f"{error_msg}", flush=True)
logger.error("%s", error_msg)
# Prepare error response
result = {
@@ -418,9 +421,9 @@ async def vision_analyze_tool(
if should_cleanup and temp_image_path and temp_image_path.exists():
try:
temp_image_path.unlink()
print(f"🧹 Cleaned up temporary image file", flush=True)
logger.debug("Cleaned up temporary image file")
except Exception as cleanup_error:
print(f"⚠️ Warning: Could not delete temporary file: {cleanup_error}", flush=True)
logger.warning("Could not delete temporary file: %s", cleanup_error)
def check_openrouter_api_key() -> bool:

View File

@@ -41,6 +41,7 @@ Usage:
#TODO: Tool to see what pages are available/saved to search over
import json
import logging
import os
import re
import asyncio
@@ -52,6 +53,8 @@ from firecrawl import Firecrawl
from openai import AsyncOpenAI
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
# Initialize Firecrawl client lazily (only when needed)
# This prevents import errors when FIRECRAWL_API_KEY is not set
_firecrawl_client = None
@@ -100,13 +103,7 @@ DEBUG_DATA = {
# Create logs directory if debug mode is enabled
if DEBUG_MODE:
DEBUG_LOG_PATH.mkdir(exist_ok=True)
_verbose_print(f"🐛 Debug mode enabled - Session ID: {DEBUG_SESSION_ID}")
def _verbose_print(*args, **kwargs):
"""Print only if not in quiet mode (HERMES_QUIET not set)."""
if not os.getenv("HERMES_QUIET"):
print(*args, **kwargs)
logger.info("Debug mode enabled - Session ID: %s", DEBUG_SESSION_ID)
def _log_debug_call(tool_name: str, call_data: Dict[str, Any]) -> None:
@@ -147,10 +144,10 @@ def _save_debug_log() -> None:
with open(debug_filepath, 'w', encoding='utf-8') as f:
json.dump(DEBUG_DATA, f, indent=2, ensure_ascii=False)
_verbose_print(f"🐛 Debug log saved: {debug_filepath}")
logger.debug("Debug log saved: %s", debug_filepath)
except Exception as e:
print(f"Error saving debug log: {str(e)}")
logger.error("Error saving debug log: %s", e)
async def process_content_with_llm(
@@ -192,12 +189,12 @@ async def process_content_with_llm(
# Refuse if content is absurdly large
if content_len > MAX_CONTENT_SIZE:
size_mb = content_len / 1_000_000
_verbose_print(f"🚫 Content too large ({size_mb:.1f}MB > 2MB limit). Refusing to process.")
logger.warning("Content too large (%.1fMB > 2MB limit). Refusing to process.", size_mb)
return f"[Content too large to process: {size_mb:.1f}MB. Try using web_crawl with specific extraction instructions, or search for a more focused source.]"
# Skip processing if content is too short
if content_len < min_length:
_verbose_print(f"📏 Content too short ({content_len} < {min_length} chars), skipping LLM processing")
logger.debug("Content too short (%d < %d chars), skipping LLM processing", content_len, min_length)
return None
# Create context information
@@ -210,13 +207,13 @@ async def process_content_with_llm(
# Check if we need chunked processing
if content_len > CHUNK_THRESHOLD:
_verbose_print(f"📦 Content large ({content_len:,} chars). Using chunked processing...")
logger.info("Content large (%d chars). Using chunked processing...", content_len)
return await _process_large_content_chunked(
content, context_str, model, CHUNK_SIZE, MAX_OUTPUT_SIZE
)
# Standard single-pass processing for normal content
_verbose_print(f"🧠 Processing content with LLM ({content_len} characters)")
logger.info("Processing content with LLM (%d characters)", content_len)
processed_content = await _call_summarizer_llm(content, context_str, model)
@@ -228,12 +225,12 @@ async def process_content_with_llm(
# Log compression metrics
processed_length = len(processed_content)
compression_ratio = processed_length / content_len if content_len > 0 else 1.0
_verbose_print(f"Content processed: {content_len}{processed_length} chars ({compression_ratio:.1%})")
logger.info("Content processed: %d -> %d chars (%.1f%%)", content_len, processed_length, compression_ratio * 100)
return processed_content
except Exception as e:
print(f"Error processing content with LLM: {str(e)}")
logger.error("Error processing content with LLM: %s", e)
return f"[Failed to process content: {str(e)[:100]}. Content size: {len(content):,} chars]"
@@ -325,8 +322,8 @@ Create a markdown summary that captures all key information in a well-organized,
except Exception as api_error:
last_error = api_error
if attempt < max_retries - 1:
_verbose_print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
_verbose_print(f" Retrying in {retry_delay}s...")
logger.warning("LLM API call failed (attempt %d/%d): %s", attempt + 1, max_retries, str(api_error)[:100])
logger.warning("Retrying in %ds...", retry_delay)
await asyncio.sleep(retry_delay)
retry_delay = min(retry_delay * 2, 60)
else:
@@ -362,7 +359,7 @@ async def _process_large_content_chunked(
chunk = content[i:i + chunk_size]
chunks.append(chunk)
_verbose_print(f" 📦 Split into {len(chunks)} chunks of ~{chunk_size:,} chars each")
logger.info("Split into %d chunks of ~%d chars each", len(chunks), chunk_size)
# Summarize each chunk in parallel
async def summarize_chunk(chunk_idx: int, chunk_content: str) -> tuple[int, Optional[str]]:
@@ -378,10 +375,10 @@ async def _process_large_content_chunked(
chunk_info=chunk_info
)
if summary:
_verbose_print(f"Chunk {chunk_idx + 1}/{len(chunks)} summarized: {len(chunk_content):,}{len(summary):,} chars")
logger.info("Chunk %d/%d summarized: %d -> %d chars", chunk_idx + 1, len(chunks), len(chunk_content), len(summary))
return chunk_idx, summary
except Exception as e:
_verbose_print(f" ⚠️ Chunk {chunk_idx + 1}/{len(chunks)} failed: {str(e)[:50]}")
logger.warning("Chunk %d/%d failed: %s", chunk_idx + 1, len(chunks), str(e)[:50])
return chunk_idx, None
# Run all chunk summarizations in parallel
@@ -395,10 +392,10 @@ async def _process_large_content_chunked(
summaries.append(f"## Section {chunk_idx + 1}\n{summary}")
if not summaries:
print(f"All chunk summarizations failed")
logger.error("All chunk summarizations failed")
return "[Failed to process large content: all chunk summarizations failed]"
_verbose_print(f" 📊 Got {len(summaries)}/{len(chunks)} chunk summaries")
logger.info("Got %d/%d chunk summaries", len(summaries), len(chunks))
# If only one chunk succeeded, just return it (with cap)
if len(summaries) == 1:
@@ -408,7 +405,7 @@ async def _process_large_content_chunked(
return result
# Synthesize the summaries into a final summary
_verbose_print(f" 🔗 Synthesizing {len(summaries)} summaries...")
logger.info("Synthesizing %d summaries...", len(summaries))
combined_summaries = "\n\n---\n\n".join(summaries)
@@ -450,11 +447,11 @@ Create a single, unified markdown summary."""
final_len = len(final_summary)
compression = final_len / original_len if original_len > 0 else 1.0
_verbose_print(f"Synthesis complete: {original_len:,}{final_len:,} chars ({compression:.2%})")
logger.info("Synthesis complete: %d -> %d chars (%.2f%%)", original_len, final_len, compression * 100)
return final_summary
except Exception as e:
_verbose_print(f" ⚠️ Synthesis failed: {str(e)[:100]}")
logger.warning("Synthesis failed: %s", str(e)[:100])
# Fall back to concatenated summaries with truncation
fallback = "\n\n".join(summaries)
if len(fallback) > max_output_size:
@@ -541,8 +538,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
}
try:
if not os.getenv("HERMES_QUIET"):
_verbose_print(f"🔍 Searching the web for: '{query}' (limit: {limit})")
logger.info("Searching the web for: '%s' (limit: %d)", query, limit)
# Use Firecrawl's v2 search functionality WITHOUT scraping
# We only want search result metadata, not scraped content
@@ -582,8 +578,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
web_results = response['web']
results_count = len(web_results)
if not os.getenv("HERMES_QUIET"):
_verbose_print(f"✅ Found {results_count} search results")
logger.info("Found %d search results", results_count)
# Build response with just search metadata (URLs, titles, descriptions)
response_data = {
@@ -609,7 +604,7 @@ def web_search_tool(query: str, limit: int = 5) -> str:
except Exception as e:
error_msg = f"Error searching web: {str(e)}"
print(f"{error_msg}")
logger.error("%s", error_msg)
debug_call_data["error"] = error_msg
_log_debug_call("web_search_tool", debug_call_data)
@@ -663,7 +658,7 @@ async def web_extract_tool(
}
try:
_verbose_print(f"📄 Extracting content from {len(urls)} URL(s)")
logger.info("Extracting content from %d URL(s)", len(urls))
# Determine requested formats for Firecrawl v2
formats: List[str] = []
@@ -681,7 +676,7 @@ async def web_extract_tool(
for url in urls:
try:
_verbose_print(f" 📄 Scraping: {url}")
logger.info("Scraping: %s", url)
scrape_result = _get_firecrawl_client().scrape(
url=url,
formats=formats
@@ -745,7 +740,7 @@ async def web_extract_tool(
})
except Exception as scrape_err:
print(f"Error scraping {url}: {str(scrape_err)}")
logger.error("Error scraping %s: %s", url, scrape_err)
results.append({
"url": url,
"title": "",
@@ -757,14 +752,14 @@ async def web_extract_tool(
response = {"results": results}
pages_extracted = len(response.get('results', []))
_verbose_print(f"Extracted content from {pages_extracted} pages")
logger.info("Extracted content from %d pages", pages_extracted)
debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
_verbose_print("🧠 Processing extracted content with LLM (parallel)...")
logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
# Prepare tasks for parallel processing
@@ -822,22 +817,22 @@ async def web_extract_tool(
if status == "processed":
debug_call_data["compression_metrics"].append(metrics)
debug_call_data["pages_processed_with_llm"] += 1
_verbose_print(f" 📝 {url} (processed)")
logger.info("%s (processed)", url)
elif status == "too_short":
debug_call_data["compression_metrics"].append(metrics)
_verbose_print(f" 📝 {url} (no processing - content too short)")
logger.info("%s (no processing - content too short)", url)
else:
_verbose_print(f" ⚠️ {url} (no content to process)")
logger.warning("%s (no content to process)", url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
print("⚠️ LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior)
for result in response.get('results', []):
url = result.get('url', 'Unknown URL')
content_length = len(result.get('raw_content', ''))
_verbose_print(f" 📝 {url} ({content_length} characters)")
logger.info("%s (%d characters)", url, content_length)
# Trim output to minimal fields per entry: title, content, error
trimmed_results = [
@@ -871,7 +866,7 @@ async def web_extract_tool(
except Exception as e:
error_msg = f"Error extracting content: {str(e)}"
print(f"{error_msg}")
logger.error("%s", error_msg)
debug_call_data["error"] = error_msg
_log_debug_call("web_extract_tool", debug_call_data)
@@ -932,10 +927,10 @@ async def web_crawl_tool(
# Ensure URL has protocol
if not url.startswith(('http://', 'https://')):
url = f'https://{url}'
_verbose_print(f" 📝 Added https:// prefix to URL: {url}")
logger.info("Added https:// prefix to URL: %s", url)
instructions_text = f" with instructions: '{instructions}'" if instructions else ""
_verbose_print(f"🕷️ Crawling {url}{instructions_text}")
logger.info("Crawling %s%s", url, instructions_text)
# Use Firecrawl's v2 crawl functionality
# Docs: https://docs.firecrawl.dev/features/crawl
@@ -952,7 +947,7 @@ async def web_crawl_tool(
# Note: The 'prompt' parameter is not documented for crawl
# Instructions are typically used with the Extract endpoint, not Crawl
if instructions:
_verbose_print(f" Note: Instructions parameter ignored (not supported in crawl API)")
logger.info("Instructions parameter ignored (not supported in crawl API)")
# Use the crawl method which waits for completion automatically
try:
@@ -961,7 +956,7 @@ async def web_crawl_tool(
**crawl_params
)
except Exception as e:
print(f"Crawl API call failed: {e}")
logger.error("Crawl API call failed: %s", e)
raise
pages: List[Dict[str, Any]] = []
@@ -972,23 +967,23 @@ async def web_crawl_tool(
# The crawl_result is a CrawlJob object with a 'data' attribute containing list of Document objects
if hasattr(crawl_result, 'data'):
data_list = crawl_result.data if crawl_result.data else []
_verbose_print(f" 📊 Status: {getattr(crawl_result, 'status', 'unknown')}")
_verbose_print(f" 📄 Retrieved {len(data_list)} pages")
logger.info("Status: %s", getattr(crawl_result, 'status', 'unknown'))
logger.info("Retrieved %d pages", len(data_list))
# Debug: Check other attributes if no data
if not data_list:
_verbose_print(f" 🔍 Debug - CrawlJob attributes: {[attr for attr in dir(crawl_result) if not attr.startswith('_')]}")
_verbose_print(f" 🔍 Debug - Status: {getattr(crawl_result, 'status', 'N/A')}")
_verbose_print(f" 🔍 Debug - Total: {getattr(crawl_result, 'total', 'N/A')}")
_verbose_print(f" 🔍 Debug - Completed: {getattr(crawl_result, 'completed', 'N/A')}")
logger.debug("CrawlJob attributes: %s", [attr for attr in dir(crawl_result) if not attr.startswith('_')])
logger.debug("Status: %s", getattr(crawl_result, 'status', 'N/A'))
logger.debug("Total: %s", getattr(crawl_result, 'total', 'N/A'))
logger.debug("Completed: %s", getattr(crawl_result, 'completed', 'N/A'))
elif isinstance(crawl_result, dict) and 'data' in crawl_result:
data_list = crawl_result.get("data", [])
else:
print(" ⚠️ Unexpected crawl result type")
_verbose_print(f" 🔍 Debug - Result type: {type(crawl_result)}")
logger.warning("Unexpected crawl result type")
logger.debug("Result type: %s", type(crawl_result))
if hasattr(crawl_result, '__dict__'):
_verbose_print(f" 🔍 Debug - Result attributes: {list(crawl_result.__dict__.keys())}")
logger.debug("Result attributes: %s", list(crawl_result.__dict__.keys()))
for item in data_list:
# Process each crawled page - properly handle object serialization
@@ -1053,14 +1048,14 @@ async def web_crawl_tool(
response = {"results": pages}
pages_crawled = len(response.get('results', []))
_verbose_print(f"Crawled {pages_crawled} pages")
logger.info("Crawled %d pages", pages_crawled)
debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
_verbose_print("🧠 Processing crawled content with LLM (parallel)...")
logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
# Prepare tasks for parallel processing
@@ -1118,22 +1113,22 @@ async def web_crawl_tool(
if status == "processed":
debug_call_data["compression_metrics"].append(metrics)
debug_call_data["pages_processed_with_llm"] += 1
_verbose_print(f" 🌐 {page_url} (processed)")
logger.info("%s (processed)", page_url)
elif status == "too_short":
debug_call_data["compression_metrics"].append(metrics)
_verbose_print(f" 🌐 {page_url} (no processing - content too short)")
logger.info("%s (no processing - content too short)", page_url)
else:
_verbose_print(f" ⚠️ {page_url} (no content to process)")
logger.warning("%s (no content to process)", page_url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
print("⚠️ LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior)
for result in response.get('results', []):
page_url = result.get('url', 'Unknown URL')
content_length = len(result.get('content', ''))
_verbose_print(f" 🌐 {page_url} ({content_length} characters)")
logger.info("%s (%d characters)", page_url, content_length)
# Trim output to minimal fields per entry: title, content, error
trimmed_results = [
@@ -1161,7 +1156,7 @@ async def web_crawl_tool(
except Exception as e:
error_msg = f"Error crawling website: {str(e)}"
print(f"{error_msg}")
logger.error("%s", error_msg)
debug_call_data["error"] = error_msg
_log_debug_call("web_crawl_tool", debug_call_data)
@@ -1255,7 +1250,7 @@ if __name__ == "__main__":
# Show debug mode status
if DEBUG_MODE:
_verbose_print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
print(f"🐛 Debug mode ENABLED - Session ID: {DEBUG_SESSION_ID}")
print(f" Debug logs will be saved to: ./logs/web_tools_debug_{DEBUG_SESSION_ID}.json")
else:
print("🐛 Debug mode disabled (set WEB_TOOLS_DEBUG=true to enable)")