diff --git a/.env.example b/.env.example index 9f701bf3..ccd724be 100644 --- a/.env.example +++ b/.env.example @@ -106,9 +106,12 @@ BROWSERBASE_PROXIES=true # Enable advanced stealth mode (default: false, requires Scale Plan) BROWSERBASE_ADVANCED_STEALTH=false -# Browser session timeout in seconds (default: 300) +# Browser session timeout in seconds - Browserbase session duration (default: 300) BROWSER_SESSION_TIMEOUT=300 +# Browser inactivity timeout in seconds - auto-cleanup inactive sessions (default: 120) +BROWSER_INACTIVITY_TIMEOUT=120 + # ============================================================================= # LEGACY/OPTIONAL # ============================================================================= diff --git a/TODO.md b/TODO.md index bfce758d..6d60ac22 100644 --- a/TODO.md +++ b/TODO.md @@ -4,7 +4,42 @@ --- -## 1. Memory & Context Management 🧠 +## 🚨 HIGH PRIORITY - Immediate Fixes + +These items need to be addressed ASAP: + +### 1. SUDO Breaking Terminal Tool 🔐 +- [ ] **Problem:** SUDO commands break the terminal tool execution +- [ ] **Fix:** Handle password prompts / TTY requirements gracefully +- [ ] **Options:** + - Configure passwordless sudo for specific commands + - Detect sudo and warn user / request alternative approach + - Use `sudo -S` with stdin handling if password can be provided securely + +### 2. Fix `browser_get_images` Tool 🖼️ +- [ ] **Problem:** `browser_get_images` tool is broken/not working correctly +- [ ] **Debug:** Investigate what's failing - selector issues? async timing? +- [ ] **Fix:** Ensure it properly extracts image URLs and alt text from pages + +### 3. Better Action Logging for Debugging 📝 +- [ ] **Problem:** Need better logging of agent actions for debugging +- [ ] **Implementation:** + - Log all tool calls with inputs/outputs + - Timestamps for each action + - Structured log format (JSON?) for easy parsing + - Log levels (DEBUG, INFO, ERROR) + - Option to write to file vs stdout + +### 4. 
Stream Thinking Summaries in Real-Time 💭 +- [ ] **Problem:** Thinking/reasoning summaries not shown while streaming +- [ ] **Implementation:** + - Use streaming API to show thinking summaries as they're generated + - Display intermediate reasoning before final response + - Let user see the agent "thinking" in real-time + +--- + +## 1. Context Management **Problem:** Context grows unbounded during long conversations. Trajectory compression exists for training data post-hoc, but live conversations lack intelligent context management. diff --git a/cli.py b/cli.py index 0fd9a06b..23cb25cd 100755 --- a/cli.py +++ b/cli.py @@ -67,6 +67,9 @@ def load_cli_config() -> Dict[str, Any]: "singularity_image": "docker://python:3.11", "modal_image": "python:3.11", }, + "browser": { + "inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min + }, "agent": { "max_turns": 20, "verbose": False, @@ -138,6 +141,16 @@ def load_cli_config() -> Dict[str, Any]: if config_key in terminal_config: os.environ[env_var] = str(terminal_config[config_key]) + # Apply browser config to environment variables + browser_config = defaults.get("browser", {}) + browser_env_mappings = { + "inactivity_timeout": "BROWSER_INACTIVITY_TIMEOUT", + } + + for config_key, env_var in browser_env_mappings.items(): + if config_key in browser_config: + os.environ[env_var] = str(browser_config[config_key]) + return defaults # Load configuration at module startup diff --git a/package-lock.json b/package-lock.json index bc944395..2d16eeb8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,9 +7,13 @@ "": { "name": "hermes-agent", "version": "1.0.0", - "license": "ISC", + "hasInstallScript": true, + "license": "MIT", "dependencies": { "agent-browser": "^0.7.6" + }, + "engines": { + "node": ">=18.0.0" } }, "node_modules/agent-browser": { diff --git a/setup-hermes.sh b/setup-hermes.sh new file mode 100755 index 00000000..f5b78f26 --- /dev/null +++ b/setup-hermes.sh @@ -0,0 +1,149 @@ +#!/bin/bash 
# Hermes Agent Setup Script
# Automated setup for all dependencies and configuration
#
# Can be invoked from any working directory: the script first switches to the
# directory it is stored in, and every relative path below (venv,
# requirements.txt, .env.example, ...) is resolved from there.

set -e

echo "========================================="
echo "Hermes Agent Setup"
echo "========================================="
echo ""

# Change to the directory containing this script instead of a hard-coded
# absolute path, so the setup works wherever the repository is checked out.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Check Python version
echo "[1/10] Checking Python version..."
# Fail early with a clear message: without this check a missing python3 goes
# unnoticed, because the pipeline's exit status is that of the last `cut`.
if ! command -v python3 &> /dev/null; then
    echo "✗ python3 not found; please install Python 3 and re-run this script" >&2
    exit 1
fi
python_version=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)
echo "✓ Python $python_version detected"
echo ""

# Install uv
echo "[2/10] Installing uv (fast Python package installer)..."
if ! command -v uv &> /dev/null; then
    echo "Installing uv..."
    curl -LsSf https://astral.sh/uv/install.sh | sh
    # NOTE(review): the installer's target directory is understood to be
    # ~/.local/bin on current releases and ~/.cargo/bin on older ones; both
    # are added so either layout works -- confirm against the uv docs.
    export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
    # `curl | sh` can fail without tripping `set -e`, so verify the result
    # before reporting success.
    if ! command -v uv &> /dev/null; then
        echo "✗ uv installation failed or uv is not on PATH" >&2
        exit 1
    fi
    echo "✓ uv installed"
else
    echo "✓ uv already installed: $(uv --version)"
fi
echo ""

# Install Node.js 20 using NodeSource
echo "[3/10] Installing Node.js 20..."
# Install when node is absent or its major version is below 20.
if ! command -v node &> /dev/null || [[ $(node --version | cut -d'v' -f2 | cut -d'.' -f1) -lt 20 ]]; then
    echo "Installing Node.js 20 LTS..."
    curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
    sudo apt-get install -y nodejs
    echo "✓ Node.js installed"
else
    echo "✓ Node.js 20+ already installed: $(node --version)"
fi
echo ""

# Initialize git submodules
echo "[4/10] Initializing git submodules..."
git submodule update --init --recursive
echo "✓ Submodules initialized"
echo ""

# Create Python virtual environment with uv
echo "[5/10] Creating Python virtual environment with uv..."
if [ -d "venv" ]; then
    echo "Virtual environment already exists, skipping..."
else
    uv venv venv
    echo "✓ Virtual environment created with uv"
fi
echo ""

# Activate virtual environment and install Python packages with uv
echo "[6/10] Installing Python dependencies with uv..."
+source venv/bin/activate +uv pip install -r requirements.txt +echo "✓ Python packages installed" +echo "" + +# Install mini-swe-agent with uv +echo "[7/10] Installing mini-swe-agent..." +uv pip install -e ./mini-swe-agent +echo "✓ mini-swe-agent installed" +echo "" + +# Install Node.js dependencies +echo "[8/10] Installing Node.js dependencies..." +npm install +echo "✓ Node.js packages installed" +echo "" + +# Set up environment file +echo "[9/10] Setting up environment configuration..." +if [ -f ".env" ]; then + echo ".env file already exists, creating backup..." + cp .env .env.backup.$(date +%Y%m%d_%H%M%S) +fi +cp .env.example .env +echo "✓ .env file created from .env.example" +echo "" + +# Set up CLI config +echo "[10/10] Setting up CLI configuration..." +if [ ! -f "cli-config.yaml" ]; then + cp cli-config.yaml.example cli-config.yaml + echo "✓ cli-config.yaml created from example" +else + echo "cli-config.yaml already exists, skipping..." +fi +echo "" + +# Show Node.js and Python versions +echo "=========================================" +echo "Setup Complete!" +echo "=========================================" +echo "" +echo "Installed versions:" +echo " Node.js: $(node --version)" +echo " npm: $(npm --version)" +echo " Python: $(python3 --version)" +echo " uv: $(uv --version)" +echo "" + +echo "=========================================" +echo "Next Steps:" +echo "=========================================" +echo "" +echo "1. Configure API Keys in .env file:" +echo " nano .env" +echo "" +echo " Required API keys:" +echo " - OPENROUTER_API_KEY (https://openrouter.ai/keys)" +echo " - FIRECRAWL_API_KEY (https://firecrawl.dev/)" +echo " - NOUS_API_KEY (https://inference-api.nousresearch.com/)" +echo " - FAL_KEY (https://fal.ai/)" +echo "" +echo " Optional API keys:" +echo " - BROWSERBASE_API_KEY (https://browserbase.com/)" +echo " - BROWSERBASE_PROJECT_ID" +echo "" +echo "2. Activate the virtual environment:" +echo " source venv/bin/activate" +echo "" +echo "3. 
Run the CLI:" +echo " ./hermes" +echo "" +echo "4. Or run a single query:" +echo " python run_agent.py --query \"your question here\"" +echo "" +echo "5. List available tools:" +echo " python run_agent.py --list_tools" +echo "" +echo "=========================================" +echo "Configuration Files:" +echo "=========================================" +echo " .env - API keys and environment variables" +echo " cli-config.yaml - CLI settings and preferences" +echo "" +echo "For more information, see README.md" +echo "" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 6ee5c0ae..117b96d1 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -51,6 +51,8 @@ import subprocess import shutil import sys import asyncio +import threading +import time import requests from typing import Dict, Any, Optional, List from pathlib import Path @@ -86,6 +88,22 @@ _active_sessions: Dict[str, Dict[str, str]] = {} # task_id -> {session_name, bb # Flag to track if cleanup has been done _cleanup_done = False +# ============================================================================= +# Inactivity Timeout Configuration +# ============================================================================= + +# Session inactivity timeout (seconds) - cleanup if no activity for this long +# Default: 2 minutes. Can be configured via environment variable. 
def _parse_inactivity_timeout(raw_value: Optional[str], default: int = 120) -> int:
    """
    Convert the raw BROWSER_INACTIVITY_TIMEOUT value into a number of seconds.

    Args:
        raw_value: Text read from the environment, or None when the variable
            is not set.
        default: Value returned when raw_value is missing or is not a valid
            integer (including the empty string).

    Returns:
        The parsed timeout in seconds, or ``default`` as a fallback.
    """
    if raw_value is None:
        return default
    try:
        return int(raw_value)
    except ValueError:
        # A malformed setting must not make the whole module fail to import.
        if not os.getenv("HERMES_QUIET"):
            print(f"[browser_tool] Invalid BROWSER_INACTIVITY_TIMEOUT={raw_value!r}; "
                  f"using default of {default}s", file=sys.stderr)
        return default


# Seconds without activity after which a session is considered inactive.
BROWSER_SESSION_INACTIVITY_TIMEOUT = _parse_inactivity_timeout(
    os.environ.get("BROWSER_INACTIVITY_TIMEOUT")
)

# Track last activity time per session (task_id -> time.time() timestamp)
_session_last_activity: Dict[str, float] = {}

# Background cleanup thread state
_cleanup_thread: Optional[threading.Thread] = None
_cleanup_running = False
# Guards _session_last_activity and the start of the cleanup thread.
_cleanup_lock = threading.Lock()


# =============================================================================
# Inactivity Cleanup Functions
# =============================================================================

def _cleanup_inactive_browser_sessions():
    """
    Clean up browser sessions that have been inactive for longer than the timeout.

    Called periodically by the background cleanup thread. Candidate sessions
    are collected under the lock, then each one is re-validated immediately
    before it is closed: closing the earlier candidates can take a while, and
    a session that was used in the meantime must not be torn down.

    Errors raised while cleaning up one session are reported (unless
    HERMES_QUIET is set) and do not prevent the remaining sessions from being
    processed.
    """
    now = time.time()

    with _cleanup_lock:
        stale_task_ids = [
            task_id
            for task_id, last_time in _session_last_activity.items()
            if now - last_time > BROWSER_SESSION_INACTIVITY_TIMEOUT
        ]

    for task_id in stale_task_ids:
        # Re-read the timestamp under the lock; the snapshot above may be outdated.
        with _cleanup_lock:
            last_time = _session_last_activity.get(task_id)
        if last_time is None:
            continue  # tracking entry already removed by another code path
        idle_seconds = time.time() - last_time
        if idle_seconds <= BROWSER_SESSION_INACTIVITY_TIMEOUT:
            continue  # session became active again since the snapshot

        try:
            if not os.getenv("HERMES_QUIET"):
                print(f"[browser_tool] Cleaning up inactive session for task: {task_id} "
                      f"(inactive for {int(idle_seconds)}s)", file=sys.stderr)
            cleanup_browser(task_id)
            with _cleanup_lock:
                # Only drop the entry we acted on: if the timestamp changed,
                # the task was used again while cleanup was in progress.
                if _session_last_activity.get(task_id) == last_time:
                    del _session_last_activity[task_id]
        except Exception as e:
            # Best-effort background work: report the failure and move on.
            if not os.getenv("HERMES_QUIET"):
                print(f"[browser_tool] Error cleaning up inactive session {task_id}: {e}", file=sys.stderr)


def _browser_cleanup_thread_worker():
    """
    Background thread body that periodically cleans up inactive browser sessions.

    Performs one sweep, then waits roughly 30 seconds before the next one,
    until _cleanup_running is set to False.
    """
    while _cleanup_running:
        try:
            _cleanup_inactive_browser_sessions()
        except Exception as e:
            # Keep the worker alive even if a sweep fails unexpectedly.
            if not os.getenv("HERMES_QUIET"):
                print(f"[browser_tool] Cleanup thread error: {e}", file=sys.stderr)

        # Sleep in 1-second intervals so we can stop quickly if needed
        for _ in range(30):
            if not _cleanup_running:
                break
            time.sleep(1)


def _start_browser_cleanup_thread():
    """Start the background cleanup thread if it is not already running."""
    global _cleanup_thread, _cleanup_running

    with _cleanup_lock:
        if _cleanup_thread is not None and _cleanup_thread.is_alive():
            return

        _cleanup_running = True
        # Daemon thread: it must never keep the interpreter alive on exit.
        _cleanup_thread = threading.Thread(
            target=_browser_cleanup_thread_worker,
            daemon=True,
            name="browser-cleanup",
        )
        _cleanup_thread.start()
        if not os.getenv("HERMES_QUIET"):
            print(f"[browser_tool] Started inactivity cleanup thread "
                  f"(timeout: {BROWSER_SESSION_INACTIVITY_TIMEOUT}s)", file=sys.stderr)


def _stop_browser_cleanup_thread():
    """
    Ask the background cleanup thread to stop and wait briefly for it.

    Waits at most 5 seconds; the worker polls the stop flag about once per
    second while idle, but may take longer when it is in the middle of a sweep.
    """
    global _cleanup_running
    _cleanup_running = False
    worker = _cleanup_thread
    if worker is not None:
        worker.join(timeout=5)


def _update_session_activity(task_id: str):
    """
    Record the current time as the last activity of the given session.

    Args:
        task_id: Identifier of the task whose session was just used.
    """
    with _cleanup_lock:
        _session_last_activity[task_id] = time.time()


# Register cleanup thread stop on exit
atexit.register(_stop_browser_cleanup_thread)
+ Also starts the inactivity cleanup thread and updates activity tracking. Args: task_id: Unique identifier for the task @@ -471,6 +584,12 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: if task_id is None: task_id = "default" + # Start the cleanup thread if not running (handles inactivity timeouts) + _start_browser_cleanup_thread() + + # Update activity timestamp for this session + _update_session_activity(task_id) + # Check if we already have a session for this task if task_id in _active_sessions: return _active_sessions[task_id] @@ -1334,7 +1453,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: """ Clean up browser session for a task. - Called automatically when a task completes. + Called automatically when a task completes or when inactivity timeout is reached. Closes both the agent-browser session and the Browserbase session. Args: @@ -1373,6 +1492,11 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: print(f"[browser_tool] Removed task {task_id} from active sessions", file=sys.stderr) elif not os.getenv("HERMES_QUIET"): print(f"[browser_tool] No active session found for task_id: {task_id}", file=sys.stderr) + + # Clean up activity tracking + with _cleanup_lock: + if task_id in _session_last_activity: + del _session_last_activity[task_id] def cleanup_all_browsers() -> None: @@ -1383,6 +1507,10 @@ def cleanup_all_browsers() -> None: """ for task_id in list(_active_sessions.keys()): cleanup_browser(task_id) + + # Clear any remaining activity tracking + with _cleanup_lock: + _session_last_activity.clear() def get_active_browser_sessions() -> Dict[str, Dict[str, str]]: