Implement browser session inactivity timeout and cleanup

- Updated `.env.example` to include `BROWSER_INACTIVITY_TIMEOUT` for auto-cleanup of inactive sessions.
- Enhanced `cli.py` to load the new inactivity timeout configuration into environment variables.
- Added a background thread in `browser_tool.py` that periodically cleans up inactive browser sessions based on the configured timeout.
- Improved session management by tracking last-activity timestamps and cleaning up sessions that exceed the inactivity limit.
2026-01-31 21:42:15 -08:00
parent c360da4f35
commit 20f2875472
6 changed files with 336 additions and 4 deletions
--- a/.env.example
+++ b/.env.example
@@ -106,9 +106,12 @@ BROWSERBASE_PROXIES=true
 # Enable advanced stealth mode (default: false, requires Scale Plan)
 BROWSERBASE_ADVANCED_STEALTH=false

-# Browser session timeout in seconds (default: 300)
+# Browser session timeout in seconds - Browserbase session duration (default: 300)
 BROWSER_SESSION_TIMEOUT=300

+# Browser inactivity timeout in seconds - auto-cleanup inactive sessions (default: 120)
+BROWSER_INACTIVITY_TIMEOUT=120
+
 # =============================================================================
 # LEGACY/OPTIONAL
 # =============================================================================
--- a/TODO.md
+++ b/TODO.md
@@ -4,7 +4,42 @@

 ---

-## 1. Memory & Context Management 🧠
+## 🚨 HIGH PRIORITY - Immediate Fixes
+
+These items need to be addressed ASAP:
+
+### 1. SUDO Breaking Terminal Tool 🔐
+- [ ] **Problem:** SUDO commands break the terminal tool execution
+- [ ] **Fix:** Handle password prompts / TTY requirements gracefully
+- [ ] **Options:**
+  - Configure passwordless sudo for specific commands
+  - Detect sudo and warn user / request alternative approach
+  - Use `sudo -S` with stdin handling if password can be provided securely
+
+### 2. Fix `browser_get_images` Tool 🖼️
+- [ ] **Problem:** `browser_get_images` tool is broken/not working correctly
+- [ ] **Debug:** Investigate what's failing (selector issues? async timing?)
+- [ ] **Fix:** Ensure it properly extracts image URLs and alt text from pages
+
+### 3. Better Action Logging for Debugging 📝
+- [ ] **Problem:** Need better logging of agent actions for debugging
+- [ ] **Implementation:**
+  - Log all tool calls with inputs/outputs
+  - Timestamps for each action
+  - Structured log format (JSON?) for easy parsing
+  - Log levels (DEBUG, INFO, ERROR)
+  - Option to write to file vs stdout
+
+### 4. Stream Thinking Summaries in Real-Time 💭
+- [ ] **Problem:** Thinking/reasoning summaries not shown while streaming
+- [ ] **Implementation:**
+  - Use streaming API to show thinking summaries as they're generated
+  - Display intermediate reasoning before final response
+  - Let user see the agent "thinking" in real-time
+
+---
+
+## 1. Context Management

 **Problem:** Context grows unbounded during long conversations. Trajectory compression exists for training data post-hoc, but live conversations lack intelligent context management.

--- a/cli.py
+++ b/cli.py
@@ -67,6 +67,9 @@ def load_cli_config() -> Dict[str, Any]:
            "singularity_image": "docker://python:3.11",
            "modal_image": "python:3.11",
        },
+        "browser": {
+            "inactivity_timeout": 120,  # Auto-cleanup inactive browser sessions after 2 min
+        },
        "agent": {
            "max_turns": 20,
            "verbose": False,
@@ -138,6 +141,16 @@ def load_cli_config() -> Dict[str, Any]:
        if config_key in terminal_config:
            os.environ[env_var] = str(terminal_config[config_key])
    
+    # Apply browser config to environment variables
+    browser_config = defaults.get("browser", {})
+    browser_env_mappings = {
+        "inactivity_timeout": "BROWSER_INACTIVITY_TIMEOUT",
+    }
+    
+    for config_key, env_var in browser_env_mappings.items():
+        if config_key in browser_config:
+            os.environ[env_var] = str(browser_config[config_key])
+    
    return defaults

 # Load configuration at module startup
--- a/package-lock.json
+++ b/package-lock.json
@@ -7,9 +7,13 @@
    "": {
      "name": "hermes-agent",
      "version": "1.0.0",
-      "license": "ISC",
+      "hasInstallScript": true,
+      "license": "MIT",
      "dependencies": {
        "agent-browser": "^0.7.6"
+      },
+      "engines": {
+        "node": ">=18.0.0"
      }
    },
    "node_modules/agent-browser": {
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+# Hermes Agent Setup Script
+# Automated setup for all dependencies and configuration
+
+# Abort on the first command that fails.
+set -e
+
+echo "========================================="
+echo "Hermes Agent Setup"
+echo "========================================="
+echo ""
+
+# Change to the directory containing this script (the hermes-agent checkout)
+# so the relative paths used below work on any machine and from any cwd.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Check Python version
+echo "[1/10] Checking Python version..."
+# Fail early with a clear message: the version pipeline below would otherwise
+# mask a missing interpreter, because its exit status comes from `cut`.
+if ! command -v python3 &> /dev/null; then
+    echo "✗ python3 not found. Install Python 3 and re-run this script." >&2
+    exit 1
+fi
+python_version=$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)
+echo "✓ Python $python_version detected"
+echo ""
+
+# Install uv
+echo "[2/10] Installing uv (fast Python package installer)..."
+if ! command -v uv &> /dev/null; then
+    echo "Installing uv..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    # Current uv installers place the binary in ~/.local/bin; older releases
+    # used ~/.cargo/bin. Add both so `uv` resolves for the rest of the script.
+    export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
+    echo "✓ uv installed"
+else
+    echo "✓ uv already installed: $(uv --version)"
+fi
+echo ""
+
+# Install Node.js 20 using NodeSource
+# Runs when `node` is missing or its major version is below 20. The NodeSource
+# setup script and apt-get are invoked through sudo.
+echo "[3/10] Installing Node.js 20..."
+if ! command -v node &> /dev/null || [[ $(node --version | cut -d'v' -f2 | cut -d'.' -f1) -lt 20 ]]; then
+    echo "Installing Node.js 20 LTS..."
+    curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash -
+    sudo apt-get install -y nodejs
+    echo "✓ Node.js installed"
+else
+    echo "✓ Node.js 20+ already installed: $(node --version)"
+fi
+echo ""
+
+# Initialize git submodules
+echo "[4/10] Initializing git submodules..."
+git submodule update --init --recursive
+echo "✓ Submodules initialized"
+echo ""
+
+# Create Python virtual environment with uv
+# An existing ./venv directory is reused as-is rather than recreated.
+echo "[5/10] Creating Python virtual environment with uv..."
+if [ -d "venv" ]; then
+    echo "Virtual environment already exists, skipping..."
+else
+    uv venv venv
+    echo "✓ Virtual environment created with uv"
+fi
+echo ""
+
+# Activate virtual environment and install Python packages with uv
+echo "[6/10] Installing Python dependencies with uv..."
+source venv/bin/activate
+uv pip install -r requirements.txt
+echo "✓ Python packages installed"
+echo ""
+
+# Install mini-swe-agent with uv
+# Editable install (-e) from the ./mini-swe-agent directory.
+echo "[7/10] Installing mini-swe-agent..."
+uv pip install -e ./mini-swe-agent
+echo "✓ mini-swe-agent installed"
+echo ""
+
+# Install Node.js dependencies
+echo "[8/10] Installing Node.js dependencies..."
+npm install
+echo "✓ Node.js packages installed"
+echo ""
+
+# Set up environment file
+# NOTE: an existing .env is copied to a timestamped .env.backup.* file and is
+# then overwritten with .env.example.
+echo "[9/10] Setting up environment configuration..."
+if [ -f ".env" ]; then
+    echo ".env file already exists, creating backup..."
+    cp .env .env.backup.$(date +%Y%m%d_%H%M%S)
+fi
+cp .env.example .env
+echo "✓ .env file created from .env.example"
+echo ""
+
+# Set up CLI config
+# Unlike .env above, an existing cli-config.yaml is left untouched.
+echo "[10/10] Setting up CLI configuration..."
+if [ ! -f "cli-config.yaml" ]; then
+    cp cli-config.yaml.example cli-config.yaml
+    echo "✓ cli-config.yaml created from example"
+else
+    echo "cli-config.yaml already exists, skipping..."
+fi
+echo ""
+
+# Print the versions of the installed tools (Node.js, npm, Python, uv)
+echo "========================================="
+echo "Setup Complete!"
+echo "========================================="
+echo ""
+echo "Installed versions:"
+echo "  Node.js: $(node --version)"
+echo "  npm: $(npm --version)"
+echo "  Python: $(python3 --version)"
+echo "  uv: $(uv --version)"
+echo ""
+
+# Print follow-up instructions for the user; everything below only echoes text.
+echo "========================================="
+echo "Next Steps:"
+echo "========================================="
+echo ""
+echo "1. Configure API Keys in .env file:"
+echo "   nano .env"
+echo ""
+echo "   Required API keys:"
+echo "   - OPENROUTER_API_KEY (https://openrouter.ai/keys)"
+echo "   - FIRECRAWL_API_KEY (https://firecrawl.dev/)"
+echo "   - NOUS_API_KEY (https://inference-api.nousresearch.com/)"
+echo "   - FAL_KEY (https://fal.ai/)"
+echo ""
+echo "   Optional API keys:"
+echo "   - BROWSERBASE_API_KEY (https://browserbase.com/)"
+echo "   - BROWSERBASE_PROJECT_ID"
+echo ""
+echo "2. Activate the virtual environment:"
+echo "   source venv/bin/activate"
+echo ""
+echo "3. Run the CLI:"
+echo "   ./hermes"
+echo ""
+echo "4. Or run a single query:"
+echo "   python run_agent.py --query \"your question here\""
+echo ""
+echo "5. List available tools:"
+echo "   python run_agent.py --list_tools"
+echo ""
+# List the configuration files created by the steps above
+echo "========================================="
+echo "Configuration Files:"
+echo "========================================="
+echo "  .env - API keys and environment variables"
+echo "  cli-config.yaml - CLI settings and preferences"
+echo ""
+echo "For more information, see README.md"
+echo ""
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -51,6 +51,8 @@ import subprocess
 import shutil
 import sys
 import asyncio
+import threading
+import time
 import requests
 from typing import Dict, Any, Optional, List
 from pathlib import Path
@@ -86,6 +88,22 @@ _active_sessions: Dict[str, Dict[str, str]] = {}  # task_id -> {session_name, bb
 # Flag to track if cleanup has been done
 _cleanup_done = False

+# =============================================================================
+# Inactivity Timeout Configuration
+# =============================================================================
+
+# Session inactivity timeout (seconds) - cleanup if no activity for this long
+# Default: 2 minutes. Can be configured via environment variable.
+# A blank or non-integer value falls back to the default instead of raising
+# ValueError while this module is being imported.
+try:
+    BROWSER_SESSION_INACTIVITY_TIMEOUT = int(os.environ.get("BROWSER_INACTIVITY_TIMEOUT", "120"))
+except ValueError:
+    BROWSER_SESSION_INACTIVITY_TIMEOUT = 120
+    if not os.getenv("HERMES_QUIET"):
+        print("[browser_tool] Invalid BROWSER_INACTIVITY_TIMEOUT value; "
+              "using default of 120s", file=sys.stderr)
+
+# Track last activity time per session (task_id -> time.time() timestamp)
+_session_last_activity: Dict[str, float] = {}
+
+# Background cleanup thread state
+_cleanup_thread = None        # threading.Thread once started, else None
+_cleanup_running = False      # loop flag polled by the cleanup thread
+_cleanup_lock = threading.Lock()  # guards _session_last_activity and thread start
+
+

 def _emergency_cleanup_all_sessions():
    """
@@ -157,6 +175,100 @@ except (OSError, AttributeError):
    pass  # Signal handling not available (e.g., Windows or worker process)


+# =============================================================================
+# Inactivity Cleanup Functions
+# =============================================================================
+
+def _cleanup_inactive_browser_sessions():
+    """
+    Clean up browser sessions that have been inactive for longer than the timeout.
+
+    Called periodically by the background cleanup thread. A session counts as
+    inactive when more than BROWSER_SESSION_INACTIVITY_TIMEOUT seconds have
+    passed since its last recorded activity. Each stale candidate is re-checked
+    right before it is closed, so a session that was used (or already cleaned
+    up) after the scan is left alone. Failures are reported on stderr unless
+    HERMES_QUIET is set and never propagate to the caller.
+    """
+    current_time = time.time()
+
+    # Collect candidates under the lock; the actual cleanup happens outside it.
+    with _cleanup_lock:
+        stale_task_ids = [
+            task_id
+            for task_id, last_time in _session_last_activity.items()
+            if current_time - last_time > BROWSER_SESSION_INACTIVITY_TIMEOUT
+        ]
+
+    for task_id in stale_task_ids:
+        # Re-read the timestamp: the session may have been used or removed
+        # since the scan above (or while earlier sessions were being closed).
+        with _cleanup_lock:
+            last_time = _session_last_activity.get(task_id)
+        if last_time is None:
+            continue
+        idle_seconds = time.time() - last_time
+        if idle_seconds <= BROWSER_SESSION_INACTIVITY_TIMEOUT:
+            continue
+
+        try:
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[browser_tool] Cleaning up inactive session for task: {task_id} "
+                      f"(inactive for {int(idle_seconds)}s)", file=sys.stderr)
+            # cleanup_browser() acquires _cleanup_lock itself, so it must be
+            # called with the lock released.
+            cleanup_browser(task_id)
+            # Drop any tracking entry that is still present after cleanup.
+            with _cleanup_lock:
+                _session_last_activity.pop(task_id, None)
+        except Exception as e:
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[browser_tool] Error cleaning up inactive session {task_id}: {e}", file=sys.stderr)
+
+
+def _browser_cleanup_thread_worker():
+    """
+    Body of the background thread that sweeps inactive browser sessions.
+
+    Loops for as long as the module-level _cleanup_running flag is true: runs
+    one inactivity sweep, then waits up to 30 seconds before the next one.
+    Errors raised by a sweep are reported on stderr (unless HERMES_QUIET is
+    set) and do not end the loop.
+    """
+    while _cleanup_running:
+        try:
+            _cleanup_inactive_browser_sessions()
+        except Exception as e:
+            if not os.getenv("HERMES_QUIET"):
+                print(f"[browser_tool] Cleanup thread error: {e}", file=sys.stderr)
+
+        # Wait in 1-second slices, re-reading the flag before each slice so a
+        # stop request is noticed within about a second.
+        seconds_left = 30
+        while seconds_left > 0 and _cleanup_running:
+            time.sleep(1)
+            seconds_left -= 1
+
+
+def _start_browser_cleanup_thread():
+    """
+    Launch the daemon cleanup thread unless one is already alive.
+
+    The check and the launch both happen while holding _cleanup_lock. On
+    launch, _cleanup_running is set to True and a startup message is written
+    to stderr unless HERMES_QUIET is set.
+    """
+    global _cleanup_thread, _cleanup_running
+
+    with _cleanup_lock:
+        already_running = _cleanup_thread is not None and _cleanup_thread.is_alive()
+        if already_running:
+            return
+
+        _cleanup_running = True
+        worker = threading.Thread(
+            name="browser-cleanup",
+            target=_browser_cleanup_thread_worker,
+            daemon=True,
+        )
+        _cleanup_thread = worker
+        worker.start()
+        if not os.getenv("HERMES_QUIET"):
+            print(f"[browser_tool] Started inactivity cleanup thread "
+                  f"(timeout: {BROWSER_SESSION_INACTIVITY_TIMEOUT}s)", file=sys.stderr)
+
+
+def _stop_browser_cleanup_thread():
+    """
+    Ask the background cleanup thread to stop and wait briefly for it.
+
+    Clears the _cleanup_running flag, then joins the thread (if one was ever
+    created) for at most 5 seconds.
+    """
+    global _cleanup_running
+    _cleanup_running = False
+
+    worker = _cleanup_thread
+    if worker is None:
+        return
+    worker.join(timeout=5)
+
+
+def _update_session_activity(task_id: str):
+    """
+    Record "now" as the last activity time of the given task's session.
+
+    Args:
+        task_id: Key under which the timestamp is stored in _session_last_activity.
+    """
+    with _cleanup_lock:
+        now = time.time()
+        _session_last_activity[task_id] = now
+
+
+# Register cleanup thread stop on exit: at interpreter shutdown the running
+# flag is cleared and the thread is joined (see _stop_browser_cleanup_thread).
+atexit.register(_stop_browser_cleanup_thread)
+
+
 # ============================================================================
 # Tool Schemas
 # ============================================================================
@@ -461,6 +573,7 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
    Get or create session info for the given task.
    
    Creates a Browserbase session with proxies enabled if one doesn't exist.
+    Also starts the inactivity cleanup thread and updates activity tracking.
    
    Args:
        task_id: Unique identifier for the task
@@ -471,6 +584,12 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
    if task_id is None:
        task_id = "default"
    
+    # Start the cleanup thread if not running (handles inactivity timeouts)
+    _start_browser_cleanup_thread()
+    
+    # Update activity timestamp for this session
+    _update_session_activity(task_id)
+    
    # Check if we already have a session for this task
    if task_id in _active_sessions:
        return _active_sessions[task_id]
@@ -1334,7 +1453,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
    """
    Clean up browser session for a task.
    
-    Called automatically when a task completes.
+    Called automatically when a task completes or when inactivity timeout is reached.
    Closes both the agent-browser session and the Browserbase session.
    
    Args:
@@ -1374,6 +1493,11 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
    elif not os.getenv("HERMES_QUIET"):
        print(f"[browser_tool] No active session found for task_id: {task_id}", file=sys.stderr)
    
+    # Clean up activity tracking
+    with _cleanup_lock:
+        if task_id in _session_last_activity:
+            del _session_last_activity[task_id]
+

 def cleanup_all_browsers() -> None:
    """
@@ -1384,6 +1508,10 @@ def cleanup_all_browsers() -> None:
    for task_id in list(_active_sessions.keys()):
        cleanup_browser(task_id)
    
+    # Clear any remaining activity tracking
+    with _cleanup_lock:
+        _session_last_activity.clear()
+

 def get_active_browser_sessions() -> Dict[str, Dict[str, str]]:
    """